# Pandas Practice

In [108]:
import pandas as pd
import os

### The Two Main pandas Data Types
* Series --> 1-dimensional
* DataFrame --> 2-dimensional (can be built from a Python dictionary of Series)

In [109]:
# A Series is a one-dimensional labelled column of values.
# No index is supplied, so pandas labels the entries 0, 1, 2.
car_model = pd.Series(
    data=["BMW", "Toyota", "Honda"],
)
car_model

0       BMW
1    Toyota
2     Honda
dtype: object

In [110]:
# A second Series holding one colour per car, using the same default
# 0..2 integer labels as car_model.
car_color = pd.Series(
    data=["Red", "Blue", "White"],
)
car_color

0      Red
1     Blue
2    White
dtype: object

In [111]:
# Build a two-dimensional DataFrame from a dictionary: each key becomes a
# column name and each Series supplies that column's values.
car_data = pd.DataFrame(
    data={
        "Car": car_model,
        "Color": car_color,
    }
)
car_data

Unnamed: 0,Car,Color
0,BMW,Red
1,Toyota,Blue
2,Honda,White


## Importing Data using "read_csv"

In [112]:
# Locate the CSV relative to the directory the kernel is running in.
current_dir = os.getcwd()

# The project root is taken to be three directory levels above the
# working directory; abspath() collapses the ".." segments.
project_dir = os.path.abspath(
    os.path.join(current_dir, "..", "..", "..")
)

# <project root>/data/car-sales.csv
file_path = os.path.join(project_dir, "data", "car-sales.csv")

# Load the CSV into a DataFrame and display it.
car_sales = pd.read_csv(file_path)
car_sales

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


# Describing Data

In [113]:
# dtypes is an attribute (no parentheses): it reports the data type of each column.
# Price shows up as object because its values are text such as "$4,000.00".
car_sales.dtypes

Make             object
Colour           object
Odometer (KM)     int64
Doors             int64
Price            object
dtype: object

In [114]:
# columns is an attribute holding the column labels as an Index object.
car_sales.columns

Index(['Make', 'Colour', 'Odometer (KM)', 'Doors', 'Price'], dtype='object')

In [115]:
# Store the column Index under its own name, then display it.
car_columns = car_sales.columns
car_columns

Index(['Make', 'Colour', 'Odometer (KM)', 'Doors', 'Price'], dtype='object')

In [116]:
# index is an attribute holding the row labels; this frame has the default RangeIndex.
car_sales.index

RangeIndex(start=0, stop=10, step=1)

In [117]:
# describe() returns summary statistics: count, mean, std, min, quartiles and max.
# Called with no arguments on this frame it summarises only the numeric
# columns (Odometer (KM) and Doors); the object columns are left out.
car_sales.describe()

Unnamed: 0,Odometer (KM),Doors
count,10.0,10.0
mean,78601.4,4.0
std,61983.471735,0.471405
min,11179.0,3.0
25%,35836.25,4.0
50%,57369.0,4.0
75%,96384.5,4.0
max,213095.0,5.0


In [118]:
# info() prints an overview: index range, per-column non-null counts and dtypes,
# and memory usage.
car_sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Make           10 non-null     object
 1   Colour         10 non-null     object
 2   Odometer (KM)  10 non-null     int64 
 3   Doors          10 non-null     int64 
 4   Price          10 non-null     object
dtypes: int64(2), object(3)
memory usage: 532.0+ bytes


In [119]:
# NOTE(review): this averages the rows of the describe() table (count, mean, std,
# min, 25%, 50%, 75%, max), not the raw data, so the result is not the column
# mean. For actual column means consider car_sales.mean(numeric_only=True).
car_sales.describe().mean()

Odometer (KM)    69307.327717
Doors                4.308926
dtype: float64

In [120]:
# sum() is applied to every column. Numeric columns are totalled, while object
# (text) columns such as Make, Colour and Price are concatenated into one string.
car_sales.sum()

Make             ToyotaHondaToyotaBMWNissanToyotaHondaHondaToyo...
Colour               WhiteRedBlueBlackWhiteGreenBlueBlueWhiteWhite
Odometer (KM)                                               786014
Doors                                                           40
Price            $4,000.00$5,000.00$7,000.00$22,000.00$3,500.00...
dtype: object

In [121]:
# NOTE(review): this adds up the rows of the describe() table (count + mean + std
# + min + quartiles + max), which is not the total of the underlying data.
# Compare with car_sales["Doors"].sum() for a real column total.
car_sales.describe().sum()

Odometer (KM)    554458.621735
Doors                34.471405
dtype: float64

In [122]:
# Select a single numeric column first, then total its values.
car_sales["Doors"].sum()

40

In [123]:
# len() of a DataFrame is its number of rows.
len(car_sales)

10

# Viewing and Selecting Data

In [124]:
# head() gives a quick snapshot of the DataFrame.
# With no argument it returns the first 5 rows.
car_sales.head()

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"


In [125]:
# Pass a number to head() to choose how many leading rows to show (here, 3).
car_sales.head(3)

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"


In [126]:
# tail() is the counterpart of head(): with no argument it returns the
# last 5 rows of the DataFrame.
car_sales.tail()

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


In [127]:
# Pass a number to tail() to choose how many trailing rows to show (here, 2).
car_sales.tail(2)

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


In [128]:
# Example Series for comparing .loc and .iloc.
# The custom index is deliberately out of order and repeats the label 3,
# so label-based and position-based lookups give different answers.
animals = pd.Series(
    data=["cat", "dog", "bird", "fish", "snake", "pandas"],
    index=[0, 1, 3, 3, 2, 5],
)
animals

0       cat
1       dog
3      bird
3      fish
2     snake
5    pandas
dtype: object

In [129]:
# .loc looks up by index label. Label 1 occurs once, so a single value comes back.
animals.loc[1]

'dog'

In [130]:
# .loc looks up by index label. Label 3 appears twice in the index, so the
# result is a Series containing both matching entries.
animals.loc[3]

3    bird
3    fish
dtype: object

In [131]:
# .iloc looks up by zero-based position and ignores the index labels:
# position 3 is the fourth element.
animals.iloc[3]

'fish'

In [132]:
# On a DataFrame, .loc[label] returns that row as a Series whose index is the
# column names.
car_sales.loc[1]

Make                 Honda
Colour                 Red
Odometer (KM)        87899
Doors                    4
Price            $5,000.00
Name: 1, dtype: object

In [133]:
# Display the Series again for reference before the slicing examples.
animals

0       cat
1       dog
3      bird
3      fish
2     snake
5    pandas
dtype: object

In [134]:
# Slicing with .iloc is positional and excludes the stop position:
# [:3] returns positions 0, 1 and 2.
animals.iloc[:3]

0     cat
1     dog
3    bird
dtype: object

In [135]:
# Slicing with .loc is label-based and includes the end label: everything up to
# and including the entries labelled 3 is returned (both of them here).
animals.loc[:3]

0     cat
1     dog
3    bird
3    fish
dtype: object

In [136]:
# Accessing columns: bracket notation returns the named column as a Series.
car_sales["Make"]

0    Toyota
1     Honda
2    Toyota
3       BMW
4    Nissan
5    Toyota
6     Honda
7     Honda
8    Toyota
9    Nissan
Name: Make, dtype: object

In [137]:
# Attribute (dot) notation returns the same column. It only works when the
# column name is a valid Python identifier, so a name like "Odometer (KM)"
# still needs brackets.
car_sales.Make

0    Toyota
1     Honda
2    Toyota
3       BMW
4    Nissan
5    Toyota
6     Honda
7     Honda
8    Toyota
9    Nissan
Name: Make, dtype: object

In [138]:
# Filtering: the comparison produces a True/False value per row, and indexing
# the DataFrame with it keeps only the rows where Make equals "Toyota".
car_sales[car_sales["Make"] == "Toyota"]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
5,Toyota,Green,99213,4,"$4,500.00"
8,Toyota,White,60000,4,"$6,250.00"


In [139]:
# Same filtering pattern with a numeric condition: keep rows whose odometer
# reading is greater than 150000.
car_sales[car_sales["Odometer (KM)"] > 150000]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
4,Nissan,White,213095,4,"$3,500.00"


In [140]:
# Display the full DataFrame again; the filters above returned new frames
# and did not modify car_sales.
car_sales

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


In [141]:
# crosstab counts how many rows fall into each combination of two columns:
# one table row per Make, one table column per Doors value.
cross_doors = pd.crosstab(
    index=car_sales["Make"],
    columns=car_sales["Doors"],
)
cross_doors

Doors,3,4,5
Make,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BMW,0,0,1
Honda,0,3,0
Nissan,0,2,0
Toyota,1,3,0


In [142]:
# Count the cars for each Make/Colour combination:
# one table row per Make, one table column per Colour.
cross_colour = pd.crosstab(
    index=car_sales["Make"],
    columns=car_sales["Colour"],
)
cross_colour

Colour,Black,Blue,Green,Red,White
Make,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BMW,1,0,0,0,0
Honda,0,2,0,1,0
Nissan,0,0,0,0,2
Toyota,0,1,1,0,2
