## Topics
- DataFrame
- import data
- Attribute & Function
- Handling missing data
- practice

In [1]:
import pandas as pd

## Series is 1D

In [3]:
# A Series is a one-dimensional labelled array; here it holds car brand names.
brand_names = ["Audi", "BMW", "Nexa"]
series = pd.Series(brand_names)

In [4]:
# Display the Series: values on the right, default integer index on the left.
series

0    Audi
1     BMW
2    Nexa
dtype: object

## DataFrame is 2D

In [6]:
# A DataFrame is two-dimensional: each dict key becomes a column.
# "Brand" reuses the Series built earlier; "Price" is a plain list of strings.
car_columns = {
    "Brand": series,
    "Price": ["50L", "60L", "15L"],
}
car = pd.DataFrame(car_columns)

In [7]:
# Display the two-column DataFrame (Brand, Price).
car

Unnamed: 0,Brand,Price
0,Audi,50L
1,BMW,60L
2,Nexa,15L


## Import data

In [9]:
# Load the car sales data from a CSV file; the path is relative to the
# notebook's working directory.
car_sales = pd.read_csv("car-sales.csv")

In [10]:
# Preview the first rows (head() returns 5 rows by default).
car_sales.head()

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"


## Importing data from a URL

In [12]:
# read_csv accepts a URL as well as a local path.
# NOTE: despite the variable name, this URL points at a heart-disease
# dataset (columns such as age, sex, chol, target), not car sales data.
HEART_DISEASE_CSV_URL = (
    "https://raw.githubusercontent.com/mrdbourke/zero-to-mastery-ml/"
    "master/data/heart-disease.csv"
)
car_sales_url = pd.read_csv(HEART_DISEASE_CSV_URL)

In [13]:
# Show only the first row to confirm the remote file loaded.
car_sales_url.head(1)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1


## Describe Data

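### Attributes vs. functions
An attribute is read without parentheses (e.g. `car_sales.dtypes`), while a function (method) is called with parentheses (e.g. `car_sales.to_csv()`).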

In [16]:
# `columns` is an attribute (no parentheses): it holds the column labels.
car_sales.columns

Index(['Make', 'Colour', 'Odometer (KM)', 'Doors', 'Price'], dtype='object')

In [17]:
# Summary statistics; only the numeric columns (Odometer (KM), Doors) appear
# in the result because Make, Colour and Price are stored as text.
car_sales.describe()

Unnamed: 0,Odometer (KM),Doors
count,10.0,10.0
mean,78601.4,4.0
std,61983.471735,0.471405
min,11179.0,3.0
25%,35836.25,4.0
50%,57369.0,4.0
75%,96384.5,4.0
max,213095.0,5.0


In [18]:
# numeric_only=True restricts the mean to numeric columns. It is needed on
# pandas >= 2.0, which no longer silently skips text columns such as
# Make, Colour and Price.
car_sales.mean(numeric_only=True)

Odometer (KM)    78601.4
Doors                4.0
dtype: float64

In [19]:
# Standard deviation of a single column (matches the `std` row of describe()).
car_sales['Doors'].std()

0.4714045207910317

In [20]:
# Boolean-mask filtering: keep only the rows whose Make is "Nissan".
is_nissan = car_sales["Make"].eq("Nissan")
car_sales.loc[is_nissan]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
4,Nissan,White,213095,4,"$3,500.00"
9,Nissan,White,31600,4,"$9,700.00"


In [21]:
# Count the non-null entries of every remaining column for each
# (Colour, Make) combination.
group_keys = ["Colour", "Make"]
car_sales.groupby(group_keys).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Odometer (KM),Doors,Price
Colour,Make,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Black,BMW,1,1,1
Blue,Honda,2,2,2
Blue,Toyota,1,1,1
Green,Toyota,1,1,1
Red,Honda,1,1,1
White,Nissan,2,2,2
White,Toyota,2,2,2


## Manipulating Data in Pandas

In [23]:
# Load a variant of the car sales data that contains missing values, then
# display it so the gaps in each column are visible.
car_s_missing = pd.read_csv("car-sales-missing-data.csv")
car_s_missing

Unnamed: 0,Make,Colour,Odometer,Doors,Price
0,Toyota,White,150043.0,4.0,"$4,000"
1,Honda,Red,87899.0,4.0,"$5,000"
2,Toyota,Blue,,3.0,"$7,000"
3,BMW,Black,11179.0,5.0,"$22,000"
4,Nissan,White,213095.0,4.0,"$3,500"
5,Toyota,Green,,4.0,"$4,500"
6,Honda,,,4.0,"$7,500"
7,Honda,Blue,,4.0,
8,Toyota,White,60000.0,,
9,,White,31600.0,4.0,"$9,700"


In [24]:
# Impute missing odometer readings with the column mean, rounded to one
# decimal place. `numeric_only` is dropped: it is redundant on a single
# numeric Series and is not accepted by every pandas release. `.round(1)` is
# the idiomatic Series method behind the builtin round().
odometer_mean = car_s_missing["Odometer"].mean()
car_s_missing["Odometer"] = car_s_missing["Odometer"].fillna(odometer_mean).round(1)


In [25]:
# Odometer gaps are now filled with the mean (92302.7); the other columns
# still contain missing values.
car_s_missing

Unnamed: 0,Make,Colour,Odometer,Doors,Price
0,Toyota,White,150043.0,4.0,"$4,000"
1,Honda,Red,87899.0,4.0,"$5,000"
2,Toyota,Blue,92302.7,3.0,"$7,000"
3,BMW,Black,11179.0,5.0,"$22,000"
4,Nissan,White,213095.0,4.0,"$3,500"
5,Toyota,Green,92302.7,4.0,"$4,500"
6,Honda,,92302.7,4.0,"$7,500"
7,Honda,Blue,92302.7,4.0,
8,Toyota,White,60000.0,,
9,,White,31600.0,4.0,"$9,700"


In [26]:
# Add a column from a Series that is shorter than the DataFrame: pandas aligns
# on the index, so rows 0-4 receive 5 and the remaining rows become NaN.
seat_counts = pd.Series([5] * 5)
car_sales["seat"] = seat_counts

In [27]:
# Fill the NaN seats left by the shorter Series. Only the "seat" column is
# targeted: a frame-wide fillna(5) would also write 5 into any other column
# that happened to contain missing values. Assigning the result (instead of
# inplace=True) keeps the cell safe to re-run.
car_sales["seat"] = car_sales["seat"].fillna(5)

In [28]:
# Add a column from a plain list; unlike a Series, a list must contain exactly
# one value per row (10 here).
mileage_per_litre = [25, 30, 18, 33, 15, 22, 19, 23, 12, 26]
car_sales["Milage/ltr"] = mileage_per_litre

In [29]:
# Derive a column from two existing ones: distance driven / distance per litre.
# NOTE: "Toatal" is misspelled, but later cells look the column up by this
# exact key, so it is kept as-is.
distance_km = car_sales["Odometer (KM)"]
km_per_litre = car_sales["Milage/ltr"]
car_sales["Toatal Fuel Used in ltr"] = distance_km.div(km_per_litre)

In [30]:
# Fuel cost = litres used x price per litre. The price is a named constant
# rather than an inline magic number so it is easy to find and change.
FUEL_PRICE_PER_LITRE = 1.25  # presumably dollars per litre, given the "$" in the column name
car_sales["Total Money on fuel in $"] = car_sales["Toatal Fuel Used in ltr"] * FUEL_PRICE_PER_LITRE

## Project

In [31]:
# Show the frame with the new seat, mileage, fuel-used and fuel-cost columns.
car_sales

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price,seat,Milage/ltr,Toatal Fuel Used in ltr,Total Money on fuel in $
0,Toyota,White,150043,4,"$4,000.00",5.0,25,6001.72,7502.15
1,Honda,Red,87899,4,"$5,000.00",5.0,30,2929.966667,3662.458333
2,Toyota,Blue,32549,3,"$7,000.00",5.0,18,1808.277778,2260.347222
3,BMW,Black,11179,5,"$22,000.00",5.0,33,338.757576,423.44697
4,Nissan,White,213095,4,"$3,500.00",5.0,15,14206.333333,17757.916667
5,Toyota,Green,99213,4,"$4,500.00",5.0,22,4509.681818,5637.102273
6,Honda,Blue,45698,4,"$7,500.00",5.0,19,2405.157895,3006.447368
7,Honda,Blue,54738,4,"$7,000.00",5.0,23,2379.913043,2974.891304
8,Toyota,White,60000,4,"$6,250.00",5.0,12,5000.0,6250.0
9,Nissan,White,31600,4,"$9,700.00",5.0,26,1215.384615,1519.230769


In [32]:
import random

# Attach a placeholder safety rating drawn uniformly from [4.5, 5] to each row.
# A dedicated, seeded generator makes the values identical on every
# "Restart & Run All", and sizing the list with len(car_sales) avoids a
# length-mismatch error if the number of rows ever changes.
rating_rng = random.Random(42)
car_sales["Safety Rating"] = [rating_rng.uniform(4.5, 5) for _ in range(len(car_sales))]

In [33]:
# Show the frame including the new "Safety Rating" column.
car_sales

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price,seat,Milage/ltr,Toatal Fuel Used in ltr,Total Money on fuel in $,Safety Rating
0,Toyota,White,150043,4,"$4,000.00",5.0,25,6001.72,7502.15,4.943199
1,Honda,Red,87899,4,"$5,000.00",5.0,30,2929.966667,3662.458333,4.658705
2,Toyota,Blue,32549,3,"$7,000.00",5.0,18,1808.277778,2260.347222,4.77393
3,BMW,Black,11179,5,"$22,000.00",5.0,33,338.757576,423.44697,4.736798
4,Nissan,White,213095,4,"$3,500.00",5.0,15,14206.333333,17757.916667,4.718077
5,Toyota,Green,99213,4,"$4,500.00",5.0,22,4509.681818,5637.102273,4.749766
6,Honda,Blue,45698,4,"$7,500.00",5.0,19,2405.157895,3006.447368,4.641272
7,Honda,Blue,54738,4,"$7,000.00",5.0,23,2379.913043,2974.891304,4.754375
8,Toyota,White,60000,4,"$6,250.00",5.0,12,5000.0,6250.0,4.7975
9,Nissan,White,31600,4,"$9,700.00",5.0,26,1215.384615,1519.230769,4.865597


In [34]:
# sample(frac=1) returns every row in random order, i.e. a shuffled copy.
# No random_state is passed, so the order differs on each run.
css = car_sales.sample(frac=1)

In [35]:
# Draw a random half of the shuffled rows; the original index labels are kept.
css.sample(frac=0.5)

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price,seat,Milage/ltr,Toatal Fuel Used in ltr,Total Money on fuel in $,Safety Rating
4,Nissan,White,213095,4,"$3,500.00",5.0,15,14206.333333,17757.916667,4.718077
5,Toyota,Green,99213,4,"$4,500.00",5.0,22,4509.681818,5637.102273,4.749766
9,Nissan,White,31600,4,"$9,700.00",5.0,26,1215.384615,1519.230769,4.865597
1,Honda,Red,87899,4,"$5,000.00",5.0,30,2929.966667,3662.458333,4.658705
0,Toyota,White,150043,4,"$4,000.00",5.0,25,6001.72,7502.15,4.943199


In [36]:
# Shorten the fuel-cost column label. rename() returns a new frame, which is
# bound back to car_sales instead of mutating it with inplace=True.
car_sales = car_sales.rename(columns={"Total Money on fuel in $": "Money on Fuel"})

In [37]:
# Confirm the rename: the fuel-cost column is now labelled "Money on Fuel".
car_sales

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price,seat,Milage/ltr,Toatal Fuel Used in ltr,Money on Fuel,Safety Rating
0,Toyota,White,150043,4,"$4,000.00",5.0,25,6001.72,7502.15,4.943199
1,Honda,Red,87899,4,"$5,000.00",5.0,30,2929.966667,3662.458333,4.658705
2,Toyota,Blue,32549,3,"$7,000.00",5.0,18,1808.277778,2260.347222,4.77393
3,BMW,Black,11179,5,"$22,000.00",5.0,33,338.757576,423.44697,4.736798
4,Nissan,White,213095,4,"$3,500.00",5.0,15,14206.333333,17757.916667,4.718077
5,Toyota,Green,99213,4,"$4,500.00",5.0,22,4509.681818,5637.102273,4.749766
6,Honda,Blue,45698,4,"$7,500.00",5.0,19,2405.157895,3006.447368,4.641272
7,Honda,Blue,54738,4,"$7,000.00",5.0,23,2379.913043,2974.891304,4.754375
8,Toyota,White,60000,4,"$6,250.00",5.0,12,5000.0,6250.0,4.7975
9,Nissan,White,31600,4,"$9,700.00",5.0,26,1215.384615,1519.230769,4.865597
