# P4. Implement a Python program to demonstrate:
## 1) Importing Datasets 
## 2) Cleaning the Data 
## 3) Data frame manipulation using Numpy

#### 1) Importing Datasets 

In [12]:
import pandas as pd

# Load the Toyota dataset: the first CSV column is used as the row index, and
# the placeholder strings "??" and "????" are parsed as missing values (NaN).
cars_data = pd.read_csv("Toyota-2.csv", index_col=0, na_values=["??", "????"])

# Heading/value pairs giving a quick structural overview of the loaded frame,
# listed in the order in which they are printed: first rows, last rows,
# row index, column labels, and (rows, columns) shape.
_overview = (
    ("First 5 rows:", cars_data.head()),
    ("\nLast 3 rows:", cars_data.tail(3)),
    ("\nIndex:", cars_data.index),
    ("\nColumns:", cars_data.columns),
    ("\nShape:", cars_data.shape),
)

# Print each heading followed by its value.
for _heading, _value in _overview:
    print(_heading)
    print(_value)



First 5 rows:
   Price   Age       KM FuelType    HP  MetColor  Automatic    CC  Doors  \
0  13500  23.0  46986.0   Diesel  90.0       1.0          0  2000  three   
1  13750  23.0  72937.0   Diesel  90.0       1.0          0  2000      3   
2  13950  24.0  41711.0   Diesel  90.0       NaN          0  2000      3   
3  14950  26.0  48000.0   Diesel  90.0       0.0          0  2000      3   
4  13750  30.0  38500.0   Diesel  90.0       0.0          0  2000      3   

   Weight  
0    1165  
1    1165  
2    1165  
3    1165  
4    1170  

Last 3 rows:
      Price   Age       KM FuelType     HP  MetColor  Automatic    CC Doors  \
1433   8500   NaN  17016.0   Petrol   86.0       0.0          0  1300     3   
1434   7250  70.0      NaN      NaN   86.0       1.0          0  1300     3   
1435   6950  76.0      1.0   Petrol  110.0       0.0          0  1600     5   

      Weight  
1433    1015  
1434    1015  
1435    1114  

Index:
Index([   0,    1,    2,    3,    4,    5,    6,    7,    

#### 2) Cleaning the Data 

In [15]:
# Cleaning the data - remove the features that are not required.
# drop() returns a new DataFrame, so cars_data itself is left unchanged.
cars_data2 = cars_data.drop(columns=['Doors', 'Weight'])

# Show the dimensions of the reduced DataFrame
print("\nShape after dropping columns:")
print(cars_data2.shape)

# Dealing with missing values

# Number of missing (NaN) entries in each column
sum_na = cars_data2.isnull().sum()
print("\nSum of null values:\n", sum_na)

# Rows that contain at least one missing value in any column
_rows_with_nan = cars_data2.isna().any(axis=1)
missing = cars_data2.loc[_rows_with_nan]
print("Missing values :\n", missing)


Shape after dropping columns:
(1436, 8)

Sum of null values:
 Price          0
Age          100
KM            15
FuelType     100
HP             6
MetColor     150
Automatic      0
CC             0
dtype: int64
Missing values :
       Price   Age       KM FuelType    HP  MetColor  Automatic    CC
2     13950  24.0  41711.0   Diesel  90.0       NaN          0  2000
6     16900  27.0      NaN   Diesel   NaN       NaN          0  2000
7     18600  30.0  75889.0      NaN  90.0       1.0          0  2000
9     12950  23.0  71138.0   Diesel   NaN       NaN          0  1900
15    22000  28.0  18739.0   Petrol   NaN       0.0          0  1800
...     ...   ...      ...      ...   ...       ...        ...   ...
1428   8450  72.0      NaN   Petrol  86.0       NaN          0  1300
1431   7500   NaN  20544.0   Petrol  86.0       1.0          0  1300
1432  10845  72.0      NaN   Petrol  86.0       0.0          0  1300
1433   8500   NaN  17016.0   Petrol  86.0       0.0          0  1300
1434   7250

#### 3) Data frame manipulation using Numpy

In [19]:
# Mean and median of the 'Age' variable. They are bound to names so the values
# are not silently discarded (only the last expression of a cell is displayed);
# 'Age' itself is not imputed in this cell.
age_mean = cars_data2['Age'].mean()
age_median = cars_data2['Age'].median()

# Fill missing 'HP' values with the column mean.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on cars_data2['HP']: the in-place form acts on an
# intermediate object selected from the frame, which pandas warns about
# (chained assignment) and which does not update cars_data2 under Copy-on-Write.
cars_data2['HP'] = cars_data2['HP'].fillna(cars_data2['HP'].mean())

# Getting the value counts of "Fuel Type"
fuel_type_counts = cars_data2['FuelType'].value_counts()

# Filling missing values in the "Fuel Type" column with the mode value, taken
# as the first index label of the value counts; assigned back for the same
# reason as 'HP' above.
cars_data2['FuelType'] = cars_data2['FuelType'].fillna(fuel_type_counts.index[0])

# Printing the mode value of the "MetColor" column
print(cars_data2['MetColor'].mode())

0    1.0
Name: MetColor, dtype: float64
