In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error


In [2]:
# Load the raw listings. As the preview shows, price, mileage, engine size and
# seat count arrive as text (e.g. "10.03 Lakh", "86,226 kms", "1956 cc").
df = pd.read_csv('car_price.csv')
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would append a stray "None" line to the output.
df.info()

   Unnamed: 0                                car_name car_prices_in_rupee  \
0           0  Jeep Compass 2.0 Longitude Option BSIV          10.03 Lakh   
1           1            Renault Duster RXZ Turbo CVT          12.83 Lakh   
2           2                      Toyota Camry 2.5 G          16.40 Lakh   
3           3                       Honda Jazz VX CVT           7.77 Lakh   
4           4        Volkswagen Polo 1.2 MPI Highline           5.15 Lakh   

   kms_driven fuel_type transmission  ownership  manufacture   engine    Seats  
0  86,226 kms    Diesel       Manual  1st Owner         2017  1956 cc  5 Seats  
1  13,248 kms    Petrol    Automatic  1st Owner         2021  1330 cc  5 Seats  
2  60,343 kms    Petrol    Automatic  1st Owner         2016  2494 cc  5 Seats  
3  26,696 kms    Petrol    Automatic  1st Owner         2018  1199 cc  5 Seats  
4  69,414 kms    Petrol       Manual  1st Owner         2016  1199 cc  5 Seats  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5

In [3]:
# Per-column count of null entries in the loaded frame.
missing_values = df.isna().sum()
print("Missing Values:", missing_values, sep="\n")

Missing Values:
Unnamed: 0             0
car_name               0
car_prices_in_rupee    0
kms_driven             0
fuel_type              0
transmission           0
ownership              0
manufacture            0
engine                 0
Seats                  0
dtype: int64


In [5]:
def convert_price(price_str):
    """Convert a price label such as ``"10.03 Lakh"`` into a rupee amount.

    The first whitespace-separated token is read as the number (thousands
    separators are allowed) and the optional second token as the unit.

    Parameters
    ----------
    price_str : str or number
        Price label, e.g. ``"10.03 Lakh"``, ``"1.5 Crore"`` or ``"35,000"``.
        A value that is already numeric is taken to be rupees and returned
        as a float, so applying the conversion a second time is harmless.

    Returns
    -------
    float or None
        The amount in rupees, or ``None`` when the value is missing, NaN,
        not parseable as a number, or carries an unrecognised unit.
    """
    # Rupees per unit; a label without a unit is treated as plain rupees.
    unit_multipliers = {
        'lakh': 100000,
        'lakhs': 100000,
        'crore': 10000000,
        'crores': 10000000,
    }

    if not isinstance(price_str, str):
        # None, NaN, or an already-converted number: never call .split() on these.
        try:
            amount = float(price_str)
        except (TypeError, ValueError):
            return None
        # NaN is the only float that is not equal to itself.
        return None if amount != amount else amount

    tokens = price_str.replace(',', '').split()
    if not tokens:
        return None

    try:
        num_value = float(tokens[0])
    except ValueError:
        return None
    if num_value != num_value:
        return None

    if len(tokens) == 1:
        return num_value

    multiplier = unit_multipliers.get(tokens[1].lower())
    if multiplier is None:
        # Unknown unit: returning nothing is safer than guessing a scale.
        return None
    return num_value * multiplier
# Swap the textual price labels (e.g. "10.03 Lakh") for the numeric values
# returned by convert_price, then preview the frame.
price_col = 'car_prices_in_rupee'
df[price_col] = df[price_col].apply(convert_price)
print(df.head())
        

   Unnamed: 0                                car_name  car_prices_in_rupee  \
0           0  Jeep Compass 2.0 Longitude Option BSIV            1003000.0   
1           1            Renault Duster RXZ Turbo CVT            1283000.0   
2           2                      Toyota Camry 2.5 G            1640000.0   
3           3                       Honda Jazz VX CVT             777000.0   
4           4        Volkswagen Polo 1.2 MPI Highline             515000.0   

   kms_driven fuel_type transmission  ownership  manufacture   engine    Seats  
0  86,226 kms    Diesel       Manual  1st Owner         2017  1956 cc  5 Seats  
1  13,248 kms    Petrol    Automatic  1st Owner         2021  1330 cc  5 Seats  
2  60,343 kms    Petrol    Automatic  1st Owner         2016  2494 cc  5 Seats  
3  26,696 kms    Petrol    Automatic  1st Owner         2018  1199 cc  5 Seats  
4  69,414 kms    Petrol       Manual  1st Owner         2016  1199 cc  5 Seats  


In [7]:
# Collect the distinct labels of both categorical columns first, then report them.
unique_fuel_types = df['fuel_type'].unique()
unique_transmissions = df['transmission'].unique()

print("Unique Fuel Types:", unique_fuel_types)
print("Unique Transmissions:", unique_transmissions)

Unique Fuel Types: ['Diesel' 'Petrol' 'Cng' 'Electric' 'Lpg']
Unique Transmissions: ['Manual' 'Automatic']


In [10]:
def one_hot_encode(df, columns_to_encode, drop_first=True):
    """Return a copy of ``df`` with the given columns one-hot encoded.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the categorical columns; it is not modified.
    columns_to_encode : list of str
        Names of the columns to replace with indicator columns.
    drop_first : bool, default True
        Drop the first level of every encoded column so the indicators are
        not perfectly collinear. Pass ``False`` to keep one indicator per
        category.

    Returns
    -------
    pandas.DataFrame
        New frame in which each encoded column is replaced by
        ``<column>_<category>`` indicator columns.
    """
    return pd.get_dummies(df, columns=columns_to_encode, drop_first=drop_first)
# Small hand-built frame covering every fuel type and transmission reported
# above, used to demonstrate the encoder. It gets its own name: assigning it
# to `df` would discard the car-price dataset loaded in the earlier cells.
data = {'fuel_type': ['Diesel', 'Petrol', 'Cng', 'Electric', 'Lpg'],
        'transmission': ['Manual', 'Automatic', 'Manual', 'Automatic', 'Manual']}
demo_df = pd.DataFrame(data)
columns_to_encode = ['fuel_type', 'transmission']

# Apply the one_hot_encode function
df_encoded = one_hot_encode(demo_df, columns_to_encode)

# Display the DataFrame after one-hot encoding
print(df_encoded)

   fuel_type_Diesel  fuel_type_Electric  fuel_type_Lpg  fuel_type_Petrol  \
0              True               False          False             False   
1             False               False          False              True   
2             False               False          False             False   
3             False                True          False             False   
4             False               False           True             False   

   transmission_Manual  
0                 True  
1                False  
2                 True  
3                False  
4                 True  
