In [79]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [80]:
# Read the car-price CSV into a DataFrame and preview its first two rows.
car_price = pd.read_csv(filepath_or_buffer='/content/car_price.csv')
car_price.head(n=2)

Unnamed: 0,Car ID,Brand,Year,Engine Size,Fuel Type,Transmission,Mileage,Condition,Price,Model
0,1,Tesla,2016,2.3,Petrol,Manual,114832,New,26613.92,Model X
1,2,BMW,2018,4.4,Electric,Manual,143190,Used,14679.61,5 Series


## **data cleaning**

In [81]:
# Print column names, non-null counts and dtypes to spot missing values
# or wrongly typed columns before any cleaning.
car_price.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8750 entries, 0 to 8749
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Car ID        8750 non-null   int64  
 1   Brand         8750 non-null   object 
 2   Year          8750 non-null   int64  
 3   Engine Size   8750 non-null   float64
 4   Fuel Type     8750 non-null   object 
 5   Transmission  8750 non-null   object 
 6   Mileage       8750 non-null   int64  
 7   Condition     8750 non-null   object 
 8   Price         8750 non-null   float64
 9   Model         8750 non-null   object 
dtypes: float64(2), int64(3), object(5)
memory usage: 683.7+ KB


In [82]:
# Count rows that are exact duplicates of an earlier row (all columns compared).
# NOTE(review): this only detects full-row duplicates. The describe() output
# reports a maximum `Car ID` of 2500 across 8750 rows, so IDs must repeat;
# consider also checking car_price.duplicated(subset=['Car ID']).
car_price.duplicated().sum()

np.int64(0)

In [83]:
# Number of missing values in each column (isna is the alias of isnull).
car_price.isna().sum()

Unnamed: 0,0
Car ID,0
Brand,0
Year,0
Engine Size,0
Fuel Type,0
Transmission,0
Mileage,0
Condition,0
Price,0
Model,0


In [84]:
# Normalise every column label to lower case so later cells can refer to
# 'price', 'engine size', ... consistently.
car_price = car_price.rename(columns=str.lower)

In [85]:
# Re-inspect the frame to confirm the column labels are now lower case.
car_price.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8750 entries, 0 to 8749
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   car id        8750 non-null   int64  
 1   brand         8750 non-null   object 
 2   year          8750 non-null   int64  
 3   engine size   8750 non-null   float64
 4   fuel type     8750 non-null   object 
 5   transmission  8750 non-null   object 
 6   mileage       8750 non-null   int64  
 7   condition     8750 non-null   object 
 8   price         8750 non-null   float64
 9   model         8750 non-null   object 
dtypes: float64(2), int64(3), object(5)
memory usage: 683.7+ KB


## **data analysis**

In [86]:
# Summary statistics (count, mean, std, min, quartiles, max); called without
# arguments on this mixed-dtype frame, only the numeric columns are reported.
car_price.describe()

Unnamed: 0,car id,year,engine size,mileage,price
count,8750.0,8750.0,8750.0,8750.0,8750.0
mean,1253.659657,2011.567886,3.496114,148963.624457,53106.915877
std,720.050777,6.964052,1.471319,88119.193624,27414.331529
min,1.0,2000.0,1.0,15.0,5011.27
25%,633.0,2005.0,2.2,70602.25,29259.73
50%,1254.0,2012.0,3.5,147733.0,54194.93
75%,1873.75,2018.0,4.8,225295.0,76480.0775
max,2500.0,2023.0,6.0,299967.0,99982.59


In [87]:
# Summary of the object (string) columns: count, number of unique values,
# most frequent value and its frequency.
car_price.select_dtypes(include='object').describe()

Unnamed: 0,brand,fuel type,transmission,condition,model
count,8750,8750,8750,8750,8750
unique,7,4,2,3,28
top,Tesla,Diesel,Manual,New,3 Series
freq,1302,2235,4410,2956,337


# data visualization using plotly

## data visualization

# **Numerical**

In [88]:
# Price distribution (at most 100 bins), one colour per transmission type.
px.histogram(
    car_price,
    x='price',
    color='transmission',
    nbins=100,
    color_discrete_sequence=['pink', 'blue'],
    title="Distribution of Car Prices by Transmission Type",
)

In [89]:
# Engine-size distribution per transmission type, with a box plot drawn
# in the margin above the histogram.
px.histogram(
    car_price,
    x='engine size',
    color='transmission',
    nbins=100,
    marginal='box',
    color_discrete_sequence=['brown', 'orange'],
    title="Distribution of engine size by Transmission Type",
)

In [90]:
# Price distribution (at most 100 bins), one colour per condition.
px.histogram(
    car_price,
    x='price',
    color='condition',
    nbins=100,
    color_discrete_sequence=['brown', 'orange', 'gold'],
    title="Distribution of Car Prices by condition",
)

In [91]:
# Mileage distribution per condition, with a marginal box plot.
px.histogram(
    car_price,
    x='mileage',
    color='condition',
    nbins=100,
    marginal='box',
    color_discrete_sequence=['blue', 'pink', 'purple'],
    title="Distribution of mileage by condition",
)

In [92]:
# Price distribution coloured by condition, one facet column per fuel type.
px.histogram(
    car_price,
    x='price',
    color='condition',
    facet_col='fuel type',
    nbins=50,
    color_discrete_sequence=['pink', 'blue', 'purple'],
)

In [93]:
# Mileage distribution coloured by transmission, one facet column per fuel type.
px.histogram(
    car_price,
    x='mileage',
    color='transmission',
    facet_col='fuel type',
    nbins=50,
    color_discrete_sequence=['pink', 'blue'],
)

In [94]:
# Engine-size distribution coloured by transmission, one facet column per condition.
px.histogram(
    car_price,
    x='engine size',
    color='transmission',
    facet_col='condition',
    nbins=50,
    color_discrete_sequence=['pink', 'blue'],
)

In [95]:
# Mileage distribution in a single colour, one facet column per transmission type.
px.histogram(
    car_price,
    x='mileage',
    facet_col='transmission',
    nbins=50,
    color_discrete_sequence=['blue'],
)

In [96]:
# Price distribution coloured by fuel type, one facet column per condition.
px.histogram(
    car_price,
    x='price',
    color='fuel type',
    facet_col='condition',
    nbins=50,
    color_discrete_sequence=['pink', 'blue', 'purple', 'lightblue'],
)

In [97]:
# Share of rows per condition (no `values` given, so each row counts once).
px.pie(
    data_frame=car_price,
    names='condition',
    color_discrete_sequence=['green', 'mediumspringgreen', 'lightgreen'],
)

In [98]:
# Share of rows per transmission type.
px.pie(
    data_frame=car_price,
    names='transmission',
    color_discrete_sequence=['lavender', 'purple'],
)

In [99]:
# Share of the summed price contributed by each fuel type.
px.pie(
    data_frame=car_price,
    names='fuel type',
    values='price',
    color_discrete_sequence=['pink', 'blue', 'purple', 'lightblue'],
)

In [100]:
# Share of the summed engine size contributed by each condition.
px.pie(
    data_frame=car_price,
    names='condition',
    values='engine size',
    color_discrete_sequence=['pink', 'purple', 'lightblue'],
)

In [101]:
# Price against engine size, coloured by transmission type.
px.scatter(
    data_frame=car_price,
    x='price',
    y='engine size',
    color='transmission',
    color_discrete_sequence=['brown', 'orchid'],
)

In [102]:
# Price against mileage, coloured by transmission type (default palette).
px.scatter(
    data_frame=car_price,
    x='price',
    y='mileage',
    color='transmission',
)

In [103]:
# Mileage against engine size, coloured by condition (default palette).
px.scatter(
    data_frame=car_price,
    x='mileage',
    y='engine size',
    color='condition',
)

In [104]:
# Mileage against engine size by transmission, with a histogram along the
# x margin and a box plot along the y margin.
px.scatter(
    data_frame=car_price,
    x='mileage',
    y='engine size',
    color='transmission',
    color_discrete_sequence=['red', 'blue'],
    marginal_x='histogram',
    marginal_y='box',
)

In [105]:
# Pairwise scatter plots of the three continuous columns in an 800x800 figure.
px.scatter_matrix(
    data_frame=car_price,
    dimensions=['price', 'engine size', 'mileage'],
    color_discrete_sequence=['blue'],
    height=800,
    width=800,
)

In [106]:
# Mean engine size for every (year, transmission) pair, drawn as one line
# per transmission type.
engine_size_by_year_transmission = (
    car_price
    .groupby(['year', 'transmission'], as_index=False)['engine size']
    .mean()
)
fig = px.line(
    data_frame=engine_size_by_year_transmission,
    x='year',
    y='engine size',
    color='transmission',
    color_discrete_sequence=px.colors.qualitative.Dark2,
)
fig.show()

In [107]:
# Mean mileage for every (year, condition) pair, drawn as one line per condition.
mileage_by_year_condition = (
    car_price
    .groupby(['year', 'condition'], as_index=False)['mileage']
    .mean()
)
fig = px.line(
    data_frame=mileage_by_year_condition,
    x='year',
    y='mileage',
    color='condition',
    color_discrete_sequence=px.colors.qualitative.Set1,
)
fig.show()

In [108]:
# Mean price for every (year, condition) pair, drawn as one line per condition.
price_by_year_condition = (
    car_price
    .groupby(['year', 'condition'], as_index=False)['price']
    .mean()
)
fig = px.line(
    data_frame=price_by_year_condition,
    x='year',
    y='price',
    color='condition',
    color_discrete_sequence=px.colors.qualitative.Set1,
)
fig.show()

In [109]:
# Mean price for every (year, transmission) pair, drawn as one line per
# transmission type.
price_by_year_transmission = (
    car_price
    .groupby(['year', 'transmission'], as_index=False)['price']
    .mean()
)
fig = px.line(
    data_frame=price_by_year_transmission,
    x='year',
    y='price',
    color='transmission',
    color_discrete_sequence=px.colors.qualitative.Set1,
)
fig.show()

# **categorical**

In [110]:
# Horizontal box plots of price for each fuel type, split by transmission.
px.box(
    data_frame=car_price,
    x='price',
    y='fuel type',
    color='transmission',
    color_discrete_sequence=['Green', 'orange'],
    title="box plot",
)

In [111]:
# Horizontal box plots of price for each brand, split by transmission.
px.box(
    data_frame=car_price,
    x='price',
    y='brand',
    color='transmission',
    color_discrete_sequence=['Green', 'orange'],
    title="box plot",
)

In [112]:
# Horizontal box plots of mileage for each condition, split by transmission.
px.box(
    data_frame=car_price,
    x='mileage',
    y='condition',
    color='transmission',
    color_discrete_sequence=['Green', 'orange'],
    title="box plot",
)

In [113]:
# Horizontal box plots of engine size for each transmission type, split by
# condition. `condition` has three distinct values, so three colours are
# supplied; with only two, the palette cycles and two conditions would be
# drawn in the same colour.
px.box(
    car_price,
    x='engine size',
    y='transmission',
    color='condition',
    color_discrete_sequence=['Green', 'orange', 'purple'],
    title="box plot",
)

In [114]:
# Horizontal violins of price for each fuel type, split by transmission.
px.violin(
    data_frame=car_price,
    x='price',
    y='fuel type',
    color='transmission',
    color_discrete_sequence=['red', 'pink'],
    title="violin plot",
)

In [115]:
# Horizontal violins of price for each brand, split by transmission.
px.violin(
    data_frame=car_price,
    x='price',
    y='brand',
    color='transmission',
    color_discrete_sequence=['red', 'pink'],
    title="violin plot",
)

In [116]:
# Horizontal violins of engine size for each condition, split by transmission.
px.violin(
    data_frame=car_price,
    x='engine size',
    y='condition',
    color='transmission',
    color_discrete_sequence=['red', 'pink'],
    title="violin plot",
)

In [117]:
# Horizontal violins of mileage for each transmission type, split by condition.
px.violin(
    data_frame=car_price,
    x='mileage',
    y='transmission',
    color='condition',
    color_discrete_sequence=['red', 'pink', 'fuchsia'],
    title="violin plot",
)

In [118]:
# Mileage violins per transmission type split by condition, with a box plot
# drawn inside each violin.
px.violin(
    data_frame=car_price,
    x='mileage',
    y='transmission',
    color='condition',
    box=True,
    color_discrete_sequence=['red', 'pink', 'fuchsia'],
    title="violin plot",
)

In [119]:
# Engine-size violins per condition split by transmission, with a box plot
# drawn inside each violin.
px.violin(
    data_frame=car_price,
    x='engine size',
    y='condition',
    color='transmission',
    box=True,
    color_discrete_sequence=['red', 'pink'],
    title="violin plot",
)

In [120]:
# Strip plot of price for each brand, coloured by transmission.
px.strip(
    data_frame=car_price,
    x='brand',
    y='price',
    color='transmission',
    color_discrete_sequence=["blue", "orange"],
    title="strip plot",
)

In [121]:
# Strip plot of price for each fuel type, coloured by condition.
px.strip(
    data_frame=car_price,
    x='fuel type',
    y='price',
    color='condition',
    color_discrete_sequence=["blue", "orange", "brown"],
    title="strip plot",
)

In [122]:
# Strip plot of price for each transmission type, coloured by condition.
px.strip(
    data_frame=car_price,
    x='transmission',
    y='price',
    color='condition',
    color_discrete_sequence=["blue", "orange", "brown"],
    title="strip plot",
)

In [123]:
# Overlay a box plot and a violin plot of price per fuel type in one figure.
# `go` (plotly.graph_objects) is imported with the other imports at the top
# of the notebook.
fig1 = px.box(car_price, x='fuel type', y='price', color_discrete_sequence=['orange'])
fig2 = px.violin(car_price, x='fuel type', y='price', color_discrete_sequence=['blue'])

# Only the traces are carried over into the combined figure; the layout that
# Plotly Express built (including axis titles) is not, so label the axes and
# add a title explicitly.
all_fig = go.Figure(data=fig1.data + fig2.data)
all_fig.update_layout(
    title='Price by fuel type (box + violin)',
    xaxis_title='fuel type',
    yaxis_title='price',
)
all_fig.show()

In [124]:
# px.bar draws one rectangle per DataFrame row, so passing all 8,750 raw rows
# piles thousands of marks onto each bar position and the bar height is not a
# meaningful per-group statistic. Aggregate to the mean price per
# (year, transmission) first, as the line-chart cells do, and plot one bar
# per group.
avg_price_by_year_transmission = (
    car_price
    .groupby(['year', 'transmission'], as_index=False)['price']
    .mean()
)
px.bar(
    avg_price_by_year_transmission,
    x='year',
    y='price',
    color='transmission',
    color_discrete_sequence=['orange', 'blue'],
    barmode='group',
    title='Average price by year and transmission',
)

In [125]:
# px.bar draws one rectangle per DataFrame row, so passing the raw rows piles
# thousands of marks onto each bar position. Aggregate to the mean price per
# (fuel type, transmission) first so each bar represents one group.
avg_price_by_fuel_transmission = (
    car_price
    .groupby(['fuel type', 'transmission'], as_index=False)['price']
    .mean()
)
px.bar(
    avg_price_by_fuel_transmission,
    x='fuel type',
    y='price',
    color='transmission',
    color_discrete_sequence=['orange', 'blue'],
    barmode='group',
    title='Average price by fuel type and transmission',
)

In [126]:
# px.bar draws one rectangle per DataFrame row, so passing the raw rows piles
# thousands of marks onto each bar position. Aggregate to the mean price per
# (brand, transmission) first so each bar represents one group.
avg_price_by_brand_transmission = (
    car_price
    .groupby(['brand', 'transmission'], as_index=False)['price']
    .mean()
)
px.bar(
    avg_price_by_brand_transmission,
    x='brand',
    y='price',
    color='transmission',
    color_discrete_sequence=['orange', 'blue'],
    barmode='group',
    title='Average price by brand and transmission',
)

In [127]:
# px.bar draws one rectangle per DataFrame row, so passing the raw rows piles
# thousands of marks onto each bar position. Aggregate to the mean engine size
# per (brand, transmission) first so each bar represents one group.
avg_engine_size_by_brand_transmission = (
    car_price
    .groupby(['brand', 'transmission'], as_index=False)['engine size']
    .mean()
)
px.bar(
    avg_engine_size_by_brand_transmission,
    x='brand',
    y='engine size',
    color='transmission',
    color_discrete_sequence=['orange', 'blue'],
    barmode='group',
    title='Average engine size by brand and transmission',
)

In [128]:
# Horizontal count of rows per year, with grouped bars per transmission type.
px.histogram(
    data_frame=car_price,
    y='year',
    color='transmission',
    barmode='group',
    color_discrete_sequence=px.colors.qualitative.Light24_r,
)

In [129]:
# Horizontal count of rows per fuel type, with grouped bars per transmission type.
px.histogram(
    data_frame=car_price,
    y='fuel type',
    color='transmission',
    barmode='group',
    color_discrete_sequence=px.colors.qualitative.Light24_r,
)

In [130]:
# Horizontal count of rows per brand, with grouped bars per transmission type.
px.histogram(
    data_frame=car_price,
    y='brand',
    color='transmission',
    barmode='group',
    color_discrete_sequence=px.colors.qualitative.Light24_r,
)

In [131]:
# Pairwise correlation of the three continuous columns, rendered as a heatmap
# with the coefficient printed in every cell.
selected_cols = car_price.loc[:, ['price', 'engine size', 'mileage']]
corr_matrix = selected_cols.corr(method='pearson')
px.imshow(img=corr_matrix, text_auto=True)