In [1]:
import numpy as np
import pandas as pd
import plotly.express as px

from plotly.subplots import make_subplots

In [2]:
# Load the listings and normalise the columns the later cells rely on.
df = pd.read_csv("vehicles_us.csv")

# Drop rows without a model year before the integer cast below (NaN cannot be
# cast to int). `subset` is passed as a list, the form every pandas version accepts.
df = df.dropna(subset=["model_year"]).reset_index(drop=True)

df["date_posted"] = pd.to_datetime(df["date_posted"])

# The manufacturer is the first whitespace-separated token of `model`
# (e.g. "ford f-150" -> "ford"). The vectorised `.str` accessor yields NaN for
# a missing or blank model instead of raising, as the per-row lambda would.
df["manufacturer"] = df["model"].str.split().str[0]
df["model_year"] = df["model_year"].astype(int)
df.head()

Unnamed: 0,price,model_year,model,condition,cylinders,fuel,odometer,transmission,type,paint_color,is_4wd,date_posted,days_listed,manufacturer
0,9400,2011,bmw x5,good,6.0,gas,145000.0,automatic,SUV,,1.0,2018-06-23,19,bmw
1,5500,2013,hyundai sonata,like new,4.0,gas,110000.0,automatic,sedan,red,,2019-02-07,79,hyundai
2,1500,2003,ford f-150,fair,8.0,gas,,automatic,pickup,,,2019-03-22,9,ford
3,14900,2017,chrysler 200,excellent,4.0,gas,80903.0,automatic,sedan,black,,2019-04-02,28,chrysler
4,14990,2014,chrysler 300,excellent,6.0,gas,57954.0,automatic,sedan,black,1.0,2018-06-20,15,chrysler


In [3]:
# Keep only 2019 model-year Ford listings; the charts that follow are drawn
# from this subset.
df = df.loc[
    lambda frame: frame["model_year"].eq(2019) & frame["manufacturer"].eq("ford")
]

In [4]:
def create_pie_trace(df, column_name):
    """Build a donut-style pie trace for the categories of one column.

    Args:
        df: DataFrame holding the rows to summarise.
        column_name: Column whose distinct values become the pie slices.

    Returns:
        The first (only) trace of a throwaway Plotly Express pie figure,
        restyled so it can be added to another figure.
    """
    trace = px.pie(df, names=column_name, hole=0.4).data[0]

    # NOTE(review): Plotly documents that `hovertemplate` overrides
    # `hoverinfo`, so the hover box shows label and value only. Confirm
    # whether the percentage was meant to appear as well.
    display_options = {
        "textposition": "inside",
        "hoverinfo": "label+percent+value",
        "hovertemplate": "%{label}<br>%{value}",
    }
    trace.update(display_options)
    return trace

columns_for_pie = ["model", "condition", "fuel", "transmission", "type"]
subplot_titles = [f"Distribution of {col_name.capitalize()}" for col_name in columns_for_pie]

# Size the grid from the column list so that adding or removing a column
# cannot leave the subplot count out of step with the traces added below.
n_pies = len(columns_for_pie)
fig = make_subplots(
    rows=1,
    cols=n_pies,
    subplot_titles=subplot_titles,
    specs=[[{'type': 'domain'}] * n_pies],  # one 'domain'-type cell per pie
)

# Subplot columns are addressed from 1, hence start=1.
for col_position, col_name in enumerate(columns_for_pie, start=1):
    fig.add_trace(create_pie_trace(df, col_name), row=1, col=col_position)

fig.update_layout(margin=dict(b=0, l=0, r=0), showlegend=False)
fig.show()

In [5]:
# Histogram of asking prices: no x-axis title, no grid lines, and zero
# bottom/left/right margins.
fig = px.histogram(
    df,
    x="price",
    title="Distribution of Prices",
).update_layout(
    margin=dict(b=0, l=0, r=0),
    xaxis_title=None,
    xaxis_showgrid=False,
    yaxis_showgrid=False,
)
fig.show()

In [6]:
# Scatter of price against odometer reading, drawn with size-10 markers.
fig = (
    px.scatter(df, x="odometer", y="price", title="Price vs. Odometer")
    .update_layout(
        margin=dict(b=0, l=0, r=0),
        legend_orientation="h",
        xaxis_title="Odometer",
        yaxis_title="Price",
        xaxis_showgrid=False,
        yaxis_showgrid=False,
    )
    .update_traces(marker={"size": 10})
)
fig.show()

In [7]:
# Sum only `days_listed` per posting day. Resampling the whole frame would also
# aggregate unrelated columns (price, model_year, ...) and, depending on the
# pandas version, concatenate or fail on the text columns. The chart needs
# just this one column.
df_trend = df.set_index("date_posted")[["days_listed"]].resample("D").sum()

# The x values are the daily index formatted as 'YYYY-MM-DD' strings.
fig = px.line(df_trend, x=df_trend.index.strftime('%Y-%m-%d'), y="days_listed", title="Total Days Vehicles Were Listed Each Day")
fig.update_layout(
    xaxis_title=None,
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    margin=dict(b=0, l=0, r=0),
)
fig.show()