In [1]:
import pandas as pd

# Load the car listings from the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer="data/cars.csv")

# Last expression of the cell: preview the first five rows.
df.head(n=5)

Unnamed: 0,Make,Model,Price,Year,Kilometer,Fuel Type,Transmission,Location,Color,Owner,Seller Type,Engine,Max Power,Max Torque,Drivetrain,Length,Width,Height,Seating Capacity,Fuel Tank Capacity
0,Honda,Amaze 1.2 VX i-VTEC,505000,2017,87150,Petrol,Manual,Pune,Grey,First,Corporate,1198 cc,87 bhp @ 6000 rpm,109 Nm @ 4500 rpm,FWD,3990.0,1680.0,1505.0,5.0,35.0
1,Maruti Suzuki,Swift DZire VDI,450000,2014,75000,Diesel,Manual,Ludhiana,White,Second,Individual,1248 cc,74 bhp @ 4000 rpm,190 Nm @ 2000 rpm,FWD,3995.0,1695.0,1555.0,5.0,42.0
2,Hyundai,i10 Magna 1.2 Kappa2,220000,2011,67000,Petrol,Manual,Lucknow,Maroon,First,Individual,1197 cc,79 bhp @ 6000 rpm,112.7619 Nm @ 4000 rpm,FWD,3585.0,1595.0,1550.0,5.0,35.0
3,Toyota,Glanza G,799000,2019,37500,Petrol,Manual,Mangalore,Red,First,Individual,1197 cc,82 bhp @ 6000 rpm,113 Nm @ 4200 rpm,FWD,3995.0,1745.0,1510.0,5.0,37.0
4,Toyota,Innova 2.4 VX 7 STR [2016-2020],1950000,2018,69000,Diesel,Manual,Mumbai,Grey,First,Individual,2393 cc,148 bhp @ 3400 rpm,343 Nm @ 1400 rpm,RWD,4735.0,1830.0,1795.0,7.0,55.0


In [2]:
# Median price per manufacturer, ordered from the most to the least
# expensive brand. Named aggregation creates the 'Median Price' column
# directly, so no separate rename step is required.
df_price_by_brand = (
    df
    .groupby("Make")
    .agg(**{"Median Price": ("Price", "median")})
    .sort_values("Median Price", ascending=False)
    .reset_index()  # turn the 'Make' group keys back into a regular column
)

# display(df_price_by_brand)

from great_tables import (
    GT, md, google_font, style, loc
)

# great_tables heatmap of the 20 brands with the highest median price
# (df_price_by_brand is already sorted in descending order).
heatmap = (
    GT(df_price_by_brand.head(20))
    # Shade the price cells on the 'Blues' palette using fixed scale bounds.
    # NOTE(review): the lower bound is negative, presumably so the cheapest
    # brands still receive a visible tint -- confirm the intent.
    .data_color(
        columns="Median Price",
        palette="Blues",
        domain=[-10_000_000, 35_000_000],
    )
    # Compact notation with three significant figures (e.g. 4.95M).
    .fmt_number(
        columns="Median Price",
        n_sigfig=3,
        compact=True,
    )
    # Left-aligned heading; both title and subtitle are parsed as Markdown.
    .tab_header(
        title=md("**Which Car Brands Are Most Expensive?**"),
        subtitle=md(
            "A comparison of the top 20 median car prices across<br>"
            "brands—exploring which manufacturers command<br>"
            "higher prices in the market."
        ),
    )
    .opt_align_table_header("left")
    # The title gets its own font (Lora).
    .tab_style(
        style=style.text(font=google_font("Lora")),
        locations=loc.title(),
    )
    .tab_options(
        heading_title_font_size="26px",
        heading_subtitle_font_size="16px",
        column_labels_font_size="14px",
        table_font_size="12px",
        data_row_padding=1,
    )
    # Table-wide font (Sora).
    .opt_table_font(font=google_font("Sora"))
)

display(heatmap)

Which Car Brands Are Most Expensive?,Which Car Brands Are Most Expensive?
A comparison of the top 20 median car prices across brands—exploring which manufacturers command higher prices in the market.,A comparison of the top 20 median car prices across brands—exploring which manufacturers command higher prices in the market.
Make,Median Price
Ferrari,35.0M
Lamborghini,24.0M
Rolls-Royce,20.0M
Porsche,9.00M
Maserati,9.00M
Land Rover,4.95M
Lexus,4.38M
Mercedes-Benz,3.85M
BMW,3.55M
Jaguar,3.35M


In [3]:
import plotly.express as px

# Per model year: the median price and the number of rows with a non-null
# 'Make' (used as the car count), listed from the newest year to the oldest.
df_price_by_year = (
    df
    .groupby('Year')
    .agg(**{
        'Median Price': ('Price', 'median'),
        'Car Count': ('Make', 'count'),
    })
    .reset_index()  # expose 'Year' as a regular column for plotting
    .sort_values('Year', ascending=False)
)


# Bar chart of the median price per model year.
# Years represented by a single car are excluded ('Car Count' > 1).
# The subtitle promises the number of cars per model year, so 'Car Count'
# is surfaced through the hover tooltip; previously it was selected but
# never shown anywhere in the figure.
fig = px.bar(
    df_price_by_year.loc[
        df_price_by_year['Car Count'] > 1,
        ['Year', 'Median Price', 'Car Count'],
    ],
    x='Year',
    y='Median Price',
    hover_data=['Car Count'],
    title='<b>How Car Prices Have Changed Over Time</b>',
    subtitle='Tracking the median price of used cars by year, alongside the number of cars available in each model year. Does the market favor newer models,<br>or do older classics still hold value?',
    height=400,
    width=1000,
    # Bar color
    color_discrete_sequence=['skyblue'],
    # Template
    template='simple_white',
)

# Remove x-axis tick marks and apply the fonts used for body text and title.
fig.update_layout(
    xaxis_ticks="",
    font_family='Sora',
    title_font_family='Lora',
)

# Light horizontal grid lines to make bar heights easier to compare.
fig.update_yaxes(
    showgrid=True,
    gridcolor='lightgray',
    gridwidth=0.5
)

fig.show()

In [4]:
# Median price for every (Location, Seller Type) combination, returned as a
# flat frame with columns 'Location', 'Seller Type' and 'Median Price'.
df_price_by_location = (
    df
    .groupby(['Location', 'Seller Type'])['Price']
    .median()
    .reset_index(name='Median Price')
)

# Index of the five 'Location' values that occur most often in the raw data.
top_5_cities = df['Location'].value_counts().nlargest(5).index

# Keep only the rows whose location is one of the five most frequent cities,
# then renumber the rows from zero.
df_price_by_location = (
    df_price_by_location
    .loc[lambda medians: medians['Location'].isin(top_5_cities)]
    .reset_index(drop=True)
)

display(df_price_by_location)



# Horizontal bar chart of the median price per city, with one facet row per
# seller type (facet_col_wrap=1 stacks the facets vertically).
fig = px.bar(
    df_price_by_location,
    x='Median Price',
    y='Location',
    facet_col='Seller Type',
    facet_col_wrap=1,
    title='<b>Where Are Cars Most Expensive?</b>',
    labels={'Median Price': 'Median Price (in currency)', 'Location': 'City'},
    subtitle='Corporate sellers consistently list used cars at higher median prices compared to individual<br>sellers across the top 5 major cities.',
    height=500,
    width=1000,
    # Bar color
    color_discrete_sequence=['lightskyblue'],
    # Template
    template='simple_white',
    # Print each bar's value as a label on the bar
    text='Median Price',
)

fig.update_layout(
    title_x=0.1,
    margin={'t': 150},
    title_font_size=22,
    font_family='Sora',
    title_font_family='Lora',
)

# Remove the tick marks on every facet's y-axis. The layout shortcut
# `yaxis_ticks=""` only addresses the first y-axis (`layout.yaxis`), which
# would leave tick marks on the remaining facet rows.
fig.update_yaxes(ticks="")

fig.update_traces(textposition='inside')
# The values are printed inside the bars, so the x-axes are hidden.
fig.update_xaxes(visible=False)

fig.show()

Unnamed: 0,Location,Seller Type,Median Price
0,Bangalore,Individual,1235000.0
1,Delhi,Corporate,3270000.0
2,Delhi,Individual,1450000.0
3,Hyderabad,Corporate,5500000.0
4,Hyderabad,Individual,935000.0
5,Mumbai,Corporate,2100000.0
6,Mumbai,Individual,875000.0
7,Pune,Corporate,1345000.0
8,Pune,Individual,842500.0
