# Car Sales


In [1]:
import polars as pl

In [20]:
df = pl.read_csv("data/raw/car_sales/data.csv")
print("raw shape: ", df.shape)
rename_map = {col: col.lower().replace(" ", "_") for col in df.columns}

df = df.rename(rename_map).with_row_count("id")
df = df.with_columns(
    [
        pl.col(col).str.replace_all(" ", "_").str.to_lowercase()
        for col in df.columns
        if df[col].dtype == pl.Utf8
    ]
).with_columns(
    pl.col(
        "make",
        "model",
        "engine_fuel_type",
        "transmission_type",
        "driven_wheels",
        "vehicle_size",
        "vehicle_style",
    ).cast(pl.Categorical),
)
df.head()

raw shape:  (11914, 16)


id,make,model,year,engine_fuel_type,engine_hp,engine_cylinders,transmission_type,driven_wheels,number_of_doors,market_category,vehicle_size,vehicle_style,highway_mpg,city_mpg,popularity,msrp
u32,cat,cat,i64,cat,i64,i64,cat,cat,i64,str,cat,cat,i64,i64,i64,i64
0,"""bmw""","""1_series_m""",2011,"""premium_unlead…",335,6,"""manual""","""rear_wheel_dri…",2,"""factory_tuner,…","""compact""","""coupe""",26,19,3916,46135
1,"""bmw""","""1_series""",2011,"""premium_unlead…",300,6,"""manual""","""rear_wheel_dri…",2,"""luxury,perform…","""compact""","""convertible""",28,19,3916,40650
2,"""bmw""","""1_series""",2011,"""premium_unlead…",300,6,"""manual""","""rear_wheel_dri…",2,"""luxury,high-pe…","""compact""","""coupe""",28,20,3916,36350
3,"""bmw""","""1_series""",2011,"""premium_unlead…",230,6,"""manual""","""rear_wheel_dri…",2,"""luxury,perform…","""compact""","""coupe""",28,18,3916,29450
4,"""bmw""","""1_series""",2011,"""premium_unlead…",230,6,"""manual""","""rear_wheel_dri…",2,"""luxury""","""compact""","""convertible""",28,18,3916,34500


In [34]:
df_market = (
    (
        df.select("id", "market_category")
        .with_columns([pl.col("market_category").str.split(",")])
        .explode("market_category")
    )
    .pivot(
        index="id",
        values="id",
        columns="market_category",
        aggregate_function="count",
    )
    .fill_null(0)
)
rename_map = {
    col: "market_" + col.lower().replace(" ", "_") if col != "id" else col
    for col in df_market.columns
}
df_market = df_market.rename(rename_map)
df_market.head()
df.join(df_market, on="id")
df.head()

id,make,model,year,engine_fuel_type,engine_hp,engine_cylinders,transmission_type,driven_wheels,number_of_doors,market_category,vehicle_size,vehicle_style,highway_mpg,city_mpg,popularity,msrp
u32,cat,cat,i64,cat,i64,i64,cat,cat,i64,str,cat,cat,i64,i64,i64,i64
0,"""bmw""","""1_series_m""",2011,"""premium_unlead…",335,6,"""manual""","""rear_wheel_dri…",2,"""factory_tuner,…","""compact""","""coupe""",26,19,3916,46135
1,"""bmw""","""1_series""",2011,"""premium_unlead…",300,6,"""manual""","""rear_wheel_dri…",2,"""luxury,perform…","""compact""","""convertible""",28,19,3916,40650
2,"""bmw""","""1_series""",2011,"""premium_unlead…",300,6,"""manual""","""rear_wheel_dri…",2,"""luxury,high-pe…","""compact""","""coupe""",28,20,3916,36350
3,"""bmw""","""1_series""",2011,"""premium_unlead…",230,6,"""manual""","""rear_wheel_dri…",2,"""luxury,perform…","""compact""","""coupe""",28,18,3916,29450
4,"""bmw""","""1_series""",2011,"""premium_unlead…",230,6,"""manual""","""rear_wheel_dri…",2,"""luxury""","""compact""","""convertible""",28,18,3916,34500
