# Creates a mapping from year to frequency of car model

In [254]:
# Make a graph to visualize the most popular car models over time.

# steps:
# for each year
# get list of the rows with data for that year
# get frequency of models sold in year
# bar graph for the years?, it's pretty bad

import numpy # linear algebra
import pandas # data processing, CSV file I/O (e.g. pd.read_csv)
from collections import Counter
        
cars = pandas.read_csv("/kaggle/input/carsforsale/cars_raw.csv")
# unique models present in the data
car_models = numpy.unique(cars.get("Model"))
# unique years present in the data
years = numpy.unique(cars.get("Year"))

# One {model: count} dict per entry of `years`, in the same order:
# select the rows of a given year, take their "Model" column and count
# how many times each model appears.
frequencies = [
    dict(Counter(cars.loc[cars["Year"] == year].get("Model")))
    for year in years
]

# Pair every year with its count dict:
#   zip(years, frequencies) -> (year, {model: count}), (year, {model: count}), ...
#   dict(...)               -> {year: {model: count}, year: {model: count}, ...}
frequencies_map = dict(zip(years, frequencies))

# create a dataframe from the frequencies_map

In [255]:
# Build the frame with one column per year, then flip it so that
# rows are years and columns are models.
df = pandas.DataFrame.from_dict(frequencies_map).T

# create a stacked bar graph from the frequencies of all models in the dataset

In [256]:
# one stacked bar per row of df, one segment per column
df.plot(kind="bar", stacked=True, figsize=(30, 30))

# create a mapping and dataframe from year to the top 3 models of car in that year

In [257]:
# top 3 cars sold

# (year, {model: count}) pairs, in the same order as `years`
frequencies_list = list(zip(years, frequencies))

# for every year, its (model, count) pairs ordered from most to least frequent
frequencies_list_sorted = [
    sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    for _, counts in frequencies_list
]

# keep at most the first three (model, count) pairs of each year
top_three = [ranked[:3] for ranked in frequencies_list_sorted]

# turn each list of pairs back into a dict so pandas.DataFrame can consume it
top_three_dict = [dict(pairs) for pairs in top_three]
top_three_map = dict(zip(years, top_three_dict))
# rows are years, columns are the models that made a top 3 in some year
df = pandas.DataFrame.from_dict(top_three_map).transpose()

# stacked bar graph of top 3 cars in each year, with the exception of the years that do not have 3 or more data entries

In [258]:
# one stacked bar per row of df, one segment per column
df.plot(kind="bar", stacked=True, figsize=(25, 15))

In [259]:
# get top 10 models
# {model: number of rows with that model} over the whole dataset
models_frequency = dict(Counter(cars.get("Model")))
# (model, count) pairs ordered by count, highest first, cut to the first 10
ranked_models = sorted(
    models_frequency.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
top_10_tuples = ranked_models[:10]
# just the model name without frequency
top_10_list = [name for name, _ in top_10_tuples]

In [260]:
# original dataframe with frequency of all models (rows: years, columns: models)
df = pandas.DataFrame.from_dict(frequencies_map)
df = df.transpose()
# Drop all but the 10 most frequently occurring models. The unwanted columns
# are collected first and removed with a single drop call, so the frame is
# never modified while its own columns are being iterated.
other_models = [col for col in df.columns if col not in top_10_list]
df.drop(columns=other_models, inplace=True)

# stacked bar graph of top 10 overall car models throughout the years

In [261]:
# one stacked bar per row of df, one segment per column
df.plot(kind="bar", stacked=True, figsize=(25, 15))

# line graph of the top 10 overall models throughout the years

In [262]:
# replace missing entries with 0 in place before plotting
df.fillna(0, inplace=True)
# one line per column of df
df.plot(kind="line", figsize=(25, 15))

In [263]:
# back to the first dataframe with all data:
# rebuild it from frequencies_map and flip it so rows are years, columns are models
df = pandas.DataFrame.from_dict(frequencies_map).T

In [264]:
# get bar chart race library
!pip install bar_chart_race

In [265]:
import bar_chart_race as bcr
import warnings

df_values, df_ranks = bcr.prepare_wide_data(
    df, steps_per_period=1, orientation="h", sort="desc"
)
# bcr gives a bunch of warnings about fonts that are not super important.
# Silence them only while rendering: catch_warnings restores the previous
# filter state on exit (even if rendering raises), whereas
# warnings.resetwarnings() would discard every filter, including any that
# were configured before this cell ran.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    bcr.bar_chart_race(
        df_values,
        period_fmt="{x:.0f}",
        filename="cars.mp4",
        cmap="dark24",
        n_bars=10,
        period_length=1000,
        title="Top 10 Car Models 2001 - 2022",
    )

# bar chart race video using the full dataset

In [266]:
from IPython.display import Video

# Show the video inside the notebook output with a fixed 800x600 player size;
# embed=True asks IPython to embed the video data in the output itself.
# NOTE(review): presumably this is the "cars.mp4" file written by the bar chart
# race cell, assuming /kaggle/working is the notebook's working directory.
Video("/kaggle/working/cars.mp4", width=800, height=600, embed=True)