# Analysis of sold cars in Estonia

Data is taken from official source - https://www.transpordiamet.ee/soidukitega-tehtud-toimingute-statistika

Period - 5 years.

Only new cars where used in this analysis.


In [39]:
%reload_ext autoreload
%autoreload 0

import os

import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

In [40]:
from mnt_sum import (
    get_summary,
    COLUMNS,
    COLUMN_SHORT_NAME,
    COLUMN_REG_DATE,
    COLUMN_CUSTOMER,
    PRIVATE_CUSTOMER,
    COLUMN_ENGINE_TYPE,
)

df_o = pd.concat([get_summary(f"data/{year}") for year in range(2018, 2025)])
df_o = df_o[COLUMNS + [COLUMN_REG_DATE]]
df_o.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 103556 entries, 42 to 1623
Data columns (total 11 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   Mark             103556 non-null  object 
 1   Mudel            103556 non-null  string 
 2   short name       103556 non-null  object 
 3   Mootori tüüp     103556 non-null  object 
 4   Mootori maht     103556 non-null  Int64  
 5   Mootori võimsus  103556 non-null  Float64
 6   Linn             103556 non-null  object 
 7   Tüüp (isik)      103556 non-null  object 
 8   Arv              103556 non-null  Int64  
 9   Värv             63164 non-null   string 
 10  Esm reg aasta    103556 non-null  Int16  
dtypes: Float64(1), Int16(1), Int64(2), object(5), string(2)
memory usage: 9.3+ MB


In [41]:
df_o.head(5)

Unnamed: 0,Mark,Mudel,short name,Mootori tüüp,Mootori maht,Mootori võimsus,Linn,Tüüp (isik),Arv,Värv,Esm reg aasta
42,ALFA ROMEO,GIULIA,ALFA ROMEO GIULIA,BENSIIN_KATALYSAATOR,1995,206.0,Määramata,JURIIDILINE,1,,2018
43,AUDI,A1 SPORTBACK,AUDI A1,BENSIIN_KATALYSAATOR,1395,110.0,Tallinn,FÜÜSILINE,1,,2018
44,AUDI,A4 AVANT,AUDI A4,BENSIIN_KATALYSAATOR,1984,185.0,Määramata,FÜÜSILINE,1,,2018
45,AUDI,A4 AVANT,AUDI A4,DIISEL,1968,140.0,Tallinn,JURIIDILINE,1,,2018
46,AUDI,A4 LIMOUSINE,AUDI A4,BENSIIN_KATALYSAATOR,1395,110.0,Tartu,FÜÜSILINE,1,,2018


In [42]:
df_models = (
    df_o.groupby([COLUMN_SHORT_NAME], as_index=False)["Arv"]
    .sum()
    .sort_values("Arv", ascending=False)
    .reset_index(drop=True)
)
df_models.head()

Unnamed: 0,short name,Arv
0,TOYOTA RAV4,8036
1,SKODA OCTAVIA,6388
2,TOYOTA COROLLA,5750
3,SKODA KODIAQ,3826
4,RENAULT CLIO,3643


# Most popular cars

In [43]:
df_models_year = (
    df_o.groupby([COLUMN_SHORT_NAME], as_index=False)["Arv"]
    .sum()
    .sort_values("Arv", ascending=False)
    .reset_index(drop=True)
)

df_models_year.head(20)

Unnamed: 0,short name,Arv
0,TOYOTA RAV4,8036
1,SKODA OCTAVIA,6388
2,TOYOTA COROLLA,5750
3,SKODA KODIAQ,3826
4,RENAULT CLIO,3643
5,KIA SPORTAGE,3566
6,KIA CEED,3280
7,TOYOTA C-HR,2442
8,DACIA DUSTER,2360
9,SUBARU OUTBACK,2325


# Marks overview

In [44]:
df_marks_year = (
    df_o.groupby(["Mark", COLUMN_REG_DATE], as_index=False)["Arv"]
    .sum()
    .sort_values("Arv", ascending=False)
    .reset_index(drop=True)
)

df_marks_year.head()

Unnamed: 0,Mark,Esm reg aasta,Arv
0,TOYOTA,2022,4456
1,TOYOTA,2023,4211
2,TOYOTA,2021,3851
3,TOYOTA,2018,3665
4,TOYOTA,2019,3444


In [45]:
from matplotlib.colors import LinearSegmentedColormap


def sort_and_plot(_df, sortby=2024, limit=20, main_groupby=COLUMN_SHORT_NAME):
    cm = LinearSegmentedColormap.from_list(
        name="ryg",
        colors=["red", "yellow", "green"],
    )

    df_group = (
        _df.groupby([main_groupby, COLUMN_REG_DATE], as_index=False)["Arv"]
        .sum()
        .sort_values("Arv", ascending=False)
        .reset_index(drop=True)
    )

    df_h = (
        df_group.pivot(index=main_groupby, columns=COLUMN_REG_DATE, values="Arv")
        .sort_values([sortby], ascending=False)
        .astype("float64")
    )

    df_h = df_h.head(limit)

    df_h.style.format(precision=0)
    df_colorized = df_h.style.background_gradient(cmap=cm).format(precision=0)

    return df_colorized


sort_and_plot(df_marks_year, main_groupby="Mark")

Esm reg aasta,2018,2019,2020,2021,2022,2023,2024
Mark,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
TOYOTA,3665.0,3444,3362,3851,4456,4211,945
SKODA,2840.0,3079,2884,3157,2769,3358,856
KIA,1590.0,1529,1259,1867,1636,1813,350
VOLKSWAGEN,1690.0,1726,1197,1968,1595,1228,295
DACIA,943.0,1080,706,742,725,750,219
SUBARU,794.0,798,347,612,442,746,186
RENAULT,2592.0,2409,2021,1152,784,873,180
AUDI,385.0,390,496,539,1000,2544,179
PEUGEOT,1128.0,1055,1173,1393,816,880,157
MERCEDES-BENZ,394.0,368,466,564,746,674,147


# Total overview

In [46]:
sort_and_plot(df_o)


Esm reg aasta,2018,2019,2020,2021,2022,2023,2024
short name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
SKODA OCTAVIA,911.0,1031.0,1054.0,1110,888,1052,342
TOYOTA RAV4,883.0,1194.0,1400.0,1440,1526,1311,282
TOYOTA COROLLA,378.0,1091.0,977.0,922,966,1151,265
SKODA KODIAQ,462.0,675.0,536.0,480,691,737,245
SUBARU OUTBACK,437.0,421.0,171.0,348,314,493,141
DACIA DUSTER,357.0,378.0,327.0,400,347,413,138
KIA SPORTAGE,602.0,571.0,399.0,633,641,599,121
TOYOTA YARIS CROSS,,,,49,498,510,120
KIA CEED,533.0,511.0,403.0,664,512,553,104
SKODA SUPERB,566.0,441.0,336.0,292,263,283,84


# Private owners

In [47]:
df_private = df_o[df_o[COLUMN_CUSTOMER].apply(lambda x: PRIVATE_CUSTOMER in x)]

sort_and_plot(df_private)

Esm reg aasta,2018,2019,2020,2021,2022,2023,2024
short name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
TOYOTA RAV4,557.0,736.0,694.0,736,783,771,168
TOYOTA COROLLA,203.0,469.0,458.0,431,301,453,151
SKODA KODIAQ,266.0,390.0,330.0,249,321,350,132
SKODA OCTAVIA,420.0,482.0,598.0,492,289,373,116
TOYOTA YARIS CROSS,,,,22,293,348,89
SUBARU OUTBACK,240.0,203.0,99.0,205,174,270,87
KIA SPORTAGE,350.0,287.0,202.0,405,350,294,63
TOYOTA YARIS,96.0,86.0,133.0,165,160,121,52
SKODA KAMIQ,,45.0,242.0,187,131,228,45
VOLKSWAGEN TIGUAN,262.0,318.0,195.0,221,201,182,39


# Private owners electric cars

In [48]:
df_private_e = df_private[df_private[COLUMN_ENGINE_TYPE] == "ELEKTER"]

sort_and_plot(df_private_e)

  smin = np.nanmin(gmap) if vmin is None else vmin
  smax = np.nanmax(gmap) if vmax is None else vmax


Esm reg aasta,2018,2019,2020,2021,2022,2023,2024
short name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
GREAT WALL MOTOR COMPANY LIMITED ORA FUNKY CAT,,,,,,,12
TESLA MODEL 3,,,11.0,5.0,11.0,18.0,9
TESLA MODEL Y,,,,1.0,1.0,63.0,7
DACIA SPRING,,,,,6.0,1.0,6
VOLKSWAGEN ID.4,,,,6.0,3.0,19.0,5
TOYOTA BZ4X,,,,,,7.0,5
SKODA ENYAQ,,,,22.0,27.0,59.0,4
VOLVO XC40,,,,,11.0,14.0,3
HYUNDAI IONIQ5,,,,4.0,11.0,22.0,3
KIA EV6,,,,,3.0,25.0,3


# Private owners hybrid cars

In [49]:
df_private_h = df_private[
    df_private[COLUMN_ENGINE_TYPE].apply(lambda x: "HYBRIID" in x)
]

sort_and_plot(df_private_h)

Esm reg aasta,2018,2019,2020,2021,2022,2023,2024
short name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
TOYOTA COROLLA,,207.0,213.0,276.0,187.0,427,145
TOYOTA RAV4,279.0,314.0,291.0,347.0,357.0,412,136
TOYOTA YARIS CROSS,,,,13.0,203.0,330,61
KIA SPORTAGE,,,1.0,29.0,222.0,247,60
SKODA OCTAVIA,,,30.0,135.0,67.0,144,42
HONDA CR-V,,76.0,79.0,179.0,210.0,152,37
HYUNDAI TUCSON,,,8.0,151.0,177.0,133,35
TOYOTA C-HR,177.0,155.0,120.0,218.0,196.0,153,27
NISSAN QASHQAI,,,,78.0,156.0,143,26
TOYOTA YARIS,,,18.0,69.0,66.0,76,26


# Private owners diesel cars

In [50]:
df_private_h = df_private[df_private[COLUMN_ENGINE_TYPE].apply(lambda x: "DIISEL" in x)]

sort_and_plot(df_private_h)

Esm reg aasta,2018,2019,2020,2021,2022,2023,2024
short name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
SKODA KODIAQ,137.0,151.0,115,90,167,174,64
SKODA OCTAVIA,61.0,63.0,184,107,78,144,63
VOLKSWAGEN TIGUAN,147.0,253.0,69,22,49,53,14
KIA SPORTAGE,189.0,107.0,59,102,35,57,13
TOYOTA PROACE CITY VERSO,,,20,19,39,30,12
SKODA SUPERB,57.0,62.0,28,30,19,23,11
DACIA DUSTER,119.0,49.0,47,47,58,76,10
PEUGEOT 5008,93.0,50.0,37,109,51,59,9
SEAT TARRACO,,41.0,22,31,13,16,9
SKODA KAROQ,94.0,81.0,25,32,33,47,8
