Note: 2017 was the first year in which PKWs (passenger cars) had a separate category for "Elektro". Before that, there was only a category for "sonstige" (other). From 2017 onward, both categories are present.

Data from https://www.kba.de/DE/Statistik/Produktkatalog/produkte/Fahrzeuge/fz1_b_uebersicht.html

In [144]:
import pandas as pd

## Making dataframes from the years where we have a CSV

In [151]:
# Positions of the wanted columns, counted 1-based as they are listed in the CSV files.
csv_col_list = [1,7,19,21,36,37,40,41,44,45,46,47,48,49,50,51,52]
# Same positions shifted down by one to get Python's 0-based numbering.
py_col_list = [position - 1 for position in csv_col_list]

# Load the manually typed CSV files (one per year), keeping only the selected columns.
df2013, df2014, df2015, df2016, df2017 = (
    pd.read_csv(csv_path, usecols=py_col_list)
    for csv_path in (
        "data/2013 formatted selected - Sheet1.csv",
        "data/2014 formatted selected - Sheet1.csv",
        "data/2015 formatted - Sheet1.csv",
        "data/2016 formatted - Sheet1.csv",
        "data/2017 formatted selected - Sheet1.csv",
    )
)


# Helper that gets rid of the unnecessary row (the one with index label 0).
def drop_row(dataframe):
    """Return a new frame equal to ``dataframe`` minus the row labelled 0.

    The input frame itself is left untouched. pandas raises ``KeyError``
    when no row carries the index label 0.
    """
    trimmed = dataframe.drop(index=0)
    return trimmed

# Run drop_row over every yearly frame and rebind each name to its trimmed result.
df2013, df2014, df2015, df2016, df2017 = map(
    drop_row, (df2013, df2014, df2015, df2016, df2017)
)

In [175]:
# Display the 2017 frame after the first row was dropped.
# NOTE(review): in the output below, Dortmund's "PKW Benzin" value (1929822) is larger
# than its "PKW insgesamt" (278018) -- possibly a typo in the manually typed CSV; verify.
df2017

Unnamed: 0.1,Unnamed: 0,PKW insgesamt,PKW Dichte (je 1 000 Einwohner),LKW insgesamt,Kraftfahrzeuge insgesamt,Kraftfahrzeuge Dichte (je 1000 Einwohner),PKW Benzin,PKW Diesel,PKW sonstige Energiequelle,PKW Emissionsgruppe Euro 1,PKW Emissionsgruppe Euro 2,PKW Emissionsgruppe Euro 3,PKW Emissionsgruppe Euro 4,PKW Emissionsgruppe Euro 5,PKW Emissionsgruppe Euro 6,PKW Emissionsgruppe sonstige,PKW Emissionsgruppe schadstoffreduzierte insgesamt
1,Berlin,1195149,344,93141,1409642,406,856846,309048,1668,26144,131596,133527,388045,329256,167136,2990,1178694
2,Hamburg,771573,438,54134,892367,506,494256,266459,956,15154,68913,75504,226823,213894,153824,4181,758293
3,München (09162),722384,505,35381,831096,581,411560,300255,1570,15577,51669,59993,177710,192869,200772,6849,705439
4,Köln (05315),469677,449,29087,542361,518,299172,161675,506,9306,41906,48377,145061,128226,86742,1964,461582
5,Frankfurt am Main (06412),329398,459,21677,376226,524,182927,141663,410,6026,26127,28878,87764,93694,79289,1096,322874
6,Stuttgart (08111),298172,487,15339,341858,558,182461,111148,814,5161,25747,26962,79422,83244,71586,950,293072
7,Düsseldorf (05111),304410,504,17973,350655,580,179096,119816,259,6429,26308,27777,82889,89816,64102,1159,298380
8,Leipzig (14713),220026,404,14512,250184,459,159613,56144,170,3340,19898,23293,74836,66014,30062,509,217952
9,Dortmund,278018,479,13596,317570,547,1929822,79998,140,5204,28275,30341,98244,78532,37690,902,274188
10,Essen,282160,492,16147,323359,564,197534,78681,155,5030,27500,29352,91235,82909,42277,506,278809


## Making dataframes from the years where we have a .xlsx

In [170]:
# Read the sheet named FZ1.1 from the Excel file: skip the first 8 rows and
# skip every column whose header name contains 'Unnamed' (columns without a header name).
df2021a = pd.read_excel('data/fz1_2021.xlsx', sheet_name='FZ1.1',skiprows=8,usecols=lambda x: 'Unnamed' not in x)

# Fill the merged cells with the corresponding value by forward-filling.
# DataFrame.ffill() replaces fillna(method='ffill'), which is deprecated in recent pandas.
df2021a = df2021a.ffill()

# Remove the final 6 rows. They don't have relevant data and are in a different format.
# NOTE(review): this comment used to say 5 rows while the code drops 6, and the FZ1.2
# sheet drops 5 -- confirm against the workbook that 6 is the right number for FZ1.1.
df2021a = df2021a[:-6]

In [171]:
# Same steps as for sheet FZ1.1, applied to sheet FZ1.2.
# The Excel formatting makes it hard to get the columns before Benzin, but they're not needed.
df2021b = pd.read_excel('data/fz1_2021.xlsx', sheet_name='FZ1.2',skiprows=8,usecols=lambda x: 'Unnamed' not in x)
# Forward-fill the merged cells; DataFrame.ffill() replaces the deprecated fillna(method='ffill').
df2021b = df2021b.ffill()
# Remove the final 5 rows of the sheet.
df2021b = df2021b[:-5]

In [172]:
# Put the frames from the two sheets side by side (column-wise) in a single frame.
df2021 = pd.concat((df2021a, df2021b), axis="columns")

In [173]:
# These numbers are used in the spreadsheet to identify our 10 cities; they are
# joined with "|" so str.contains treats the string as one "any of these" pattern.
cities = "11000|02000|09162|05315|06412|08111|05111|14713|05913|05113"

# Filter the dataframe to only keep rows containing one of the 10 numbers, i.e. our 10 cities.
df2021 = df2021.loc[df2021['Statistische Kennziffer und Zulassungsbezirk'].str.contains(cities, case=False)]

# Berlin and Hamburg are cities and Länder, so this line is needed to stop them from
# being duplicated in the df: rows whose "Land" contains 'INSGESAMT' are removed.
# na=True makes rows with a missing "Land" count as matches, so they are dropped too,
# exactly as the previous `== False` comparison did.
df2021 = df2021.loc[~df2021["Land"].str.contains('INSGESAMT', na=True)]

# Select the columns we want by position in the concatenated frame.
df2021 = df2021.iloc[:,[2,8,19,20,33,34,35,36,40,42,43,44,45,46,47,50,51]]

# Rename the columns: first prefix every 'Euro' header (plain-text replacement, no regex),
# then map the remaining spreadsheet headers to readable names.
# rename() returns a new frame, which is assigned back instead of using inplace=True.
# NOTE(review): the 2017 frame uses longer names such as 'Kraftfahrzeuge insgesamt' and
# 'PKW Benzin'; the names here differ, so align them before combining years.
df2021.columns = df2021.columns.str.replace('Euro', 'PKW Emissionsgruppe Euro', regex=False)
df2021 = df2021.rename(columns = {'insgesamt.1':'PKW insgesamt',
                                  'PKW-Dichte\nje 1.000 \nEinwohner':'PKW Dichte',
                                  'insgesamt.2':'LKW insgesamt',
                                  'insgesamt.4':'KFZ insgesamt',
                                  'Kfz-Dichte\nje 1.000 \nEinwohner':'KFZ Dichte',
                                  'sonstige.1':'PKW Emissionsgruppe sonstige',
                                  'schadstoff-\nreduzierte \ninsgesamt':'PKW Emissionsgruppe schadstoffreduzierte insgesamt'})

In [174]:
# Display the filtered and renamed 2021 frame (one row per city).
# NOTE(review): the fuel columns shown below ('Benzin', 'Diesel', 'Elektro (BEV)') lack the
# 'PKW ' prefix used in the 2017 frame ('PKW Benzin', ...) -- confirm before merging years.
df2021

Unnamed: 0,Statistische Kennziffer und Zulassungsbezirk,PKW insgesamt,PKW Dichte,LKW insgesamt,KFZ insgesamt,KFZ Dichte,Benzin,Diesel,Elektro (BEV),PKW Emissionsgruppe Euro 1,PKW Emissionsgruppe Euro 2,PKW Emissionsgruppe Euro 3,PKW Emissionsgruppe Euro 4,PKW Emissionsgruppe Euro 5,PKW Emissionsgruppe Euro 6,PKW Emissionsgruppe sonstige,PKW Emissionsgruppe schadstoffreduzierte insgesamt
0,"08111 STUTTGART,STADT",308411.0,485,17321.0,357025.0,561,205532.0,79841.0,5033.0,3412.0,14571.0,13565.0,54402.0,54910.0,155457.0,5873.0,302190.0
50,"09162 MUENCHEN,STADT",732045.0,493,42913.0,853850.0,575,431465.0,247477.0,8271.0,8451.0,29096.0,35336.0,128084.0,141876.0,363673.0,10712.0,717228.0
153,11000 BERLIN,1234645.0,336,111551.0,1474487.0,402,890077.0,280762.0,9085.0,15515.0,71975.0,83238.0,305714.0,270994.0,456356.0,12055.0,1215847.0
177,02000 HAMBURG,805780.0,436,66809.0,943917.0,511,520882.0,248828.0,7035.0,9726.0,38494.0,46021.0,174080.0,174211.0,337463.0,10465.0,790460.0
180,"06412 FRANKFURT AM MAIN,STADT",342431.0,449,24331.0,394066.0,516,203620.0,119615.0,2304.0,3454.0,14047.0,16960.0,66755.0,64280.0,166453.0,3267.0,335216.0
268,"05111 DUESSELDORF, STADT",317580.0,511,19692.0,368102.0,592,195542.0,103782.0,2435.0,4018.0,14359.0,16824.0,63354.0,62535.0,146131.0,3392.0,310613.0
270,"05113 ESSEN,STADT",295738.0,507,17972.0,340069.0,584,211541.0,70384.0,1659.0,2842.0,14410.0,18030.0,71339.0,63845.0,118992.0,2079.0,291537.0
285,"05315 KOELN,STADT",492402.0,453,33143.0,573199.0,527,326277.0,137673.0,3204.0,5661.0,23270.0,29516.0,114393.0,99918.0,205451.0,4460.0,482669.0
314,"05913 DORTMUND,STADT",296263.0,504,15841.0,339394.0,577,207485.0,76951.0,1773.0,3037.0,14514.0,18657.0,74854.0,66434.0,111719.0,2626.0,291841.0
385,"14713 LEIPZIG, STADT",232677.0,392,17400.0,268060.0,452,164873.0,58451.0,1359.0,2047.0,10605.0,14032.0,56503.0,56330.0,88430.0,1779.0,229726.0
