# Information regarding the data that this notebook refers to

# Fill out the following fields. Check that the input data file name and the notebook name match.

# Refer to the notebook metadata in vehicle_fleet_data.xlsx to select the notebook_id.

In [1]:
# Identity of this notebook. name_string is shared by the notebook file name
# and the input workbook name, so the two always stay in step.
notebook_id = '007'
name_string = '_s_NO_2008_2019_cl_me_statbank'

# File names derived from the identity fields above.
nb_name = notebook_id + name_string + '.ipynb'
nb_input_workbook = 'in' + name_string + '.xlsx'
nb_output_workbook = notebook_id + '.xlsx'

# Descriptive metadata about the data set handled by this notebook.
nb_stock_or_flow = 'stock'
nb_geography = '578'  # presumably the ISO 3166-1 numeric code for Norway ('NO' in name_string) - confirm
# Time coverage of the data. It must agree with the years encoded in
# name_string and with the year columns of the input sheet (2008 ... 2019).
nb_start_time = '2008'
nb_stop_time = '2019'
nb_attribute_1 = 'class'
nb_attribute_2 = 'registered'
nb_attribute_3 = 'motor energy'
nb_attribute_4 = 'use'
nb_data_source = 'STATBANK'
nb_data_source_url = 'https://www.ssb.no/en/statbank/table/01963'
nb_comment = ''

In [2]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib as plt

In [3]:
# Gather the notebook metadata fields into a single record, then wrap it in a
# one-row data frame whose index label is the notebook id.
_metadata_record = {
    'notebook_name': nb_name,
    'input_file': nb_input_workbook,
    'output_file': nb_output_workbook,
    'source': nb_data_source,
    'geography': nb_geography,
    'start_time': nb_start_time,
    'stop_time': nb_stop_time,
    'attribute1': nb_attribute_1,
    'attribute2': nb_attribute_2,
    'attribute3': nb_attribute_3,
    'attribute4': nb_attribute_4,
    'source_url': nb_data_source_url,
    'comment': nb_comment,
}
notebook_metadata = pd.DataFrame(_metadata_record, index=[notebook_id])

## Reading in the excel data and merging the sheets into one dataframe with category info attached to row data

In [4]:
# open the input workbook named in the configuration cell; the resulting
# ExcelFile handle is what the next cell reads the data sheet from
xls = pd.ExcelFile(nb_input_workbook)

In [5]:
# read the 'Personbil1' sheet of the input workbook into a dataframe
# (one row per motor_energy / class combination, one column per year)
df = pd.read_excel(xls, sheet_name= 'Personbil1')
# show the raw table for a visual check
df

Unnamed: 0,motor_energy,class,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,Petrol,Private cars,1596852,1550434,1500841,1448232,1408198,1368625,1328380,1295739,1196148,1139998,1075179,1031207
1,Petrol,Ambulances,218,195,172,140,124,111,99,109,99,88,85,88
2,Petrol,Buses,1344,1049,864,717,578,491,421,348,308,250,223,202
3,Petrol,Vans,64971,60030,54526,48304,43626,39166,35456,31896,29463,26814,24765,23599
4,Petrol,Combines vehicles,11782,10131,8534,7174,6009,4916,4138,3468,2828,2296,1969,1741
5,Petrol,Lorries,7534,7326,7085,6605,6045,5437,4827,4275,3420,3037,2689,2479
6,Petrol,Mopeds,159557,163387,166604,169382,172256,173376,174742,175382,175459,168959,160373,155550
7,Petrol,Light motor cycles,17670,18468,19077,19753,20574,21300,22063,23441,24725,25863,26670,28122
8,Petrol,Heavy motor cycles,116331,121801,126214,130389,134537,138721,143839,150902,157668,162477,164180,168506
9,Diesel,Private cars,597391,690560,804384,922986,1025220,1110621,1186194,1243235,1276947,1294493,1290442,1281019


In [6]:
# MOTOR ENERGY code replacement
# open the dictionary workbook and load its 'fuel_types' sheet,
# using the first sheet column as the row index
dictxls = pd.ExcelFile('dictionary_vehicle_fleet.xlsx')
me_dict = pd.read_excel(dictxls, sheet_name='fuel_types', index_col=0)
# parallel lists: me_source[i] is a label as written by a data source,
# me_code[i] is the code that replaces it
me_source = me_dict['source'].to_list()
me_code = me_dict['dt_code'].to_list()

# Assign the recoded column back instead of calling replace(..., inplace=True)
# on df['motor_energy']: the in-place form acts on a column selection, which
# raises a chained-assignment warning on pandas 2.x and leaves df unchanged
# once Copy-on-Write is active.
df['motor_energy'] = df['motor_energy'].replace(to_replace=me_source, value=me_code)

# list the rows whose motor_energy is still not a known code
# (an empty table means every label was matched)
df.loc[~df['motor_energy'].isin(me_code)]

Unnamed: 0,motor_energy,class,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019


 #### replace class titles with codes
  

In [7]:
# Load the 'class_metadata' sheet of the metadata workbook. It lists, per data
# source, the class labels used by that source together with their codes.
metaxls = pd.ExcelFile('metadata_vehicle_fleet.xlsx')
classMetadata = pd.read_excel(
    metaxls,
    index_col=0,
    sheet_name='class_metadata',
)

In [8]:
# display the class metadata table (all sources) for inspection
classMetadata

Unnamed: 0_level_0,code,label,description,footnote,source,sourcelink1,sourcelink2,Unnamed: 8,Unnamed: 9,Unnamed: 10,PCMO,SML0,classification scheme3
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1.0,EUM1,passenger car,"Designed for the carriage of passengers, with ...",,European Comission,https://ec.europa.eu/growth/sectors/automotive...,,,,,p,m,
2.0,EUM2,light bus,"Designed for the carriage of passengers, carry...",,European Comission,https://ec.europa.eu/growth/sectors/automotive...,,,,,c,l,
3.0,EUM3,heavy bus,"Designed for the carriage of passengers, havin...",,European Comission,https://ec.europa.eu/growth/sectors/automotive...,,,,,c,l,
4.0,EUN1,light commercial vehicle,"Designed for the carriage of goods, having a m...",,European Comission,https://ec.europa.eu/growth/sectors/automotive...,,,,,c,m,
5.0,EUN2,heavy commercial vehicle,"Designed for the carriage of goods, having a m...",,European Comission,https://ec.europa.eu/growth/sectors/automotive...,,,,,c,l,
6.0,EUN3,transport truck,"Designed for the carriage of goods, having a m...",,European Comission,https://ec.europa.eu/growth/sectors/automotive...,,,,,c,l,
7.0,EUL,"motorcycles, tricycles, and quadracycles",Motor vehicles with less than four wheels and ...,,European Comission,https://ec.europa.eu/growth/sectors/automotive...,,,,,m,s,
8.0,OIPC,passenger car,"Passenger cars are road motor vehicles, other ...",,OICA,http://www.oica.net/wp-content/uploads//DEFINI...,,,,,p,m,
9.0,OICV,commercial vehicle,Commercial vehicles include light commercial v...,,OICA,http://www.oica.net/wp-content/uploads//DEFINI...,,,,,c,l,
10.0,USLDS,"light duty vehicle, short wheel base","Light duty vehicle, short wheel base replaced ...",,USDOT,https://www.bts.gov/content/number-us-aircraft...,https://www.bts.gov/sites/bts.dot.gov/files/do...,,,,p,m,


In [9]:
# keep only the class metadata rows whose 'source' equals this notebook's data source
sourceClassDF = classMetadata.loc[classMetadata['source'] == nb_data_source]

In [10]:
# display the class metadata rows that belong to this notebook's data source
sourceClassDF

Unnamed: 0_level_0,code,label,description,footnote,source,sourcelink1,sourcelink2,Unnamed: 8,Unnamed: 9,Unnamed: 10,PCMO,SML0,classification scheme3
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
24.0,NOPC,Private cars,A private car is a car mainly with the purpose...,,STATBANK,https://www.ssb.no/a/metadata/conceptvariable/...,,,,,p,p,
25.0,NOAMB,Ambulances,Motor vehicle specialised for transport of sic...,,STATBANK,https://www.ssb.no/a/metadata/conceptvariable/...,,,,,c,m,
26.0,NOBUS,Buses,A bus is a vehicle for passenger transport wit...,,STATBANK,https://www.ssb.no/a/metadata/conceptvariable/...,,,,,c,m,
,NOVAN,Vans,A van is a car for transport of goods with max...,,STATBANK,https://www.ssb.no/a/metadata/conceptvariable/...,,,,,c,m,
27.0,NOLO,Lorries,A lorry is a vehicle for transport of goods,,STATBANK,https://www.ssb.no/a/metadata/conceptvariable/...,,,,,c,l,
28.0,NOCV,Combines vehicles,Vehicle mainly with the purpose of transportin...,,STATBANK,https://www.ssb.no/a/metadata/conceptvariable/...,,,,,c,m,
29.0,NOTR,Tractors,A tractor is a motor vehicle mostly used for a...,,STATBANK,https://www.ssb.no/a/metadata/conceptvariable/...,,,,,o,o,
30.0,NOMP,Mopeds,"Includes all kind of mopeds, both two wheel-, ...",,STATBANK,https://www.ssb.no/a/metadata/conceptvariable/...,,,,,m,s,
31.0,NOLMC,Light motor cycles,A light motor cycle is a motorcycle with cylin...,,STATBANK,https://www.ssb.no/a/metadata/conceptvariable/...,,,,,m,s,
32.0,NOHMC,Heavy motor cycles,Motor cycle with a cylinder capacity above 125...,,STATBANK,https://www.ssb.no/a/metadata/conceptvariable/...,,,,,m,s,


In [11]:
# class labels as written by the data source, in metadata row order
sourceLabel = sourceClassDF['label'].tolist()

In [12]:
# class codes, in the same row order as sourceLabel (sourceCode[i] replaces sourceLabel[i])
sourceCode = sourceClassDF.code.to_list()

# Translate the class labels into codes. The result is assigned back instead
# of calling replace(..., inplace=True) on df['class']: the in-place form acts
# on a column selection, which raises a chained-assignment warning on
# pandas 2.x and leaves df unchanged once Copy-on-Write is active.
df['class'] = df['class'].replace(to_replace=sourceLabel, value=sourceCode)

# list the rows whose class is still not a known code
# (an empty table means every label was matched)
df.loc[~df['class'].isin(sourceCode)]

Unnamed: 0,motor_energy,class,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019


## rearrange data into proper format

In [13]:
# preview the first rows after motor_energy and class have been recoded
df.head()

Unnamed: 0,motor_energy,class,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,ICE,NOPC,1596852,1550434,1500841,1448232,1408198,1368625,1328380,1295739,1196148,1139998,1075179,1031207
1,ICE,NOAMB,218,195,172,140,124,111,99,109,99,88,85,88
2,ICE,NOBUS,1344,1049,864,717,578,491,421,348,308,250,223,202
3,ICE,NOVAN,64971,60030,54526,48304,43626,39166,35456,31896,29463,26814,24765,23599
4,ICE,NOCV,11782,10131,8534,7174,6009,4916,4138,3468,2828,2296,1969,1741


In [14]:
# Unpivot the wide table into long form: motor_energy and class stay as
# identifier columns, every other column header goes into a new 'year' column
# and its cell content into 'value'.
melted = pd.melt(df, id_vars=['motor_energy', 'class'], var_name='year')
melted

Unnamed: 0,motor_energy,class,year,value
0,ICE,NOPC,2008,1596852
1,ICE,NOAMB,2008,218
2,ICE,NOBUS,2008,1344
3,ICE,NOVAN,2008,64971
4,ICE,NOCV,2008,11782
...,...,...,...,...
643,OTH,NOCV,2019,2
644,OTH,NOLO,2019,75
645,OTH,NOMP,2019,0
646,OTH,NOLMC,2019,1


## structuring the data into format of datastructure


In [15]:
# Continue with an independent copy of the melted table under the name df.
# A plain `df = melted` would make both names refer to the same object, so the
# index rewrite below would also alter `melted`, and re-running this cell
# would prefix notebook_id a second time.
df = melted.copy()
# Replace the positional index with string row ids: notebook_id followed by
# the row number zero-padded to 9 digits.
df.index = notebook_id + df.index.astype(str).str.zfill(9)

In [17]:
# add the measurement-time columns of the output structure
# the measurement year is a copy of the melted 'year' column
df['year_of_measurement'] = df['year']
# the date column is filled with an empty string on every row
df['date_of_measurement'] = ''
df.head(2)

Unnamed: 0,motor_energy,class,year,value,year_of_measurement,date_of_measurement
7000000000,ICE,NOPC,2008,1596852,2008,
7000000001,ICE,NOAMB,2008,218,2008,


In [18]:
# Add the remaining columns of the output structure.
# motor_energy and value already exist under their final names, so they are
# not reassigned here.
# geography comes from the configuration cell so data and metadata cannot drift apart
df['geo'] = nb_geography
df['process'] = 'r'
# the class codes are exposed under the output name 'vehicle_class'
df['vehicle_class'] = df['class']
df['vehicle_segment'] = 'all'
df['model_year'] = 'all'
# NOTE: the spelling 'registraion' is kept on purpose; the output column list
# in the next cell selects the column under exactly this name
df['year_of_first_registraion'] = ''
df['unit'] = 'nr'
df['source'] = nb_data_source
# fixed access date recorded for the source data
df['accessed'] = '2020-08-04'
df['notebook'] = nb_name
df['footnote'] = ''

In [19]:
# Output columns in their final order; df_out is the selection of exactly
# these columns from df.
heading_list = [
    'year_of_measurement',
    'date_of_measurement',
    'geo',
    'process',
    'vehicle_class',
    'vehicle_segment',
    'motor_energy',
    'model_year',
    'year_of_first_registraion',
    'value',
    'unit',
    'source',
    'accessed',
    'notebook',
    'footnote',
]
df_out = df.loc[:, heading_list]

In [20]:
# check df_out before saving: display the finalized output table
df_out

Unnamed: 0,year_of_measurement,date_of_measurement,geo,process,vehicle_class,vehicle_segment,motor_energy,model_year,year_of_first_registraion,value,unit,source,accessed,notebook,footnote
007000000000,2008,,578,r,NOPC,all,ICE,all,,1596852,nr,STATBANK,2020-08-04,007_s_NO_2008_2019_cl_me_statbank.ipynb,
007000000001,2008,,578,r,NOAMB,all,ICE,all,,218,nr,STATBANK,2020-08-04,007_s_NO_2008_2019_cl_me_statbank.ipynb,
007000000002,2008,,578,r,NOBUS,all,ICE,all,,1344,nr,STATBANK,2020-08-04,007_s_NO_2008_2019_cl_me_statbank.ipynb,
007000000003,2008,,578,r,NOVAN,all,ICE,all,,64971,nr,STATBANK,2020-08-04,007_s_NO_2008_2019_cl_me_statbank.ipynb,
007000000004,2008,,578,r,NOCV,all,ICE,all,,11782,nr,STATBANK,2020-08-04,007_s_NO_2008_2019_cl_me_statbank.ipynb,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
007000000643,2019,,578,r,NOCV,all,OTH,all,,2,nr,STATBANK,2020-08-04,007_s_NO_2008_2019_cl_me_statbank.ipynb,
007000000644,2019,,578,r,NOLO,all,OTH,all,,75,nr,STATBANK,2020-08-04,007_s_NO_2008_2019_cl_me_statbank.ipynb,
007000000645,2019,,578,r,NOMP,all,OTH,all,,0,nr,STATBANK,2020-08-04,007_s_NO_2008_2019_cl_me_statbank.ipynb,
007000000646,2019,,578,r,NOLMC,all,OTH,all,,1,nr,STATBANK,2020-08-04,007_s_NO_2008_2019_cl_me_statbank.ipynb,


## at this point restart kernel and run all cells

In [21]:
# Write the notebook's output workbook (named after the notebook id) with two
# sheets: 'data' holds the output table, 'notebook_metadata' the metadata row.
# The writer is used as a context manager so the file is saved and closed when
# the block ends; ExcelWriter.save() is no longer available in pandas 2.x.
with pd.ExcelWriter(nb_output_workbook, engine='xlsxwriter') as writer:
    df_out.to_excel(writer, sheet_name='data', merge_cells=False)
    notebook_metadata.to_excel(writer, sheet_name='notebook_metadata')

In [22]:
# Merge this notebook's output and metadata into the cumulative pickles.

# NOTE: the pickle module is imported here but not referenced in this cell;
# reading and writing go through pandas.
import pickle

# NOTE(review): unpickling can execute arbitrary code, so these files should
# only ever be pickles produced by this project.

# --- stock data ---
# load the cumulative stock table, append this notebook's rows, and drop rows
# that are exact duplicates in every column
stock_pickle = pd.read_pickle('stock_df.pickle')
stock_df = pd.concat([stock_pickle, df_out])
stock_df = stock_df.drop_duplicates()
# persist the updated stock table
stock_df.to_pickle('stock_df.pickle')

# --- notebook metadata ---
# same procedure for the cumulative metadata table
metadata_pickle = pd.read_pickle('metadata_df.pickle')
metadata_df = pd.concat([metadata_pickle, notebook_metadata])
metadata_df = metadata_df.drop_duplicates()
metadata_df.to_pickle('metadata_df.pickle')

In [23]:
# Rewrite the cumulative workbook with two sheets: 'data' holds the combined
# stock table and 'notebook_metadata' the combined notebook metadata.
# The writer is used as a context manager so the file is saved and closed when
# the block ends; ExcelWriter.save() is no longer available in pandas 2.x.
with pd.ExcelWriter('vehicle_fleet_stock.xlsx', engine='xlsxwriter') as writer:
    stock_df.to_excel(writer, sheet_name='data', merge_cells=False)
    metadata_df.to_excel(writer, sheet_name='notebook_metadata')