## CSV files download link:

https://1drv.ms/u/s!ApzkikCpTYPyleAEL-G_GTz931I23g

In [12]:
import pandas as pd
import numpy as np

# Registration years to load; one 'CO2_passenger_cars_<year>.csv' file is
# expected per year in the working directory.
years = range(2011, 2016)


# Read every yearly file into a list and concatenate once at the end.
# Growing a DataFrame with repeated `append` calls copies all accumulated
# rows on every iteration, and `DataFrame.append` no longer exists in
# pandas 2.x.
frames = []
for year in years:
    file = 'CO2_passenger_cars_%d.csv' % year
    frame = pd.read_csv(
        file,
        encoding='utf-8',
        sep='\t',
        low_memory=False
    ).reset_index(drop=True)
    # Tag each row with the year of the file it came from.
    frame['year'] = year

    frames.append(frame)

data = pd.concat(frames, ignore_index=True)


# Normalize the 'Ft' column: missing values become '' and every value is
# lower-cased. The result is assigned back to the column explicitly; calling
# `fillna(..., inplace=True)` on `data.loc[:, 'Ft']` only mutates a temporary
# Series and is not guaranteed to update `data`.
data['Ft'] = data['Ft'].fillna('').str.lower()

# Normalize 'FIAT GROUP' manufacturer name between data sets
data.loc[data['Mh'] == 'FIAT GROUP AUTOMOBILES SPA', 'Mh'] = 'FIAT GROUP'

In [8]:
# Rows of the combined data set that belong to the 2015 file.
data2015 = data.loc[data['year'] == 2015]

# One group per manufacturer ('Mh' column).
bymanufacturer = data2015.groupby('Mh')

# Per-manufacturer summary: summed 'r' column plus the means of the
# 'e (g/km)' and 'm (kg)' columns.
df = pd.DataFrame(dict(
    total_new_registrations=bymanufacturer['r'].sum(),
    emissions_mean=bymanufacturer['e (g/km)'].mean(),
    mass_mean=bymanufacturer['m (kg)'].mean(),
))
df = df.reset_index()

# Keep only manufacturers whose summed 'r' value exceeds 100000.
df = df.loc[df['total_new_registrations'] > 100000]

In [21]:
from bokeh.plotting import figure
from bokeh.io import output_notebook, show
from bokeh.plotting import ColumnDataSource
from bokeh.models import HoverTool, NumeralTickFormatter

# Expose the per-manufacturer summary to Bokeh so glyphs and tooltips can
# refer to its columns by name.
source = ColumnDataSource(df)

# Scatter plot: emissions mean (x) against total registrations (y).
p = figure(x_axis_label='Specific CO2 Emissions Mean',
           y_axis_label='Total New Registrations')
p.circle('emissions_mean', 'total_new_registrations', size=6, source=source)

# Abbreviated numeric tick labels on the y axis.
p.yaxis.formatter = NumeralTickFormatter(format="0.0a")

# Tooltip rows shown when hovering a point; '@name' reads a source column.
tooltips = [
    ('Manufacturer', '@Mh'),
    ('CO2 Emissions', '@emissions_mean'),
    ('Total New Registrations', '@total_new_registrations'),
]
hover = HoverTool(tooltips=tooltips)
p.add_tools(hover)

# Render inline in the notebook.
output_notebook()
show(p)

In [42]:
# Same per-manufacturer summary, wrapped again for a second figure.
source = ColumnDataSource(df)

# Scatter plot: emissions mean (x) against mass mean (y). The y axis keeps
# Bokeh's default tick formatting.
p2 = figure(x_axis_label='Specific CO2 Emissions Mean',
            y_axis_label='Mass')
p2.circle('emissions_mean', 'mass_mean', size=6, source=source)

# Tooltip rows shown when hovering a point; '@name' reads a source column.
tooltips = [
    ('Manufacturer', '@Mh'),
    ('CO2 Emissions', '@emissions_mean'),
    ('Mass', '@mass_mean'),
]
hover = HoverTool(tooltips=tooltips)
p2.add_tools(hover)

# Render inline in the notebook.
output_notebook()
show(p2)

In [44]:
# Split the 2015 rows by fuel type. `str.contains` does a pattern search
# inside each 'Ft' value rather than an equality test, so a label that
# contains two of these words (e.g. something like 'petrol-electric', if the
# data has it) ends up in both subsets.
fuel_type = data2015['Ft']

petrol_data = data2015.loc[fuel_type.str.contains('petrol')]
diesel_data = data2015.loc[fuel_type.str.contains('diesel')]
electric_data = data2015.loc[fuel_type.str.contains('electric')]
lpg_data = data2015.loc[fuel_type.str.contains('lpg')]

In [45]:
# Per-manufacturer summaries (summed 'r', mean 'e (g/km)') for each fuel
# subset of the 2015 data.

# --- Petrol -----------------------------------------------------------------
petrol_data_bymanufacturer = petrol_data.groupby('Mh')

petrol_data_df = pd.DataFrame({
        'total_new_registrations': petrol_data_bymanufacturer['r'].sum(),
        'emissions_mean': petrol_data_bymanufacturer['e (g/km)'].mean()
    }).reset_index()

# Only the petrol summary is restricted to manufacturers above 100000
# registrations; the electric and diesel summaries below keep every row.
petrol_data_df = petrol_data_df[petrol_data_df['total_new_registrations'] > 100000]


# --- Electric ---------------------------------------------------------------
electric_data_bymanufacturer = electric_data.groupby('Mh')

electric_data_df = pd.DataFrame({
        'total_new_registrations': electric_data_bymanufacturer['r'].sum(),
        'emissions_mean': electric_data_bymanufacturer['e (g/km)'].mean()
    }).reset_index()


# --- Diesel -----------------------------------------------------------------
# Group the diesel rows. This previously grouped `electric_data`, which made
# the diesel summary an exact copy of the electric one.
diesel_data_bymanufacturer = diesel_data.groupby('Mh')

diesel_data_df = pd.DataFrame({
        'total_new_registrations': diesel_data_bymanufacturer['r'].sum(),
        'emissions_mean': diesel_data_bymanufacturer['e (g/km)'].mean()
    }).reset_index()

In [46]:
# Scatter plot of the petrol summary: emissions mean (x) against total
# registrations (y), one point per manufacturer.
source = ColumnDataSource(petrol_data_df)

p3 = figure(
        title='Petrol Data',
        x_axis_label='Specific CO2 Emissions Mean',
        y_axis_label='Total New Registrations'
    )

p3.circle('emissions_mean', 'total_new_registrations', source=source, size=6)

# Abbreviated numeric tick labels on the y axis.
p3.yaxis.formatter = NumeralTickFormatter(format="0.0a")

# Tooltip rows shown when hovering a point; '@name' reads a source column.
hover = HoverTool(
            tooltips=[
                ('Manufacturer', '@Mh'),
                ('CO2 Emissions', '@emissions_mean'),
                ('Total New Registrations', '@total_new_registrations')
            ]
        )

# Attach the tool to the figure; without this call the tooltips configured
# above never appear on the plot.
p3.add_tools(hover)

In [47]:
# Per-manufacturer summary of the electric subset: summed 'r' and mean
# 'e (g/km)'.
electric_data_bymanufacturer = electric_data.groupby('Mh')

electric_data_df = pd.DataFrame({
        'total_new_registrations': electric_data_bymanufacturer['r'].sum(),
        'emissions_mean': electric_data_bymanufacturer['e (g/km)'].mean()
    }).reset_index()


# Scatter plot of that summary, one point per manufacturer.
source = ColumnDataSource(electric_data_df)

p4 = figure(
        title='Electric Data',
        x_axis_label='Specific CO2 Emissions Mean',
        y_axis_label='Total New Registrations'
    )

p4.circle('emissions_mean', 'total_new_registrations', source=source, size=6)

# Abbreviated numeric tick labels on the y axis.
p4.yaxis.formatter = NumeralTickFormatter(format="0.0a")

# Tooltip rows shown when hovering a point; '@name' reads a source column.
hover = HoverTool(
            tooltips=[
                ('Manufacturer', '@Mh'),
                ('CO2 Emissions', '@emissions_mean'),
                ('Total New Registrations', '@total_new_registrations')
            ]
        )

# Attach the tool to the figure; without this call the tooltips configured
# above never appear on the plot.
p4.add_tools(hover)

In [48]:
# Per-manufacturer summary of the diesel subset: summed 'r' and mean
# 'e (g/km)'. This previously grouped `electric_data`, so the figure titled
# 'Diesel Data' was actually plotting the electric numbers.
diesel_data_bymanufacturer = diesel_data.groupby('Mh')

diesel_data_df = pd.DataFrame({
        'total_new_registrations': diesel_data_bymanufacturer['r'].sum(),
        'emissions_mean': diesel_data_bymanufacturer['e (g/km)'].mean()
    }).reset_index()


# Scatter plot of that summary, one point per manufacturer.
source = ColumnDataSource(diesel_data_df)

p5 = figure(
        title='Diesel Data',
        x_axis_label='Specific CO2 Emissions Mean',
        y_axis_label='Total New Registrations'
    )

p5.circle('emissions_mean', 'total_new_registrations', source=source, size=6)

# Abbreviated numeric tick labels on the y axis.
p5.yaxis.formatter = NumeralTickFormatter(format="0.0a")

# Tooltip rows shown when hovering a point; '@name' reads a source column.
hover = HoverTool(
            tooltips=[
                ('Manufacturer', '@Mh'),
                ('CO2 Emissions', '@emissions_mean'),
                ('Total New Registrations', '@total_new_registrations')
            ]
        )

# Attach the tool to the figure; without this call the tooltips configured
# above never appear on the plot.
p5.add_tools(hover)

In [49]:
# Per-manufacturer summary of the LPG subset: summed 'r' and mean 'e (g/km)'.
lpg_data_bymanufacturer = lpg_data.groupby('Mh')

lpg_data_df = pd.DataFrame({
        'total_new_registrations': lpg_data_bymanufacturer['r'].sum(),
        'emissions_mean': lpg_data_bymanufacturer['e (g/km)'].mean()
    }).reset_index()


# Scatter plot of that summary, one point per manufacturer.
source = ColumnDataSource(lpg_data_df)

p6 = figure(
        title='Liquefied Petroleum Gas Data',
        x_axis_label='Specific CO2 Emissions Mean',
        y_axis_label='Total New Registrations',
    )

p6.circle('emissions_mean', 'total_new_registrations', source=source, size=6)

# Abbreviated numeric tick labels on the y axis.
p6.yaxis.formatter = NumeralTickFormatter(format="0.0a")

# Tooltip rows shown when hovering a point; '@name' reads a source column.
hover = HoverTool(
            tooltips=[
                ('Manufacturer', '@Mh'),
                ('CO2 Emissions', '@emissions_mean'),
                ('Total New Registrations', '@total_new_registrations')
            ]
        )

# Attach the tool to the figure; without this call the tooltips configured
# above never appear on the plot.
p6.add_tools(hover)

In [51]:
from bokeh.layouts import gridplot

# 2x2 grid: petrol and electric on the first row, diesel and LPG on the
# second.
row1, row2 = [p3, p4], [p5, p6]
layout = gridplot([row1, row2], sizing_mode='scale_width')

# Share p3's x and y ranges with the other three figures so that they all
# use the same range objects as the petrol plot.
for linked in (p4, p5, p6):
    linked.x_range = p3.x_range
    linked.y_range = p3.y_range


show(layout)

In [19]:
from bokeh.palettes import Spectral11

# For each year: the ten manufacturers with the most registrations.
manufacturerYear = {}
# For each year: emissions mean of *every* manufacturer, indexed by name.
# The line data below is read from here rather than from the top-10 frames,
# because a manufacturer in the 2015 top ten is not guaranteed to be in the
# top ten of every earlier year.
emissions_lookup = {}

for year in years:
    dataYear = data[data['year'] == year]
    bymanufacturer = dataYear.groupby('Mh')

    df = pd.DataFrame({
        'total_new_registrations': bymanufacturer['r'].sum(),
        'emissions_mean': bymanufacturer['e (g/km)'].mean()
    }).reset_index()

    manufacturerYear[year] = df.nlargest(10, 'total_new_registrations')
    emissions_lookup[year] = df.set_index('Mh')['emissions_mean']

# One entry per 2015 top-ten manufacturer: [list of years, list of means].
lines = {}

for value in manufacturerYear[2015]['Mh']:
    lines[value] = [[], []]

for line in lines:
    for year in years:
        lines[line][0].append(year)
        # NaN when the manufacturer has no rows at all for that year; the
        # previous `.iloc[0]` on an empty selection raised IndexError.
        emissions_value = emissions_lookup[year].get(line, np.nan)
        lines[line][1].append(emissions_value)

# One colour per manufacturer (ten lines at most).
mypalette = Spectral11[0:10]

p8 = figure(x_axis_label='Years', y_axis_label='CO2 Emissions Mean')

hover = HoverTool()
p8.add_tools(hover)


# NOTE(review): `legend=` matches the older Bokeh API this notebook appears
# to target; newer Bokeh releases presumably need `legend_label=` instead.
# Confirm against the installed version.
for (line, color) in zip(lines, mypalette):
    p8.line(lines[line][0], lines[line][1], color=color, legend=line)

output_notebook()
show(p8)

In [14]:
# Summary per (manufacturer, year) pair: summed 'r' and mean 'e (g/km)'.
grouped = data.groupby(['Mh', 'year'])
my_df = pd.DataFrame({
    'total_new_registrations': grouped['r'].sum(),
    'emissions_mean': grouped['e (g/km)'].mean()
}).reset_index()

# Names of the ten manufacturers with the most registrations in 2015.
rows_2015 = my_df[my_df['year'] == 2015]
top_ten_2015 = rows_2015.nlargest(10, 'total_new_registrations')
manufacturers = np.array(top_ten_2015['Mh'])

# All yearly rows belonging to those manufacturers.
manufacturers_df = my_df[my_df['Mh'].isin(manufacturers)]

In [20]:
from bokeh.charts import TimeSeries

# Reshape to one row per year and one column per manufacturer. Pivoting
# aligns every manufacturer's values on the same year axis and inserts NaN
# where a manufacturer has no row for a year. Building each list separately
# would give series of different lengths that no longer line up with the
# 'year' values.
emissions_wide = manufacturers_df.pivot(
    index='year', columns='Mh', values='emissions_mean'
)

# Column-oriented input for TimeSeries: one list of yearly means per
# manufacturer plus the shared 'year' axis.
manufacturers_dict = {}

for manufacturer in manufacturers:
    manufacturers_dict[manufacturer] = emissions_wide[manufacturer].tolist()

manufacturers_dict['year'] = emissions_wide.index.values

tsline = TimeSeries(
    manufacturers_dict,
    x='year', y=manufacturers.tolist(),
    color=manufacturers.tolist(),
    title="Manufacturers CO2 Emissions Mean by Year", xlabel='Year', ylabel='CO2 Emissions Mean',
    legend=True
)
tsline.legend.location = "top_right"

output_notebook()
show(tsline)