In [None]:
import pandas as pd
import numpy as np

# Load the tab-separated, UTF-8 encoded passenger-car CO2 dataset.
data = pd.read_csv(
    'CO2_passenger_cars_v12.csv',
    encoding='utf-8',
    sep='\t'
)

# reset_index() returns a new DataFrame rather than modifying in place, so the
# result has to be assigned back for the reset to actually take effect.
data = data.reset_index(drop=True)

# Preview only the first rows instead of echoing the whole frame.
data.head()

In [37]:
# Group the rows by the 'Mh' column (shown as "Manufacturer" in the plot tooltips).
bymanufacturer = data.groupby('Mh')

# Build one summary row per 'Mh' group: mean of 'm (kg)', sum of 'r' and
# mean of 'e (g/km)'. reset_index() turns the group key back into a regular
# 'Mh' column so it can be referenced by name later on.
df = pd.DataFrame({
        'mass_mean': bymanufacturer['m (kg)'].mean(),
        # Use the built-in groupby sum instead of aggregate(np.sum): passing a
        # NumPy callable emits a FutureWarning on recent pandas and its
        # dispatch is slated to change, while .sum() is stable everywhere.
        'total_new_registrations': bymanufacturer['r'].sum(),
        'emissions_mean': bymanufacturer['e (g/km)'].mean()
    }).reset_index()



In [41]:
from bokeh.plotting import figure
from bokeh.io import output_notebook, show
from bokeh.plotting import ColumnDataSource
from bokeh.models import HoverTool, NumeralTickFormatter

# Wrap the per-manufacturer summary so glyphs and tooltips can refer to its
# columns by name.
source = ColumnDataSource(df)

p = figure(
        x_axis_label='Specific CO2 Emissions Mean',
        y_axis_label='Total New Registrations'
    )

# One circle marker per manufacturer. scatter() is used instead of circle()
# because circle() with a screen-unit `size` is deprecated in recent Bokeh
# releases; scatter() with marker='circle' draws the same glyph.
p.scatter(
    'emissions_mean',
    'total_new_registrations',
    source=source,
    size=6,
    marker='circle'
)

# Abbreviate large y-axis tick values using the "0.0a" numeral format.
p.yaxis.formatter = NumeralTickFormatter(format="0.0a")

# Hover tooltips: '@column' fields are looked up in `source` for the point
# under the cursor.
hover = HoverTool(
            tooltips=[
                ('Manufacturer', '@Mh'),
                ('CO2 Emissions', '@emissions_mean'),
                ('Total New Registrations', '@total_new_registrations')
            ]
        )

p.add_tools(hover)

# Route Bokeh output to the notebook before rendering the figure inline.
output_notebook()

show(p)