# Greenhouse gas emissions prediction for the City of Helsinki

This model uses the greenhouse gas (GHG) [estimates](https://hsy.fi/paastot) calculated by the Helsinki Region Environmental Services Authority HSY.

The GHG emission target for the City of Helsinki is an 80% reduction from the 1990 level. The trend prediction uses a simple linear regression model fitted to the historical data.

In [None]:
# Quilt data packages (in 'user/package' form) that this notebook needs.
INPUT_DATASETS = ['jyrjola/hsy']

import math
import re
import scipy
import scipy.stats
import pandas as pd
import numpy as np
import importlib

# Make sure every required quilt package is importable, installing it on
# demand when the import fails.
for dataset in INPUT_DATASETS:
    # 'user/package' -> 'user.package' so it can be used in a dotted module path
    mod_path = dataset.replace('/', '.')
    try:
        # NOTE(review): `mod` is not used afterwards in the visible code; the
        # import appears to serve only as an availability check.
        mod = importlib.import_module('quilt.data.%s' % mod_path)
    except ImportError:
        # The package is not available locally: fetch it with quilt.
        import quilt
        quilt.install(dataset)

    # NOTE(review): this import is part of the loop body, so it only runs when
    # INPUT_DATASETS is non-empty. Presumably it was meant to be a top-level
    # import -- confirm before moving it.
    import aplans_graphs

import plotly
import plotly.graph_objs as go
import cufflinks as cf

# Initialise plotly's notebook mode and configure cufflinks for offline use.
plotly.offline.init_notebook_mode(connected=True)
cf.set_config_file(offline=True)

In [None]:
from quilt.data.jyrjola import hsy

def generate_forecast_series(historical_series, year_until):
    """Extrapolate a linear trend fitted to a yearly series.

    A least-squares line is fitted with ``scipy.stats.linregress`` using the
    index of ``historical_series`` as x and its values as y. The fitted line
    is then evaluated for every year from the first historical year up to and
    including ``year_until``.

    Note that the result contains the *fitted* values for the historical
    years as well, not the observed ones.

    Args:
        historical_series: ``pd.Series`` of observed values indexed by
            integer year.
        year_until: Last year (inclusive) to include in the result.

    Returns:
        ``pd.Series`` of values on the regression line, indexed by year from
        ``historical_series.index.min()`` through ``year_until``.
    """
    first_year = historical_series.index.min()
    fit = scipy.stats.linregress(historical_series.index, historical_series)

    years = list(range(first_year, year_until + 1))
    return pd.Series(
        [fit.intercept + fit.slope * year for year in years],
        index=years,
    )

def generate_plot(ghg_emissions, forecast, target, forecast_target_day):
    """Plot historical GHG emissions per sector with trend and target lines.

    Builds a stacked bar chart of the historical emissions (one bar trace per
    sector), overlays the linear trend forecast and a line interpolated from
    the last measured year to the target, shades the area below the target
    level, then displays the figure with ``fig.iplot()`` and passes it to
    ``aplans_graphs.post_graph``.

    Args:
        ghg_emissions: DataFrame whose index contains the year ('Vuosi') as
            its first level and the sector ('Sektori1'), with the emissions
            in the 'Päästöt' column.
        forecast: ``pd.Series`` of forecast emissions indexed by integer year.
        target: Single-element ``pd.Series``; its index holds the target year
            and its value the target emission level.
        forecast_target_day: ``pd.Series`` with a datetime index whose first
            entry is the day the forecast reaches the target. May be empty
            (or ``None``) when the forecast never reaches the target, in
            which case the trend line is not truncated.

    Returns:
        None. The figure is rendered and posted as a side effect.
    """
    df = ghg_emissions.reset_index().set_index('Vuosi')
    last_year = df.index.max()
    # Order the sectors based on impact in the last measured year. A boolean
    # mask is used instead of ``xs`` because it always yields a DataFrame,
    # even when the last year has only a single sector row.
    last_year_rows = df[df.index == last_year]
    sectors = list(last_year_rows.sort_values('Päästöt', ascending=False)['Sektori1'])
    data = []
    for sector_name in sectors:
        s = df[df['Sektori1'] == sector_name]['Päästöt']
        bar = go.Bar(x=s.index, y=s, name=sector_name, legendgroup='historical')
        data.append(bar)

    # There is discontinuity between 1990 and 2000 which messes up the plot,
    # so show only years from 2000 onwards.
    forecast = forecast[forecast.index >= 2000]
    # Cut the trend line one year after the target is reached. If the trend
    # never reaches the target there is no such day, so keep the whole line
    # instead of failing on an empty index.
    if forecast_target_day is not None and len(forecast_target_day) > 0:
        forecast = forecast[forecast.index <= forecast_target_day.index[0].year + 1]

    current_trend_line = go.Scatter(
        x=forecast.index, y=forecast, name='Nykytrendi',
        line=dict(color='blue', dash='dash'), opacity=0.5,
        legendgroup='prediction'
    )
    data.append(current_trend_line)

    # Straight line from the total emissions of the last measured year down
    # to the target level in the target year, one point per year.
    last_year_ghg = ghg_emissions.xs(last_year)['Päästöt'].sum()
    goal_series = pd.Series([last_year_ghg] + list(target), index=[last_year, target.index[0]])
    goal_series = goal_series.reindex(range(goal_series.index.min(), goal_series.index.max() + 1))
    goal_series = goal_series.interpolate()
    goal_line = go.Scatter(
        x=goal_series.index, y=goal_series, name='Tavoite', mode='lines',
        line=dict(color='green', dash='dash'), opacity=0.5,
        legendgroup='prediction'
    )
    data.append(goal_line)

    shapes = [
        # The dashed line separating 1990 from 2000 (x=0.5 lies between the
        # first two categories of the category axis)
        {
            'type': 'line',
            'x0': 0.5,
            'x1': 0.5,
            'xref': 'x',
            'y0': 0,
            'y1': 1,
            'yref': 'paper',
            'opacity': 0.8,
            'line': {
                'color': '#555',
                'width': 2,
                'dash': 'dash',
            }
        },
        # The shaded rectangle indicating the target area for emissions
        {
            'type': 'rect',
            'xref': 'paper',
            'yref': 'y',
            'x0': 0,
            'y0': 0,
            'x1': 1,
            'y1': target.values[0],
            'line': {
                'width': 0,
            },
            'fillcolor': 'green',
            'opacity': 0.3
        },
    ]

    xaxis = {
        'type': 'category',
    }
    yaxis = {
        'hoverformat': '.3r',
        'separatethousands': True,
        'title': 'KHK-päästöt (kt CO₂-ekv.)'
    }

    layout = go.Layout(barmode='stack', xaxis=xaxis, yaxis=yaxis, shapes=shapes, separators=', ')
    fig = go.Figure(data=data, layout=layout)
    fig.iplot()
    # NOTE(review): the meaning of the second argument (5) is defined by
    # aplans_graphs and is not visible here -- presumably a graph identifier.
    aplans_graphs.post_graph(fig, 5)


def estimate_ghg_emissions(baseline_year=1990, target_share=.20,
                           target_year=2035, forecast_until=2100):
    """Forecast Helsinki's GHG emissions and plot them against the target.

    Loads the HSY emission data, keeps only the rows for Helsinki, sums the
    emissions per year and sector, fits a linear trend to the yearly totals
    and finds the first day on which the interpolated trend drops below the
    target level. The result is handed to ``generate_plot``.

    Args:
        baseline_year: Year whose total emissions the target is relative to.
        target_share: Target emission level as a fraction of the baseline
            year's emissions (0.20 corresponds to an 80 % reduction).
        target_year: Year by which the target should be reached; used as the
            end point of the goal line in the plot.
        forecast_until: Last year (inclusive) of the trend forecast.

    Returns:
        None. The plot is produced as a side effect of ``generate_plot``.
    """
    df = hsy.pks_khk_paastot()
    # We're examining the data about only Helsinki
    ghg_emissions = df[df['Kaupunki'] == 'Helsinki'].drop('Kaupunki', axis=1)
    # Sum all the sub-sectors
    ghg_emissions = ghg_emissions.groupby(['Vuosi', 'Sektori1']).sum()

    # Sum all the emission sectors by year
    sum_emissions = ghg_emissions.groupby('Vuosi')['Päästöt'].sum()

    # The GHG emissions target is a fixed share of the baseline year emissions
    target = sum_emissions[baseline_year] * target_share

    # Estimate GHG emissions based on a linear regression over the current
    # data.
    forecast = generate_forecast_series(sum_emissions, forecast_until)
    # Assume the yearly GHG emissions "land" on the last day of the year
    days = forecast.index.astype(str) + '-12-31'
    daily_forecast = forecast.copy()
    daily_forecast.index = pd.to_datetime(days, format='%Y-%m-%d')
    # Generate a daily emission series with linear interpolation
    daily_index = pd.date_range(daily_forecast.index.min(), daily_forecast.index.max())
    daily_forecast = daily_forecast.reindex(daily_index).interpolate()

    # The first day when we finally reach our target. This is an empty series
    # when the trend never drops below the target within the forecast window.
    day_when_target_reached = daily_forecast[daily_forecast < target].head(1)
    target_day = pd.Series(target, index=[target_year])
    generate_plot(ghg_emissions, forecast, target_day, day_when_target_reached)

# Run the estimation and render the plot when this cell is executed.
estimate_ghg_emissions()
