This notebook displays an interactive graphic made with Bokeh, which shows a variety of forecasts and projections for the Marcellus shale gas play in the northeast United States.

In [1]:
import os

import pandas as pd
import numpy as np
from bokeh.palettes import Blues5, Greys9, Reds6
from bokeh.plotting import figure, show, output_notebook, ColumnDataSource
from bokeh.models import HoverTool, TapTool

In [2]:
# Configure Bokeh so that show() renders plots inline in this notebook.
output_notebook()

In [12]:
# Directory that holds the forecast CSV. The original author's local folder
# stays the default, so existing behaviour is unchanged; set the
# FORECASTING_PATH environment variable to read the data from elsewhere.
forecasting_path = os.environ.get(
    'FORECASTING_PATH',
    '/Users/mason/Dropbox/Documents [DB]/work/forecasting project - Together',
)
file1 = 'Marcellus forecasts - all 2016-08 for Github.csv'

# Column 0 of the CSV becomes the index and is parsed as dates
# (the preview below shows it labelled `date`).
all_forecasts = pd.read_csv(os.path.join(forecasting_path, file1),
                            index_col=0, parse_dates=True)
all_forecasts.head()

Unnamed: 0_level_0,Navigant 2014-08,WoodMac 2014-02,Rice Model 2014-04,Medlock & Jaffe 2011,UT $4/Mcf,UT EIA price case,UT $6/Mcf,EIA AEO 2016,EIA AEO 2015,EIA AEO 2014,EIA AEO 2013,EIA AEO 2012,historical
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2000-07-01,,,,,,,,,,,,,
2001-07-01,,,,,,,,,,,,,
2002-07-01,,,,,,,,,,,,,
2003-07-01,,,,,,,,,,,,,
2004-07-01,,,,,,,,,,,,,


In [13]:
# List the column labels: one per forecast series, plus 'historical'.
all_forecasts.columns

Index(['Navigant 2014-08', 'WoodMac 2014-02', 'Rice Model 2014-04',
       'Medlock & Jaffe 2011', 'UT $4/Mcf', 'UT EIA price case', 'UT $6/Mcf',
       'EIA AEO 2016', 'EIA AEO 2015', 'EIA AEO 2014', 'EIA AEO 2013',
       'EIA AEO 2012', 'historical'],
      dtype='object')

In [14]:
# Reshape the wide frame into the layout multi_line expects: parallel lists
# with one entry per series (x values, y values, and the series label).
numlines = all_forecasts.shape[1]

all_forecasts_dict = dict(
    # every series is plotted against the same date index
    xs=[all_forecasts.index.values for _ in range(numlines)],
    ys=[all_forecasts[column].values for column in all_forecasts.columns],
    name=list(all_forecasts.columns),
)

In [15]:
# all_forecasts_dict is a dict of parallel lists ('xs', 'ys', 'name') with one
# entry per series. Wrapping it in a DataFrame is only for inspection: each
# row is one series, and the 'xs' / 'ys' cells hold whole arrays.
all_forecasts_dict_df = pd.DataFrame(data=all_forecasts_dict)
all_forecasts_dict_df.head(5)

Unnamed: 0,name,xs,ys
0,Navigant 2014-08,"[2000-07-01T00:00:00.000000000, 2001-07-01T00:...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
1,WoodMac 2014-02,"[2000-07-01T00:00:00.000000000, 2001-07-01T00:...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
2,Rice Model 2014-04,"[2000-07-01T00:00:00.000000000, 2001-07-01T00:...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
3,Medlock & Jaffe 2011,"[2000-07-01T00:00:00.000000000, 2001-07-01T00:...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
4,UT $4/Mcf,"[2000-07-01T00:00:00.000000000, 2001-07-01T00:...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."


In [16]:
# Show the 'name' column: one label per series, in row order.
all_forecasts_dict_df['name']

0         Navigant 2014-08
1          WoodMac 2014-02
2       Rice Model 2014-04
3     Medlock & Jaffe 2011
4                UT $4/Mcf
5        UT EIA price case
6                UT $6/Mcf
7             EIA AEO 2016
8             EIA AEO 2015
9             EIA AEO 2014
10            EIA AEO 2013
11            EIA AEO 2012
12              historical
Name: name, dtype: object

In [18]:
# Styling lives apart from the data so it is easy to experiment with.
# One colour per series, listed in the same order as the series names:
#   4 third-party forecasts -> a single shared grey
#   3 UT scenarios          -> blues
#   5 EIA AEO vintages      -> reds
#   historical              -> black
mypalette = (
    [Greys9[3]] * 4
    + [Blues5[i] for i in range(3)]
    + [Reds6[i] for i in range(5)]
    + ['black']
)

# sanity check: number of series vs. number of colours (should be equal)
(len(all_forecasts_dict_df['name']), len(mypalette))

(13, 13)

In [21]:
# When a glyph method is given an explicit `source`, Bokeh requires every
# sequence-valued property to be a column of that source; passing the palette
# list directly as line_color is rejected by current Bokeh releases. The
# colours therefore travel as a 'color' column. dict(...) builds a copy, so
# all_forecasts_dict itself is not modified.
source = ColumnDataSource(dict(all_forecasts_dict, color=mypalette))

# Hovering a line shows its series label (the 'name' column of the source).
p = figure(
    title='Marcellus natural gas production outlooks',
    width=500, height=500, x_axis_type="datetime",
    tools=[
        HoverTool(line_policy='next', tooltips='@name'),
        TapTool(),
    ],
)

# One line per row of the source; per-line colour comes from the 'color'
# column, while width/alpha/hover styling are shared by every line.
p.multi_line(
    source=source,
    xs='xs',
    ys='ys',
    line_color='color', hover_line_color='mediumspringgreen',
    line_width=2,
    line_alpha=0.7, hover_line_alpha=1,
)

p.xaxis.axis_label = "years"
p.yaxis.axis_label = "gas production (Bcf/d)"

show(p)