In [1]:
#import required libraries
import pandas as pd
import numpy as np
from scipy import special
import matplotlib as mpl
import matplotlib.pyplot as plt

#plotly imports
import plotly as py
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import ipywidgets as widgets
#set the plotly offline mode
py.offline.init_notebook_mode(connected=True)
import datetime
from datetime import timedelta

import folium
# Suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [68]:
# Columns of the raw extract that the rest of the notebook works with.
Emissioncolumns = ['STANDARD_COMPANY_NAME','REPORTING YEAR',  'GAS', 'SECTOR', 'GHG_CONTRIBUTION', '2018_UPSTREAM_RANK', '2018_MIDSTREAM_RANK', '2018_OVERALL_RANK']

# Load the pipe-delimited emissions file, keep only the columns listed above
# and give the company / year columns shorter, underscore-style names.
df_EmissionByGasSec = (
    pd.read_csv('Emissions_aggregatedData.csv', sep='|')
    .loc[:, Emissioncolumns]
    .rename(columns={'STANDARD_COMPANY_NAME': 'COMPANY',
                     'REPORTING YEAR': 'REPORTING_YEAR'})
)

# Total GHG contribution per (company, reporting year, sector), summed over
# all gases, listed from the largest total to the smallest.
df_aggByYearComp = (
    df_EmissionByGasSec
    .groupby(['COMPANY', 'REPORTING_YEAR', 'SECTOR'], as_index=False)['GHG_CONTRIBUTION']
    .sum()
    .sort_values('GHG_CONTRIBUTION', ascending=False)
)


def _rank_companies_by_sector(df_agg, sector, rank_col):
    """Rank companies by their total emissions within one sector.

    Parameters
    ----------
    df_agg : pandas.DataFrame
        Frame with at least the columns 'COMPANY', 'SECTOR' and
        'GHG_CONTRIBUTION'.
    sector : str
        Value of the 'SECTOR' column to restrict the ranking to.
    rank_col : str
        Name of the rank column in the returned frame.

    Returns
    -------
    pandas.DataFrame
        Columns ``[rank_col, 'COMPANY']`` with one row per company that has
        rows in ``sector``. Rank 1 is the company with the largest summed
        'GHG_CONTRIBUTION'; ranks are consecutive integers.
    """
    totals = (
        df_agg.loc[df_agg['SECTOR'] == sector, ['COMPANY', 'GHG_CONTRIBUTION']]
        .groupby('COMPANY').sum().reset_index()
        .sort_values('GHG_CONTRIBUTION', ascending=False)
        .reset_index(drop=True)
    )
    # After the descending sort the fresh 0-based index is the rank minus one.
    totals.insert(0, rank_col, totals.index + 1)
    return totals[[rank_col, 'COMPANY']]


# Rank each company by its total emission over all reporting years in the
# data, separately for the Midstream and the Upstream sector.
df_mid = _rank_companies_by_sector(df_aggByYearComp, 'Midstream', 'MIDSTREAM_RANK')
df_up = _rank_companies_by_sector(df_aggByYearComp, 'Upstream', 'UPSTREAM_RANK')


# Attach the per-sector ranks to every (company, year, sector) row. A left
# merge keeps companies that are missing from one of the rank tables; their
# rank in that sector is NaN at this point.
df_aggByYearComp = (
    df_aggByYearComp
    .merge(df_mid, how='left', on='COMPANY')
    .merge(df_up, how='left', on='COMPANY')
)

# Companies without a rank in a sector are placed one past the last ranked
# company. Series.max() skips NaN; Python's built-in max() must not be used
# here because every comparison with NaN is False, so its result depends on
# row order and is NaN whenever the first row is unranked.
for rank_col in ('MIDSTREAM_RANK', 'UPSTREAM_RANK'):
    df_aggByYearComp[rank_col] = df_aggByYearComp[rank_col].fillna(
        df_aggByYearComp[rank_col].max() + 1
    )
df_aggByYearComp.head()

Unnamed: 0,COMPANY,REPORTING_YEAR,SECTOR,GHG_CONTRIBUTION,MIDSTREAM_RANK,UPSTREAM_RANK
0,ENERGY TRANSFER PARTNERS,2016,Midstream,16982840.0,4.0,430.0
1,ENERGY TRANSFER PARTNERS,2018,Midstream,13645080.0,4.0,430.0
2,ENERGY TRANSFER PARTNERS,2017,Midstream,11793390.0,4.0,430.0
3,WILLIAMS,2016,Midstream,11702910.0,2.0,97.0
4,WILLIAMS,2017,Midstream,11291680.0,2.0,97.0


In [77]:
# Distinct companies, reporting years and sectors present in the aggregated data.
lst_COMPANY, lst_YEAR, lst_SECTOR = (
    df_aggByYearComp[column].unique()
    for column in ('COMPANY', 'REPORTING_YEAR', 'SECTOR')
)

### Fig 1: Trend line of total GHG emissions from US Oil and Gas Companies between 2011 and 2018.

In [195]:
# Sum emissions over all companies and sectors for each reporting year,
# ordered chronologically.
df_fig1 = (
    df_aggByYearComp
    .groupby('REPORTING_YEAR', as_index=False)['GHG_CONTRIBUTION']
    .sum()
    .sort_values('REPORTING_YEAR')
)

x = df_fig1['REPORTING_YEAR']

# Single smoothed line with a marker at each reporting year.
trace1 = go.Scatter(
    x=x,
    y=df_fig1['GHG_CONTRIBUTION'],
    mode='lines+markers',
    name='Total Emission',
    line={'shape': 'spline'},
)
layout = go.Layout(
    title='Total emissions between 2011 to 2018 - (METRIC TONS CO2e)',
    xaxis={'title': 'REPORT YEAR'},
)

fig = go.Figure(data=[trace1], layout=layout)
py.offline.iplot(fig)

In [None]:
### Fig2: A table showing GHG Emissions from individual US Oil and Gas Producers between 2011 and 2018

In [242]:
# One row per company, one column per reporting year, holding the emissions
# summed over sectors. Column labels are converted to strings so the year
# columns can be addressed as '2011', '2012', ...
df_fig2 = (
    df_aggByYearComp
    .pivot_table(values='GHG_CONTRIBUTION', index='COMPANY',
                 columns='REPORTING_YEAR', aggfunc='sum')
    .reset_index()
    .rename_axis(columns=None)
    .rename(columns=str)
)

# The per-year table is still written to disk so this cell's file output is
# unchanged, but it is no longer read back just to stringify column names.
df_fig2.to_csv('PlotlyFig2.csv', index=False)

# Sum only the year columns: including the string COMPANY column in
# DataFrame.sum(axis=1) raises TypeError on pandas >= 2.0.
year_cols = [col for col in df_fig2.columns if col != 'COMPANY']
df_fig2['TOTAL_EMISSION'] = df_fig2[year_cols].sum(axis=1)
df_fig2 = df_fig2.sort_values('TOTAL_EMISSION', ascending=False).reset_index(drop=True)

#Plotly table plot reference: https://plotly.com/python/table/
# Header, cell and width lists are built from year_cols so the table shows
# exactly the years that contribute to TOTAL_EMISSION.
fig2 = go.Figure(data=[go.Table(
    columnwidth=[300, 250] + [180] * len(year_cols),
    header=dict(values=['<b>COMPANY</b>', '<b>TOTAL_EMISSION</b>'] + year_cols,
                line_color='darkslategray',
                fill_color='rgb(235, 70, 52)',
                align='center'),
    cells=dict(values=[df_fig2['COMPANY'], df_fig2['TOTAL_EMISSION']]
                      + [df_fig2[col] for col in year_cols],
               fill_color='rgb(255, 191, 0)',
               align='left'))
])
fig2.update_layout(title='Greenhouse Gas Emissions from the top US Oil & Gas Companies (in METRIC TONS CO2e)',
                   width=1400)

fig2.show()

##### Fig-3: Emission trends between 2011 and 2018 from the top US emitters

In [192]:
# Number of companies (lines) to draw.
num_of_lines = 15

# Reporting-year columns plotted along the x axis.
trend_years = ['2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018']
x = trend_years

# df_fig2 is already ordered by TOTAL_EMISSION (largest first), so the first
# rows of df_fig3 are the top emitters.
df_fig3 = df_fig2[['COMPANY'] + trend_years].copy()

fig3 = go.Figure()
# head() caps the selection at the number of companies available, so the loop
# cannot fail with a KeyError when there are fewer than num_of_lines rows.
for _, company_row in df_fig3.head(num_of_lines).iterrows():
    fig3.add_trace(go.Scatter(x=x,
                              y=company_row[trend_years],
                              mode='lines+markers',
                              name=company_row['COMPANY']))

# NOTE(review): the title says "last 9 years" but the x axis lists eight
# reporting years (2011-2018); the string is left unchanged here.
fig3.update_layout(title='US Emission trend over last 9 years by the Top Emittors',
                   xaxis=dict(title='REPORT YEAR'),
                   yaxis=dict(title='Emission Quantity in Metric Tons (CO2e)'))
fig3.show()

### Interactive chart to compare emissions between different companies and sectors

In [245]:
# Independent copy of the per-gas, per-sector emissions for the interactive
# chart (DataFrame.copy() is deep by default).
df_fig4 = df_EmissionByGasSec.copy()

In [246]:
# Preview the first five rows of the frame backing the interactive chart.
df_fig4.head(n=5)

Unnamed: 0,COMPANY,REPORTING_YEAR,GAS,SECTOR,GHG_CONTRIBUTION,2018_UPSTREAM_RANK,2018_MIDSTREAM_RANK,2018_OVERALL_RANK
0,HILCORP ENERGY,2018,N2O,Upstream,1495.0,1.0,29.0,8.0
1,HILCORP ENERGY,2018,N2O,Midstream,598.70788,1.0,29.0,8.0
2,HILCORP ENERGY,2015,CH4,Midstream,13146.24412,1.0,29.0,8.0
3,HILCORP ENERGY,2015,CH4,Upstream,640199.0,1.0,29.0,8.0
4,HILCORP ENERGY,2015,CO2,Midstream,922041.90366,1.0,29.0,8.0
