In [1]:
# Notebook setup: imports, pandas display options, IPython extensions and
# plotting / R-bridge initialisation used by the cells below.

# Data manipulation
import pandas as pd
import numpy as np

# Options for pandas: show at most 50 columns / 100 rows and render floats
# with three decimal places.
pd.options.display.max_columns = 50
pd.options.display.max_rows = 100
pd.set_option('display.float_format', lambda x: '{:.3f}'.format(x))

# Display all cell outputs (every top-level expression, not only the last one)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

# Handle to the running IPython shell; used below to inspect loaded extensions.
from IPython import get_ipython
ipython = get_ipython()

# autoreload extension: only load it when it is not loaded yet, so re-running
# this cell does not try to load it a second time.
if 'autoreload' not in ipython.extension_manager.loaded:
    %load_ext autoreload

%autoreload 2

# expectexception
# NOTE(review): presumably imported for the cell magic it registers for cells
# that are expected to raise; it is not referenced in the cells shown here - confirm.
import expectexception

# Visualizations
# NOTE(review): `plotly.plotly` was moved to the separate chart_studio package
# in plotly 4 - confirm the pinned plotly version still provides this module.
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
init_notebook_mode(connected=True)

import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline

# Activate the automatic conversion for pandas
from rpy2.robjects import pandas2ri
pandas2ri.activate()

# Load the needed extension for the %%R cell magic
%load_ext rpy2.ipython

# Load ipywidgets module
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

In [2]:
%%R

# Load the R libraries used by the notebook's %%R cells.
# ggplot2 is attached first; theme_set() and theme_light() below come from it.
library(ggplot2)
library(ggalt)
# Make theme_light() the default theme for ggplot2 plots created after this point.
theme_set(theme_light())

library(scales)
# Attaching dplyr masks stats::filter/lag and base::intersect/setdiff/setequal/union,
# as reported in the startup message printed under this cell.
library(dplyr)



Attaching package: ‘dplyr’




The following objects are masked from ‘package:stats’:

    filter, lag




The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union





In [3]:
# Read the two input tables. Both paths are relative to the notebook's
# working directory; SWN is loaded first, then production.
SWN, production = (
    pd.read_csv(csv_path)
    for csv_path in ('../clean_csv/SWN_clean.csv', '../clean_csv/production_ann.csv')
)

In [4]:
# Narrow both frames to the columns the later cells read.
swn_columns = ['doc_id', 'maturity', 'product_type', 'instrument_type', 'vol_1', 'vol_1_type']
production_columns = ['year', 'ticker', 'OIL_PRODUCTION', 'NGL_PRODUCTION', 'GAS_PRODUCTION']

SWN = SWN[swn_columns]
production = production[production_columns]

In [5]:
import re

# Patterns are raw strings: '\d' inside a normal string literal is an invalid
# escape sequence (DeprecationWarning since Python 3.6, SyntaxWarning in 3.12).
DOC_ID_YEAR_RE = re.compile(r'\d{4}')
DOC_ID_TICKER_RE = re.compile(r'[A-Z]+')

# Derive the year and ticker from each doc_id: the first run of four digits
# becomes `year` (as int) and the first run of upper-case letters becomes `ticker`.
# A doc_id without a match raises IndexError, as before.
SWN['year'] = SWN['doc_id'].apply(lambda doc_id: int(DOC_ID_YEAR_RE.findall(doc_id)[0]))
SWN['ticker'] = SWN['doc_id'].apply(lambda doc_id: DOC_ID_TICKER_RE.findall(doc_id)[0])

# Keep only rows whose maturity equals the year parsed from doc_id and whose
# instrument_type does not contain 'basis'.
# na=False treats a missing instrument_type as "not basis"; without it the
# mask holds NaN for such rows and the `~` negation fails.
matures_in_doc_year = SWN['maturity'] == SWN['year']
is_basis = SWN['instrument_type'].str.contains('basis', na=False)

SWN = SWN.loc[matures_in_doc_year & ~is_basis].reset_index(drop=True)


In [6]:
# Multiplier applied to `vol_1`, keyed by the value found in `vol_1_type`.
# NOTE(review): 25/9 equals 1000/360 while the 'mbbls' factor divides by 365 -
# looks like a per-day conversion with inconsistent day counts; confirm whether
# 1000/365 was intended for 'bcf'.
unit_convertion = dict(bcf=25 / 9, mbbls=1 / 365)

# Canonical product label for each raw `product_type` value; both natural-gas
# spellings collapse to the same label.
product_convertion = {
    **dict.fromkeys(('natural gas', 'natural gas (bcf)'), 'Natural Gas'),
    'oil': 'Oil',
}

In [7]:
# Convert volumes, total them per (year, product) and attach annual production.
#
# Steps, in order:
#   1. look up the unit multiplier and the canonical product label
#      (values without an entry in the lookup dicts become NaN);
#   2. volume = vol_1 * multiplier;
#   3. de-duplicate while instrument_type is still a column, so rows that
#      differ only by instrument type are both kept, then drop that column;
#   4. sum the volume per year/product, carrying the first ticker of each group.
SWN = (
    SWN
    .assign(
        multiplier=SWN['vol_1_type'].map(unit_convertion),
        product=SWN['product_type'].map(product_convertion),
    )
    .assign(volume=lambda frame: frame['vol_1'] * frame['multiplier'])
    [['ticker', 'year', 'product', 'volume', 'instrument_type']]
    .drop_duplicates()
    .drop('instrument_type', axis=1)
    .groupby(['year', 'product'])
    .agg({'volume': 'sum', 'ticker': 'first'})
    .reset_index()
)

# Default (inner) join: only year/ticker pairs present in both frames remain.
SWN_merge = SWN.merge(production, on=['year', 'ticker'])

SWN_merge

Unnamed: 0,year,product,volume,ticker,OIL_PRODUCTION,NGL_PRODUCTION,GAS_PRODUCTION
0,2003,Natural Gas,88.611,SWN,1.455,,104.11
1,2003,Oil,0.658,SWN,1.455,,104.11
2,2004,Natural Gas,99.722,SWN,1.693,,138.1
3,2004,Oil,1.167,SWN,1.693,,138.1
4,2005,Natural Gas,137.222,SWN,1.93,,155.62
5,2005,Oil,0.986,SWN,1.93,,155.62
6,2006,Natural Gas,143.889,SWN,1.91,,186.56
7,2006,Oil,0.329,SWN,1.91,,186.56
8,2007,Natural Gas,193.889,SWN,1.68,,301.01
9,2008,Natural Gas,288.056,SWN,1.05,,526.75


In [8]:
# Pick the production column that matches each row's product:
# 'Natural Gas' -> GAS_PRODUCTION, 'Oil' -> OIL_PRODUCTION, anything else -> NGL_PRODUCTION.
is_gas = SWN_merge['product'] == 'Natural Gas'
is_oil = SWN_merge['product'] == 'Oil'

non_gas_production = np.where(is_oil, SWN_merge['OIL_PRODUCTION'], SWN_merge['NGL_PRODUCTION'])
SWN_merge['production'] = np.where(is_gas, SWN_merge['GAS_PRODUCTION'], non_gas_production)

# Hedge ratio = converted volume divided by the matching production figure.
SWN_merge['hedge_ratio'] = SWN_merge['volume'].div(SWN_merge['production'])

# Keep only the reporting columns, in output order.
report_columns = ['ticker', 'year', 'product', 'volume', 'production', 'hedge_ratio']
SWN_merge = SWN_merge[report_columns]

SWN_merge

Unnamed: 0,ticker,year,product,volume,production,hedge_ratio
0,SWN,2003,Natural Gas,88.611,104.11,0.851
1,SWN,2003,Oil,0.658,1.455,0.452
2,SWN,2004,Natural Gas,99.722,138.1,0.722
3,SWN,2004,Oil,1.167,1.693,0.689
4,SWN,2005,Natural Gas,137.222,155.62,0.882
5,SWN,2005,Oil,0.986,1.93,0.511
6,SWN,2006,Natural Gas,143.889,186.56,0.771
7,SWN,2006,Oil,0.329,1.91,0.172
8,SWN,2007,Natural Gas,193.889,301.01,0.644
9,SWN,2008,Natural Gas,288.056,526.75,0.547


In [9]:
# Write the hedge-ratio table to CSV; the row index is not written.
SWN_merge.to_csv(path_or_buf='../hedge_ratio/SWN_hedge_ratio.csv', index=False)

In [10]:
# production.to_csv('production_clean.csv', index=False)