In [1]:
# Data manipulation
import pandas as pd
import numpy as np

# Options for pandas
pd.options.display.max_columns = 50
pd.options.display.max_rows = 100
pd.set_option('display.float_format', lambda x: '{:.3f}'.format(x))

# Display all cell outputs
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

from IPython import get_ipython
ipython = get_ipython()

# autoreload extension
if 'autoreload' not in ipython.extension_manager.loaded:
    %load_ext autoreload

%autoreload 2

# Ignore exceptions
import expectexception

# Visualizations
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
init_notebook_mode(connected=True)

import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline

# Activate the automatic conversion for pandas
from rpy2.robjects import pandas2ri
pandas2ri.activate()

# Load the needed extension for the %%R cell magic
%load_ext rpy2.ipython

# Load ipywidgets module
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

In [2]:
%%R

# load the r library
library(ggplot2)
library(ggalt)
theme_set(theme_light())

library(scales)
library(dplyr)



Attaching package: ‘dplyr’




The following objects are masked from ‘package:stats’:

    filter, lag




The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union





In [3]:
CLR = pd.read_csv('../clean_csv/CLR_clean.csv')

production = pd.read_csv('../clean_csv/production_clean.csv')

In [4]:
CLR = CLR[['doc_id', 'maturity', 'product_type', 'instrument_type', 'vol_1', 'vol_1_type']]

In [5]:
CLR['maturity'] = CLR['maturity'].apply(lambda x: int(str(x)[0:4]))

In [6]:
CLR

Unnamed: 0,doc_id,maturity,product_type,instrument_type,vol_1,vol_1_type
0,CLR20100226,2010,oil,swap,343,mbbls
1,CLR20100226,2010,oil,collar,148,mbbls
2,CLR20100226,2010,oil,swap,410,mbbls
3,CLR20100226,2010,oil,collar,228,mbbls
4,CLR20100226,2010,oil,swap,414,mbbls
5,CLR20100226,2010,oil,collar,598,mbbls
6,CLR20100226,2010,oil,swap,414,mbbls
7,CLR20100226,2010,oil,collar,598,mbbls
8,CLR20100226,2010,oil,swap,343,mbbls
9,CLR20100226,2010,oil,collar,148,mbbls


In [7]:
import re

CLR['year'] = CLR['doc_id'].apply(lambda x: int(re.findall('\d{4}', x)[0]))

CLR['ticker'] = CLR['doc_id'].apply(lambda x: re.findall('[A-Z]+', x)[0])

CLR = CLR.loc[(CLR['maturity'] == CLR['year']) & (~CLR['instrument_type'].str.contains('basis'))].reset_index(drop=True)


In [8]:
unit_convertion = {
    'bcf': 25/9,
    'mbbls': 1/365,
    'bbls': 1/365000,
    'mmbtu': 1/370475,
    'mmmbtu': 40/14819
}

product_convertion = {
    'natural gas': 'Natural Gas',
    'oil': 'Oil',
    'natural gas (bcf)': 'Natural Gas'
}

In [9]:
CLR['multiplier'] = CLR['vol_1_type'].map(unit_convertion)
CLR['product'] = CLR['product_type'].map(product_convertion)
CLR['volume'] = CLR['vol_1'] * CLR['multiplier']

CLR = CLR[['ticker', 'year', 'product', 'volume', 'instrument_type']]

In [10]:
CLR = CLR.drop_duplicates()

CLR = CLR.drop('instrument_type', axis = 1)

In [11]:
CLR = CLR.groupby(['year', 'product']).agg({'volume':'sum', 'ticker':'first'}).reset_index()

In [12]:
CLR_merge = pd.merge(CLR, production, on=['year', 'ticker'])

In [13]:
CLR_merge['production'] = np.where(CLR_merge['product'] == 'Natural Gas', CLR_merge['GAS_PRODUCTION'], 
         np.where(CLR_merge['product'] == 'Oil', CLR_merge['OIL_PRODUCTION'], CLR_merge['NGL_PRODUCTION']))

CLR_merge['hedge_ratio'] = CLR_merge['volume'] / CLR_merge['production']

CLR_merge = CLR_merge[['ticker', 'year', 'product', 'volume', 'production', 'hedge_ratio']]

CLR_merge

Unnamed: 0,ticker,year,product,volume,production,hedge_ratio
0,CLR,2010,Natural Gas,51.51,65.6,0.785
1,CLR,2010,Oil,11.351,32.38,0.351
2,CLR,2011,Natural Gas,70.192,100.47,0.699
3,CLR,2011,Oil,25.867,45.12,0.573
4,CLR,2012,Natural Gas,23.888,174.521,0.137
5,CLR,2012,Oil,44.504,68.497,0.65
6,CLR,2013,Natural Gas,85.323,240.355,0.355
7,CLR,2013,Oil,69.753,95.859,0.728
8,CLR,2014,Natural Gas,282.732,313.137,0.903
9,CLR,2014,Oil,82.382,121.999,0.675


In [14]:
CLR_merge.to_csv('../hedge_ratio/CLR_hedge_ratio.csv', index=False)