In [1]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F

In [23]:
data = pd.read_csv('data/qaData.csv', parse_dates=['Date'])

In [24]:
data.head()

Unnamed: 0,Company,Participants,Date,EventName,EventType,AnalystName,AnalystCompany,Question,Answer,RegularTag1,RegularTag2,RegularTag3,EarningTag1,EarningTag2,EarningTag3
0,JPMorgan Chase,"Jamie Dimon, Marianne Lake",2015-04-14,1Q15 earnings call,Earnings call,Glenn Schorr,Evercore ISI,"The performance in equities was great, and you...",There wasn’t anything particularly noteworthy ...,NIR,Markets,Equities,LOB,CIB,
1,JPMorgan Chase,"Jamie Dimon, Marianne Lake",2015-04-14,1Q15 earnings call,Earnings call,Glenn Schorr,Evercore ISI,You and others have been talking with your pri...,"Yes, this is where it would be. I wouldn’t say...",NIR,Markets,Prime brokerage / ROA,LOB,CIB,
2,JPMorgan Chase,"Jamie Dimon, Marianne Lake",2015-04-14,1Q15 earnings call,Earnings call,Glenn Schorr,Evercore ISI,In Jamie’s letter he talked about mentioning t...,"No, it is more of the same. Obviously, G-SIB t...",Regulatory Capital and Liquidity,GSIB,,Firmwide,Capital,
3,JPMorgan Chase,"Jamie Dimon, Marianne Lake",2015-04-14,1Q15 earnings call,Earnings call,John McDonald,Sanford C. Bernstein & Co.,"On net interest income, do you have an outlook...","So again, assuming for a second that rates don...",NII / Balance Sheet,ALM / Rates / Duration,Rates,Firmwide,Balance sheet,
4,JPMorgan Chase,"Jamie Dimon, Marianne Lake",2015-04-14,1Q15 earnings call,Earnings call,Erika Najarian,Bank of America,"On CCAR, do you expect any potential surcharge...","Taking your first point, Erika, obviously, I d...",Regulatory Capital and Liquidity,GSIB,CCAR,Firmwide,Capital,


In [26]:
data['Lag1'] = data.groupby(["Company", "Participants", "Date", "EventName", "EventType"])['EarningTag2'].shift(1)
data['Year'] = data['Date'].dt.year
data['Month'] = data['Date'].dt.month

In [27]:
data.head()

Unnamed: 0,Company,Participants,Date,EventName,EventType,AnalystName,AnalystCompany,Question,Answer,RegularTag1,RegularTag2,RegularTag3,EarningTag1,EarningTag2,EarningTag3,Lag1,Year,Month
0,JPMorgan Chase,"Jamie Dimon, Marianne Lake",2015-04-14,1Q15 earnings call,Earnings call,Glenn Schorr,Evercore ISI,"The performance in equities was great, and you...",There wasn’t anything particularly noteworthy ...,NIR,Markets,Equities,LOB,CIB,,,2015,4
1,JPMorgan Chase,"Jamie Dimon, Marianne Lake",2015-04-14,1Q15 earnings call,Earnings call,Glenn Schorr,Evercore ISI,You and others have been talking with your pri...,"Yes, this is where it would be. I wouldn’t say...",NIR,Markets,Prime brokerage / ROA,LOB,CIB,,CIB,2015,4
2,JPMorgan Chase,"Jamie Dimon, Marianne Lake",2015-04-14,1Q15 earnings call,Earnings call,Glenn Schorr,Evercore ISI,In Jamie’s letter he talked about mentioning t...,"No, it is more of the same. Obviously, G-SIB t...",Regulatory Capital and Liquidity,GSIB,,Firmwide,Capital,,CIB,2015,4
3,JPMorgan Chase,"Jamie Dimon, Marianne Lake",2015-04-14,1Q15 earnings call,Earnings call,John McDonald,Sanford C. Bernstein & Co.,"On net interest income, do you have an outlook...","So again, assuming for a second that rates don...",NII / Balance Sheet,ALM / Rates / Duration,Rates,Firmwide,Balance sheet,,Capital,2015,4
4,JPMorgan Chase,"Jamie Dimon, Marianne Lake",2015-04-14,1Q15 earnings call,Earnings call,Erika Najarian,Bank of America,"On CCAR, do you expect any potential surcharge...","Taking your first point, Erika, obviously, I d...",Regulatory Capital and Liquidity,GSIB,CCAR,Firmwide,Capital,,Balance sheet,2015,4


In [48]:
nn_data = data.loc[data['EventType']=="Earnings call", ['Company', 'Participants', 'Month', 'Year', 'AnalystName',	'AnalystCompany', 'EventName', 'Lag1', 'EarningTag2']].copy()
nn_data['Quarter'] = nn_data['EventName'].str.split("Q").str[0]
nn_data = nn_data[['Company', "Participants", "AnalystName", "AnalystCompany", "Month", "Year", "Quarter", "Lag1", "EarningTag2"]].copy()

In [49]:
nn_data.head()

Unnamed: 0,Company,Participants,AnalystName,AnalystCompany,Month,Year,Quarter,Lag1,EarningTag2
0,JPMorgan Chase,"Jamie Dimon, Marianne Lake",Glenn Schorr,Evercore ISI,4,2015,1,,CIB
1,JPMorgan Chase,"Jamie Dimon, Marianne Lake",Glenn Schorr,Evercore ISI,4,2015,1,CIB,CIB
2,JPMorgan Chase,"Jamie Dimon, Marianne Lake",Glenn Schorr,Evercore ISI,4,2015,1,CIB,Capital
3,JPMorgan Chase,"Jamie Dimon, Marianne Lake",John McDonald,Sanford C. Bernstein & Co.,4,2015,1,Capital,Balance sheet
4,JPMorgan Chase,"Jamie Dimon, Marianne Lake",Erika Najarian,Bank of America,4,2015,1,Balance sheet,Capital


In [87]:
nn_data_encoded = pd.concat([nn_data, 
                             pd.get_dummies(nn_data['Company']),
                             pd.get_dummies(nn_data['Participants']),
                             pd.get_dummies(nn_data['AnalystName']),
                             pd.get_dummies(nn_data['AnalystCompany']),], axis=1)

new_cols = pd.get_dummies(nn_data['Company']).columns.tolist() + \
             pd.get_dummies(nn_data['Participants']).columns.tolist() + \
             pd.get_dummies(nn_data['AnalystName']).columns.tolist() + \
             pd.get_dummies(nn_data['AnalystCompany']).columns.tolist()

nn_data_encoded = nn_data_encoded[['Month', "Year", "Quarter", "Lag1", "EarningTag2"] + new_cols].copy()
new_cols = [col.replace(" ", "") for col in new_cols]
#nn_data_encoded.columns = ['Month', "Year", "Quarter", "Lag1", "EarningTag2"] + new_cols

In [92]:
nn_data_encoded.shape

(2823, 126)

In [89]:
for col in nn_data_encoded:
    print(col)

Month
Year
Quarter
Lag1
EarningTag2
Bank of America
Bank of America
Citigroup
Goldman Sachs
JPMorgan Chase
Morgan Stanley
Morgan Stanley
PNC
Wells Fargo
Wells Fargo
Dane Holmes, Harvey Schwartz
Harvey Schwartz
James Gorman, Jonathan Pruzan, Sharon Yeshaya
James Gorman, Kathleen McCabe, Jonathan Pruzan
James Gorman, Kathleen McCabe, Ruth Porat
James Rowe, John Shrewsberry, John Stumpf
James Rowe, John Shrewsberry, Timothy Sloan
Jamie Dimon, Marianne Lake
John Campbell, John Shrewsberry, Timothy Sloan
John Gerspach, Michael Verdeschi
Marianne Lake
Martin Chavez, Dane Holmes
Martin Chavez, Dane Holmes, Harvey Schwartz
Martin Chavez, Heather Miner
Michael Corbat, John Gerspach
Mike Corbat, John Gerspach
Mike Corbat, John Gerspach, Susan Kendall
Paul Donofrio, Brian Moynihan
Paul Donofrio, Lee McEntire, Brian Moynihan
William Callihan, William Demchak, Robert Reilly
William Demchak, Bryan Gill, Robert Reilly
Adam Hurwich
Alevizos Alevizakos
Andrew Lim
Bill Carcache
Brennan Hawken
Brian Fora