In [1]:
# Standard
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import re
import warnings; warnings.simplefilter('ignore')
from dfply import * # dplyr equivalent
import datetime

from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

# Cleaning Transcripts

In [2]:
df1 = pd.read_csv(r"C:\Users\Ryan\Documents\GitHub\NLP_Final_Project\earning_calls.csv")
df1.head(n=10)

# Set index
df1['Index'] = range(1, len(df1)+1)
df1.set_index('Index')

Unnamed: 0_level_0,href,Transcript,Ticker
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,/earnings/call-transcripts/2020/04/30/microsof...,"[""OperatorGreetings and welcome to the Microso...",msft
2,/earnings/call-transcripts/2020/01/30/microsof...,"[""OperatorWelcome to the Microsoft Fiscal Year...",msft
3,/earnings/call-transcripts/2019/10/23/microsof...,"[""OperatorWelcome to the Microsoft Fiscal Year...",msft
4,/earnings/call-transcripts/2019/07/18/microsof...,"[""OperatorWelcome to the Microsoft Fiscal Year...",msft
5,/earnings/call-transcripts/2019/04/25/microsof...,"['Prepared Remarks:', 'Operator', 'Greetings, ...",msft
...,...,...,...
3491,/earnings/call-transcripts/2019/08/28/coty-inc...,"[""OperatorGood morning, ladies and gentlemen. ...",coty
3492,/earnings/call-transcripts/2019/05/08/coty-inc...,[],coty
3493,/earnings/call-transcripts/2019/02/08/coty-inc...,"['Contents:', '', 'Prepared Remarks', 'Questio...",coty
3494,/earnings/call-transcripts/2018/11/07/coty-inc...,"['Prepared Remarks:', 'Operator', ""Good mornin...",coty


In [3]:
# Get Date

# Split href
href = df1['href'].str.split("/",n=6,expand=True)

# Drop unnecessary axes
href1 = href.drop([0,1,2,6], axis = 1)

# Rename Axes
href1 = href1.rename(columns = {3:"Year",4:"Month",5:"Day"})

# Create Index to merge on
href1['Index'] = range(1, len(href1)+1)
href1.set_index('Index')
href1.head()

# Join to Original Dataframe 
df1 = df1.merge(href1, how = 'outer', on = 'Index' )

# Fix the year column due to inconsistent href links
df1['Year'] = df1.href.apply(lambda x: (re.findall(r"20[0-9]{2}", x)[0]))

# Create date column
df1['Date'] = pd.to_datetime(df1[['Year', 'Month', 'Day']])

df1.head()

Unnamed: 0,href,Transcript,Ticker,Index,Year,Month,Day,Date
0,/earnings/call-transcripts/2020/04/30/microsof...,"[""OperatorGreetings and welcome to the Microso...",msft,1,2020,4,30,2020-04-30
1,/earnings/call-transcripts/2020/01/30/microsof...,"[""OperatorWelcome to the Microsoft Fiscal Year...",msft,2,2020,1,30,2020-01-30
2,/earnings/call-transcripts/2019/10/23/microsof...,"[""OperatorWelcome to the Microsoft Fiscal Year...",msft,3,2019,10,23,2019-10-23
3,/earnings/call-transcripts/2019/07/18/microsof...,"[""OperatorWelcome to the Microsoft Fiscal Year...",msft,4,2019,7,18,2019-07-18
4,/earnings/call-transcripts/2019/04/25/microsof...,"['Prepared Remarks:', 'Operator', 'Greetings, ...",msft,5,2019,4,25,2019-04-25


In [4]:
# Get Quarter

# Split existing href
href2 = href[6].str.split(r'-\d',n = 3, expand = True)
href3 = href2[0].str.split('-q', n = 4, expand = True)

# Rename and drop unnessecary columns
href3 = href3.rename(columns = {1:'Quarter'})
href3 = href3.drop(columns = [0,2])

# Create Index
href3['Index'] = range(1, len(href1)+1)
href3.set_index('Index')
href3.head()

# Merge to existing dataframe
df1 = df1.merge(href3, how = 'outer', on = 'Index')
df1.head()

Unnamed: 0,href,Transcript,Ticker,Index,Year,Month,Day,Date,Quarter
0,/earnings/call-transcripts/2020/04/30/microsof...,"[""OperatorGreetings and welcome to the Microso...",msft,1,2020,4,30,2020-04-30,3
1,/earnings/call-transcripts/2020/01/30/microsof...,"[""OperatorWelcome to the Microsoft Fiscal Year...",msft,2,2020,1,30,2020-01-30,2
2,/earnings/call-transcripts/2019/10/23/microsof...,"[""OperatorWelcome to the Microsoft Fiscal Year...",msft,3,2019,10,23,2019-10-23,1
3,/earnings/call-transcripts/2019/07/18/microsof...,"[""OperatorWelcome to the Microsoft Fiscal Year...",msft,4,2019,7,18,2019-07-18,4
4,/earnings/call-transcripts/2019/04/25/microsof...,"['Prepared Remarks:', 'Operator', 'Greetings, ...",msft,5,2019,4,25,2019-04-25,3


In [5]:
#bootleg company name column
df1['CompanyName'] = df1['href'].str[38:]

In [6]:
# Reorder column names

cols = df1.columns.tolist()
myorder = [7,0,5,6,1,3,4,2]
cols = [cols[i] for i in myorder]
df1 = df1[cols]

In [7]:
df1.head()

Unnamed: 0,Date,href,Month,Day,Transcript,Index,Year,Ticker
0,2020-04-30,/earnings/call-transcripts/2020/04/30/microsof...,4,30,"[""OperatorGreetings and welcome to the Microso...",1,2020,msft
1,2020-01-30,/earnings/call-transcripts/2020/01/30/microsof...,1,30,"[""OperatorWelcome to the Microsoft Fiscal Year...",2,2020,msft
2,2019-10-23,/earnings/call-transcripts/2019/10/23/microsof...,10,23,"[""OperatorWelcome to the Microsoft Fiscal Year...",3,2019,msft
3,2019-07-18,/earnings/call-transcripts/2019/07/18/microsof...,7,18,"[""OperatorWelcome to the Microsoft Fiscal Year...",4,2019,msft
4,2019-04-25,/earnings/call-transcripts/2019/04/25/microsof...,4,25,"['Prepared Remarks:', 'Operator', 'Greetings, ...",5,2019,msft


In [8]:
# Contraction dictionary
contractions_dict = {
    "didn't": 'did not',
    "don't": 'do not',
    "aren't": 'are not',
    "can't": 'cannot',
    "could've": "could've",
    "couldn't": "could not",
    "i'll": "i will",
    "i'd": "i would",
    "i'm": "i am",
    "it'll": "it will",
    "we'll": "we will"
    
}

contractions_re = re.compile('(%s)' % '|'.join(contractions_dict.keys()))

def expand_contractions(s, contractions_dict=contractions_dict):
    def replace(match):
        return contractions_dict[match.group(0)]
    return contractions_re.sub(replace, s)

In [9]:
expand_contractions(r"couldn't")

'could not'

In [10]:
# Standard function to clean string
def clean(string):
    x = string.lower() # lowercases the string
    x = expand_contractions(x) # replaces contractions
    x = re.sub(r'\W+', ' ', x) # takes only alpha numeric
    return x

In [11]:
df1['clean_transcript'] = df1.Transcript.apply(lambda x: clean(x))

In [12]:
df1.head()

Unnamed: 0,Date,href,Month,Day,Transcript,Index,Year,Ticker,clean_transcript
0,2020-04-30,/earnings/call-transcripts/2020/04/30/microsof...,4,30,"[""OperatorGreetings and welcome to the Microso...",1,2020,msft,operatorgreetings and welcome to the microsof...
1,2020-01-30,/earnings/call-transcripts/2020/01/30/microsof...,1,30,"[""OperatorWelcome to the Microsoft Fiscal Year...",2,2020,msft,operatorwelcome to the microsoft fiscal year ...
2,2019-10-23,/earnings/call-transcripts/2019/10/23/microsof...,10,23,"[""OperatorWelcome to the Microsoft Fiscal Year...",3,2019,msft,operatorwelcome to the microsoft fiscal year ...
3,2019-07-18,/earnings/call-transcripts/2019/07/18/microsof...,7,18,"[""OperatorWelcome to the Microsoft Fiscal Year...",4,2019,msft,operatorwelcome to the microsoft fiscal year ...
4,2019-04-25,/earnings/call-transcripts/2019/04/25/microsof...,4,25,"['Prepared Remarks:', 'Operator', 'Greetings, ...",5,2019,msft,prepared remarks operator greetings welcome t...


In [13]:
# List of stopwords
stop = list(STOPWORDS)

# Functions
# removes stop words from a clean transcript
def remove_stop(string):
    wostop = [] # empty list to append to
    word = string.split() # splits string into list
    for n, i in enumerate(word): # iterates over the list
        if i not in stop: # if word in the list not in stop words list
            wostop.append(word[n]) # append to wostop list
    wostop = ' '.join(wostop) # join wostop list into a string
    return wostop # returns a string

# Stems word
def stem(string):
    t = [] # empty list to append
    ps = nltk.stem.PorterStemmer() # stem purposes
    word = string.split() # split string into list
    for item in word: # iterate through list
        stem = ps.stem(item) # stem each word
        t.append(stem) # add new stem word into list
    t = ' '.join(t) # join list of stem words into a string
    return t # returns a string

In [14]:
# Remove stop words
df1['clean_transcript2'] = df1.clean_transcript.apply(lambda x: remove_stop(x))
df1

Unnamed: 0,Date,href,Month,Day,Transcript,Index,Year,Ticker,clean_transcript,clean_transcript2
0,2020-04-30,/earnings/call-transcripts/2020/04/30/microsof...,04,30,"[""OperatorGreetings and welcome to the Microso...",1,2020,msft,operatorgreetings and welcome to the microsof...,operatorgreetings welcome microsoft fiscal yea...
1,2020-01-30,/earnings/call-transcripts/2020/01/30/microsof...,01,30,"[""OperatorWelcome to the Microsoft Fiscal Year...",2,2020,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2020 sec...
2,2019-10-23,/earnings/call-transcripts/2019/10/23/microsof...,10,23,"[""OperatorWelcome to the Microsoft Fiscal Year...",3,2019,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2020 fir...
3,2019-07-18,/earnings/call-transcripts/2019/07/18/microsof...,07,18,"[""OperatorWelcome to the Microsoft Fiscal Year...",4,2019,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2019 fou...
4,2019-04-25,/earnings/call-transcripts/2019/04/25/microsof...,04,25,"['Prepared Remarks:', 'Operator', 'Greetings, ...",5,2019,msft,prepared remarks operator greetings welcome t...,prepared remarks operator greetings welcome mi...
...,...,...,...,...,...,...,...,...,...,...
3490,2019-08-28,/earnings/call-transcripts/2019/08/28/coty-inc...,08,28,"[""OperatorGood morning, ladies and gentlemen. ...",3491,2019,coty,operatorgood morning ladies and gentlemen my ...,operatorgood morning ladies gentlemen name mar...
3491,2019-05-08,/earnings/call-transcripts/2019/05/08/coty-inc...,05,08,[],3492,2019,coty,,
3492,2019-02-08,/earnings/call-transcripts/2019/02/08/coty-inc...,02,08,"['Contents:', '', 'Prepared Remarks', 'Questio...",3493,2019,coty,contents prepared remarks questions and answe...,contents prepared remarks questions answers ca...
3493,2018-11-07,/earnings/call-transcripts/2018/11/07/coty-inc...,11,07,"['Prepared Remarks:', 'Operator', ""Good mornin...",3494,2018,coty,prepared remarks operator good morning ladies...,prepared remarks operator good morning ladies ...


In [15]:
# final dataframe without empty transcript
df2 = (df1[df1.Transcript.apply(lambda x: len(x) > 10)])
df2

Unnamed: 0,Date,href,Month,Day,Transcript,Index,Year,Ticker,clean_transcript,clean_transcript2
0,2020-04-30,/earnings/call-transcripts/2020/04/30/microsof...,04,30,"[""OperatorGreetings and welcome to the Microso...",1,2020,msft,operatorgreetings and welcome to the microsof...,operatorgreetings welcome microsoft fiscal yea...
1,2020-01-30,/earnings/call-transcripts/2020/01/30/microsof...,01,30,"[""OperatorWelcome to the Microsoft Fiscal Year...",2,2020,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2020 sec...
2,2019-10-23,/earnings/call-transcripts/2019/10/23/microsof...,10,23,"[""OperatorWelcome to the Microsoft Fiscal Year...",3,2019,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2020 fir...
3,2019-07-18,/earnings/call-transcripts/2019/07/18/microsof...,07,18,"[""OperatorWelcome to the Microsoft Fiscal Year...",4,2019,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2019 fou...
4,2019-04-25,/earnings/call-transcripts/2019/04/25/microsof...,04,25,"['Prepared Remarks:', 'Operator', 'Greetings, ...",5,2019,msft,prepared remarks operator greetings welcome t...,prepared remarks operator greetings welcome mi...
...,...,...,...,...,...,...,...,...,...,...
3489,2019-11-06,/earnings/call-transcripts/2019/11/06/coty-inc...,11,06,"[""OperatorGood morning, ladies and gentlemen. ...",3490,2019,coty,operatorgood morning ladies and gentlemen my ...,operatorgood morning ladies gentlemen name mar...
3490,2019-08-28,/earnings/call-transcripts/2019/08/28/coty-inc...,08,28,"[""OperatorGood morning, ladies and gentlemen. ...",3491,2019,coty,operatorgood morning ladies and gentlemen my ...,operatorgood morning ladies gentlemen name mar...
3492,2019-02-08,/earnings/call-transcripts/2019/02/08/coty-inc...,02,08,"['Contents:', '', 'Prepared Remarks', 'Questio...",3493,2019,coty,contents prepared remarks questions and answe...,contents prepared remarks questions answers ca...
3493,2018-11-07,/earnings/call-transcripts/2018/11/07/coty-inc...,11,07,"['Prepared Remarks:', 'Operator', ""Good mornin...",3494,2018,coty,prepared remarks operator good morning ladies...,prepared remarks operator good morning ladies ...


# Merging Stock onto Transcript

In [16]:
# Function to use later
# Find the next available date
def next(x):
    if x.weekday() == 5: # If next day is saturday, return next monday
        return x + pd.to_timedelta(2, unit = "D")
    elif x.weekday() == 6: # If next day is sunday, return next monday
        return x + pd.to_timedelta(1, unit = "D")
    else:
        return x # If its weekday, return same date

In [17]:
# Read stock data cleaned through alteryx
stonks = pd.read_csv(r"C:\Users\Ryan\Documents\GitHub\NLP_Final_Project\data\Company_Stock_Price_Clean.csv")
stonks.head()

Unnamed: 0,Date,Ticker,Adj_Close
0,2017-01-03,MSFT,58.969059
1,2017-01-03,AAPL,110.392334
2,2017-01-03,AMZN,753.669983
3,2017-01-03,FB,116.860001
4,2017-01-03,GOOGL,808.01001


In [18]:
stonks.Ticker = stonks.Ticker.apply(lambda x: x.lower()) # lowercase tickers
stonks.Date = pd.to_datetime(stonks.Date) # Change to dateformat
stonks.head()

Unnamed: 0,Date,Ticker,Adj_Close
0,2017-01-03,msft,58.969059
1,2017-01-03,aapl,110.392334
2,2017-01-03,amzn,753.669983
3,2017-01-03,fb,116.860001
4,2017-01-03,googl,808.01001


In [19]:
# Create next available day : date + 1 day
df2['Next_Available_Day'] = pd.to_datetime(df2.Date)+pd.to_timedelta(1, unit = "D")

# If Next Available day is on a weekend, set it to next monday
df2.Next_Available_Day = df2.Next_Available_Day.apply(lambda x: next(x))
df2

Unnamed: 0,Date,href,Month,Day,Transcript,Index,Year,Ticker,clean_transcript,clean_transcript2,Next_Available_Day
0,2020-04-30,/earnings/call-transcripts/2020/04/30/microsof...,04,30,"[""OperatorGreetings and welcome to the Microso...",1,2020,msft,operatorgreetings and welcome to the microsof...,operatorgreetings welcome microsoft fiscal yea...,2020-05-01
1,2020-01-30,/earnings/call-transcripts/2020/01/30/microsof...,01,30,"[""OperatorWelcome to the Microsoft Fiscal Year...",2,2020,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2020 sec...,2020-01-31
2,2019-10-23,/earnings/call-transcripts/2019/10/23/microsof...,10,23,"[""OperatorWelcome to the Microsoft Fiscal Year...",3,2019,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2020 fir...,2019-10-24
3,2019-07-18,/earnings/call-transcripts/2019/07/18/microsof...,07,18,"[""OperatorWelcome to the Microsoft Fiscal Year...",4,2019,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2019 fou...,2019-07-19
4,2019-04-25,/earnings/call-transcripts/2019/04/25/microsof...,04,25,"['Prepared Remarks:', 'Operator', 'Greetings, ...",5,2019,msft,prepared remarks operator greetings welcome t...,prepared remarks operator greetings welcome mi...,2019-04-26
...,...,...,...,...,...,...,...,...,...,...,...
3489,2019-11-06,/earnings/call-transcripts/2019/11/06/coty-inc...,11,06,"[""OperatorGood morning, ladies and gentlemen. ...",3490,2019,coty,operatorgood morning ladies and gentlemen my ...,operatorgood morning ladies gentlemen name mar...,2019-11-07
3490,2019-08-28,/earnings/call-transcripts/2019/08/28/coty-inc...,08,28,"[""OperatorGood morning, ladies and gentlemen. ...",3491,2019,coty,operatorgood morning ladies and gentlemen my ...,operatorgood morning ladies gentlemen name mar...,2019-08-29
3492,2019-02-08,/earnings/call-transcripts/2019/02/08/coty-inc...,02,08,"['Contents:', '', 'Prepared Remarks', 'Questio...",3493,2019,coty,contents prepared remarks questions and answe...,contents prepared remarks questions answers ca...,2019-02-11
3493,2018-11-07,/earnings/call-transcripts/2018/11/07/coty-inc...,11,07,"['Prepared Remarks:', 'Operator', ""Good mornin...",3494,2018,coty,prepared remarks operator good morning ladies...,prepared remarks operator good morning ladies ...,2018-11-08


In [20]:
# Merge Stock and Transcript
merge_df = pd.merge(df2, stonks, on = ['Date', 'Ticker'], how = 'inner')
merge_df = merge_df.rename(columns={'Adj_Close': 'Date_Adj_Close'}) # Rename
merge_df.head()

Unnamed: 0,Date,href,Month,Day,Transcript,Index,Year,Ticker,clean_transcript,clean_transcript2,Next_Available_Day,Date_Adj_Close
0,2020-04-30,/earnings/call-transcripts/2020/04/30/microsof...,4,30,"[""OperatorGreetings and welcome to the Microso...",1,2020,msft,operatorgreetings and welcome to the microsof...,operatorgreetings welcome microsoft fiscal yea...,2020-05-01,178.71228
1,2020-01-30,/earnings/call-transcripts/2020/01/30/microsof...,1,30,"[""OperatorWelcome to the Microsoft Fiscal Year...",2,2020,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2020 sec...,2020-01-31,171.830795
2,2019-10-23,/earnings/call-transcripts/2019/10/23/microsof...,10,23,"[""OperatorWelcome to the Microsoft Fiscal Year...",3,2019,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2020 fir...,2019-10-24,136.023209
3,2019-07-18,/earnings/call-transcripts/2019/07/18/microsof...,7,18,"[""OperatorWelcome to the Microsoft Fiscal Year...",4,2019,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2019 fou...,2019-07-19,134.761719
4,2019-04-25,/earnings/call-transcripts/2019/04/25/microsof...,4,25,"['Prepared Remarks:', 'Operator', 'Greetings, ...",5,2019,msft,prepared remarks operator greetings welcome t...,prepared remarks operator greetings welcome mi...,2019-04-26,127.109573


In [21]:
# create a duplicate stocks file. 
temp = stonks
temp = temp.rename(columns={'Date': 'Next_Available_Day'}) # Rename the date column to 1_day_after to match the mergedf

# Merge the temp table with the already merged dataframe
final = pd.merge(merge_df, temp, on = ['Next_Available_Day', 'Ticker'], how = 'inner')
final = final.rename(columns={'Adj_Close': 'Next_Available_Day_Adj_Close'})
final

Unnamed: 0,Date,href,Month,Day,Transcript,Index,Year,Ticker,clean_transcript,clean_transcript2,Next_Available_Day,Date_Adj_Close,Next_Available_Day_Adj_Close
0,2020-04-30,/earnings/call-transcripts/2020/04/30/microsof...,04,30,"[""OperatorGreetings and welcome to the Microso...",1,2020,msft,operatorgreetings and welcome to the microsof...,operatorgreetings welcome microsoft fiscal yea...,2020-05-01,178.712280,174.085175
1,2020-01-30,/earnings/call-transcripts/2020/01/30/microsof...,01,30,"[""OperatorWelcome to the Microsoft Fiscal Year...",2,2020,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2020 sec...,2020-01-31,171.830795,169.294800
2,2019-10-23,/earnings/call-transcripts/2019/10/23/microsof...,10,23,"[""OperatorWelcome to the Microsoft Fiscal Year...",3,2019,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2020 fir...,2019-10-24,136.023209,138.699264
3,2019-07-18,/earnings/call-transcripts/2019/07/18/microsof...,07,18,"[""OperatorWelcome to the Microsoft Fiscal Year...",4,2019,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2019 fou...,2019-07-19,134.761719,134.959274
4,2019-04-25,/earnings/call-transcripts/2019/04/25/microsof...,04,25,"['Prepared Remarks:', 'Operator', 'Greetings, ...",5,2019,msft,prepared remarks operator greetings welcome t...,prepared remarks operator greetings welcome mi...,2019-04-26,127.109573,127.837875
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2987,2019-11-06,/earnings/call-transcripts/2019/11/06/coty-inc...,11,06,"[""OperatorGood morning, ladies and gentlemen. ...",3490,2019,coty,operatorgood morning ladies and gentlemen my ...,operatorgood morning ladies gentlemen name mar...,2019-11-07,12.747121,12.727539
2988,2019-08-28,/earnings/call-transcripts/2019/08/28/coty-inc...,08,28,"[""OperatorGood morning, ladies and gentlemen. ...",3491,2019,coty,operatorgood morning ladies and gentlemen my ...,operatorgood morning ladies gentlemen name mar...,2019-08-29,9.019238,9.038573
2989,2019-02-08,/earnings/call-transcripts/2019/02/08/coty-inc...,02,08,"['Contents:', '', 'Prepared Remarks', 'Questio...",3493,2019,coty,contents prepared remarks questions and answe...,contents prepared remarks questions answers ca...,2019-02-11,8.832841,9.145256
2990,2018-11-07,/earnings/call-transcripts/2018/11/07/coty-inc...,11,07,"['Prepared Remarks:', 'Operator', ""Good mornin...",3494,2018,coty,prepared remarks operator good morning ladies...,prepared remarks operator good morning ladies ...,2018-11-08,8.078259,7.742442


In [22]:
# Percent change
final['Percent Change'] = ((final.Next_Available_Day_Adj_Close - final.Date_Adj_Close)/ final.Date_Adj_Close)*100

# If percent change is positive, label 1, if percent change is negative, label 0
final['Label'] = np.where(final.Next_Available_Day_Adj_Close > final.Date_Adj_Close, 1, 0)
final

Unnamed: 0,Date,href,Month,Day,Transcript,Index,Year,Ticker,clean_transcript,clean_transcript2,Next_Available_Day,Date_Adj_Close,Next_Available_Day_Adj_Close,Percent Change,Label
0,2020-04-30,/earnings/call-transcripts/2020/04/30/microsof...,04,30,"[""OperatorGreetings and welcome to the Microso...",1,2020,msft,operatorgreetings and welcome to the microsof...,operatorgreetings welcome microsoft fiscal yea...,2020-05-01,178.712280,174.085175,-2.589137,0
1,2020-01-30,/earnings/call-transcripts/2020/01/30/microsof...,01,30,"[""OperatorWelcome to the Microsoft Fiscal Year...",2,2020,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2020 sec...,2020-01-31,171.830795,169.294800,-1.475868,0
2,2019-10-23,/earnings/call-transcripts/2019/10/23/microsof...,10,23,"[""OperatorWelcome to the Microsoft Fiscal Year...",3,2019,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2020 fir...,2019-10-24,136.023209,138.699264,1.967352,1
3,2019-07-18,/earnings/call-transcripts/2019/07/18/microsof...,07,18,"[""OperatorWelcome to the Microsoft Fiscal Year...",4,2019,msft,operatorwelcome to the microsoft fiscal year ...,operatorwelcome microsoft fiscal year 2019 fou...,2019-07-19,134.761719,134.959274,0.146596,1
4,2019-04-25,/earnings/call-transcripts/2019/04/25/microsof...,04,25,"['Prepared Remarks:', 'Operator', 'Greetings, ...",5,2019,msft,prepared remarks operator greetings welcome t...,prepared remarks operator greetings welcome mi...,2019-04-26,127.109573,127.837875,0.572972,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2987,2019-11-06,/earnings/call-transcripts/2019/11/06/coty-inc...,11,06,"[""OperatorGood morning, ladies and gentlemen. ...",3490,2019,coty,operatorgood morning ladies and gentlemen my ...,operatorgood morning ladies gentlemen name mar...,2019-11-07,12.747121,12.727539,-0.153617,0
2988,2019-08-28,/earnings/call-transcripts/2019/08/28/coty-inc...,08,28,"[""OperatorGood morning, ladies and gentlemen. ...",3491,2019,coty,operatorgood morning ladies and gentlemen my ...,operatorgood morning ladies gentlemen name mar...,2019-08-29,9.019238,9.038573,0.214373,1
2989,2019-02-08,/earnings/call-transcripts/2019/02/08/coty-inc...,02,08,"['Contents:', '', 'Prepared Remarks', 'Questio...",3493,2019,coty,contents prepared remarks questions and answe...,contents prepared remarks questions answers ca...,2019-02-11,8.832841,9.145256,3.536972,1
2990,2018-11-07,/earnings/call-transcripts/2018/11/07/coty-inc...,11,07,"['Prepared Remarks:', 'Operator', ""Good mornin...",3494,2018,coty,prepared remarks operator good morning ladies...,prepared remarks operator good morning ladies ...,2018-11-08,8.078259,7.742442,-4.157039,0


In [23]:
# Write to csv
final.to_csv(r"C:\Users\Ryan\Documents\GitHub\NLP_Final_Project\clean_data_v2.csv")