> Use Yahoo Finance API to get the daily stock returns.

> Daily returns represent the percentage change in the value of an asset or investment over a single trading day. Daily returns are a measure of the daily price movement of a financial instrument, such as stocks.

# Set Up

In [67]:
# Install libraries needed
!pip install yfinance



In [68]:
import yfinance as yf
import pandas as pd

In [69]:
# Mount drive to read in the data
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Read in Data

In [70]:
# New data file given

# File path to read in the data
file_path = '/content/drive/MyDrive/Vandy/Second Year/Spring semester/NLP Asset Management/Project 1/finished_annotation_5.2k.xlsx'

# Load the file into a df
df = pd.read_excel(file_path)

# Show the first few rows of the df
df.head()

Unnamed: 0,index,title & content,sentiment_perigon,summary,description,Ticker,Sector,Industry,Company,SASB,...,pubDate_brief,pubDate,url,keywords,categories,entities,content,articleId,title,Unnamed: 28
0,12024,Delays Won‚Äö√Ñ√¥t Hurt Japan‚Äö√Ñ√¥s First Ca...,"{'positive': 0.026093118, 'negative': 0.923414...",Osaka Governor Hirofumi Yoshimura said that th...,Years of delay to plans for Japan‚Äö√Ñ√¥s firs...,MGM,Services,Casinos & Gaming,MGM Resorts International,{'Internal Controls on Money Laundering': 'By ...,...,2023-05-18,2023-05-18T21:25:29+00:00,https://www.dailymail.co.uk/health/article-121...,[{'name': 'identifiable user health informatio...,[{'name': 'Health'}],"[{'data': 'Chinese', 'type': 'NORP', 'mentions...",A popular fertility app used by women to track...,7be6da0f9313404ca02afd000b0e787a,Fertility app used by 500k women sold private ...,
1,20675,MetLife (MET) Could Be a Great Choice - Gettin...,"{'positive': 0.8553927, 'negative': 0.01334850...",MetLife (MET) is a Finance stock that has seen...,Dividends are one of the best benefits to bein...,MET,Financials,Insurance,Metlife Inc,{'Financed Emissions': 'Entities participating...,...,2022-10-31,2022-10-31T20:36:25+00:00,https://nypost.com/2022/10/31/lottery-app-jack...,"[{'name': 'ticket sales', 'weight': 0.10697382...",[],"[{'data': 'Powerball', 'type': 'WORK_OF_ART', ...",You could win Monday night‚Äôs historic $1 bil...,28cbde589b2b4954a5e39dd9df22624e,More and more tickets being sold online as Pow...,
2,33685,New York Cements Itself as the Gold Mining Cap...,"{'positive': 0.46477953, 'negative': 0.0338994...","This week, top-five producer AngloGold Ashanti...",(Bloomberg) -- The momentum has been building ...,NEM,Extractives & Minerals Processing,Metals & Mining,Newmont Corp,{'Tailings Storage Facilities Management': 'Th...,...,2023-02-08,2023-02-08T22:16:21+00:00,https://www.newsmax.com/newsmax-tv/fitzgerald-...,"[{'name': 'Newsmax', 'weight': 0.09317307}, {'...",[{'name': 'Politics'}],"[{'data': 'Fitzgerald', 'type': 'PERSON', 'men...","Rep. Scott Fitzgerald, R-Wis., told Newsmax We...",c12355d81050473e89f4163372441061,Rep. Fitzgerald to Newsmax: DirecTV Dropping N...,
3,12072,"Shareholders v. Tesla, Nasdaq's diversity rule...","{'positive': 0.02043453, 'negative': 0.6323841...",\n\nThe case is In re Tesla Inc Securities Lit...,Some of the biggest securities cases of 2023 a...,NDAQ,Financials,Security & Commodity Exchanges,Nasdaq Inc,{'Managing Conflicts of Interest': 'Security a...,...,2023-05-18,2023-05-18T14:28:52+00:00,https://www.axios.com/pro/media-deals/2023/05/...,"[{'name': 'Google AI', 'weight': 0.09959001}, ...",[{'name': 'Tech'}],"[{'data': 'YouTube', 'type': 'ORG', 'mentions'...",YouTube has embraced AI for causing a massive ...,fcbd16768c584451912d7121a259ad9d,YouTube praises AI transformation at Brandcast,
4,28164,"CFOs Boost Currency Protections, Extend Hedge ...","{'positive': 0.031100325, 'negative': 0.955758...","CFOs Boost Currency Protections, Extend Hedge ...","Coca-Cola, Kimberly-Clark and Prologis are amo...",KO,Food & Beverage,Non-Alcoholic Beverages,Coca-Cola Co,{'Water Management': 'Water management relates...,...,2023-05-04,2023-05-04T23:39:33+00:00,https://www.cnbc.com/2023/05/04/apples-q2-ease...,"[{'name': 'last year', 'weight': 0.0876609}, {...",[{'name': 'Tech'}],"[{'data': 'Apple', 'type': 'ORG', 'mentions': ...",Apple (AAPL) posted a better-than-expected Mar...,7a5fd04f52ef49298c35f0a86e614a4d,Apple's quarter eases concerns about mobile de...,


In [71]:
df.shape

(5216, 29)

In [72]:
df.columns

Index(['index', 'title & content', 'sentiment_perigon', 'summary',
       'description', 'Ticker', 'Sector', 'Industry', 'Company', 'SASB',
       'cosine_similarities', 'max_cosine_similarities', 'GPT_ESG_or_not',
       'GPT_firm_or_not', 'GPT_sentiment', 'GPT_topics', 'ESG_or_not',
       'firm_or_not', 'human_label_sentiment', 'pubDate_brief', 'pubDate',
       'url', 'keywords', 'categories', 'entities', 'content', 'articleId',
       'title', 'Unnamed: 28'],
      dtype='object')

In [73]:
# Want to know how many distinct tickers their are so I can pull those daily returns from the yahoo finance api

# Define column of interest
ticker_column = 'Ticker'

# Count the number of unique tickers
num_unique_tickers = df[ticker_column].nunique()

# List all the tickers
unique_tickers = df[ticker_column].unique()

# Print results
print(f"Number of unique Tickers: {num_unique_tickers}")
print("List of the Tickers:")
print(unique_tickers)

Number of unique Tickers: 353
List of the Tickers:
['MGM' 'MET' 'NEM' 'NDAQ' 'KO' 'RHI' 'UPS' 'ILMN' 'FDX' 'HLT' 'CTLT' 'DFS'
 'XEL' 'STZ' 'KMI' 'PKG' 'AMZN' 'GIS' 'NDSN' 'WMB' 'CSCO' 'IPG' 'GS' 'BG'
 'SBAC' 'MDT' 'WBD' 'CMG' 'ES' 'CVS' 'FSLR' 'PG' 'LVS' 'LLY' 'WMT' 'BALL'
 'DD' 'OMC' 'TSN' 'AVB' 'HAS' 'MPC' 'AAPL' 'MSFT' 'ABG.BE' 'GOOGL' 'HCA'
 'KR' 'JBHT' 'CCL' 'BAX' 'BAC' 'APD' 'PLD' 'MAR' 'ATO' 'TXN' 'COST' 'BLK'
 'CBOE' 'DIS' 'LNC' 'SPGI' 'ABT' 'SLB' 'PARA' 'AAL' 'LMT' 'FCX' 'BA'
 'ENPH' 'FMC' 'CBRE' 'TAP' 'HAL' 'PWR' 'EFX' 'CNC' 'ETSY' 'WM' 'COP' 'ALL'
 'NSC' 'UAL' 'UNH' 'LEN' 'MSCI' 'DE' 'LIN' 'PHM' 'ODFL' 'PFG' 'SBUX' 'COF'
 'DXCM' 'T' 'HRL' 'CMCSA' 'GD' 'WBA' 'INTC' 'ADM' 'STT' 'HUM' 'EQR' 'PCG'
 'KDP' 'PEP' 'GE' 'GM' 'AXP' 'ABBV' 'NFLX' 'BSX' 'UHS' 'NLSN' 'IT' 'MS'
 'CARR' 'RSG' 'DAL' 'MO' 'CVX' 'NKE' 'STLD' 'VZ' 'CAT' 'RCL' 'VLO' 'TSLA'
 'BKR' 'LYB' 'CZR' 'ICE' 'AMGN' 'MCD' 'CHTR' 'BRO' 'DHI' 'TMUS' 'AMD'
 'PGR' 'INVH' 'PSX' 'NUE' 'LYV' 'CMI' 'DLR' 'AMT' 'IVZ' 'SCHW' 'PFE' '

In [74]:
# Now that we know what stocks we are interested in, we should define a variable with this information so we can pull those Tickers information
# List of stock symbols
stock_symbols = ['MGM', 'MET','NEM', 'NDAQ', 'KO', 'RHI', 'UPS', 'ILMN', 'FDX', 'HLT', 'CTLT', 'DFS',
                 'XEL', 'STZ', 'KMI', 'PKG', 'AMZN', 'GIS', 'NDSN', 'WMB', 'CSCO', 'IPG', 'GS', 'BG',
                 'SBAC', 'MDT', 'WBD', 'CMG', 'ES', 'CVS', 'FSLR', 'PG', 'LVS', 'LLY', 'WMT', 'BALL',
                 'DD', 'OMC', 'TSN', 'AVB', 'HAS', 'MPC', 'AAPL', 'MSFT', 'ABG.BE', 'GOOGL', 'HCA',
                 'KR', 'JBHT', 'CCL', 'BAX', 'BAC', 'APD', 'PLD', 'MAR', 'ATO', 'TXN', 'COST', 'BLK',
                 'CBOE', 'DIS', 'LNC', 'SPGI', 'ABT', 'SLB', 'PARA', 'AAL', 'LMT', 'FCX', 'BA',
                 'ENPH', 'FMC', 'CBRE', 'TAP', 'HAL', 'PWR', 'EFX', 'CNC', 'ETSY', 'WM', 'COP', 'ALL',
                 'NSC', 'UAL', 'UNH', 'LEN', 'MSCI', 'DE', 'LIN', 'PHM', 'ODFL', 'PFG', 'SBUX', 'COF',
                 'DXCM', 'T', 'HRL', 'CMCSA', 'GD', 'WBA', 'INTC', 'ADM', 'STT', 'HUM', 'EQR', 'PCG',
                 'KDP', 'PEP', 'GE', 'GM', 'AXP', 'ABBV', 'NFLX', 'BSX', 'UHS', 'NLSN', 'IT', 'MS',
                 'CARR', 'RSG', 'DAL', 'MO', 'CVX', 'NKE', 'STLD', 'VZ', 'CAT', 'RCL', 'VLO', 'TSLA',
                 'BKR', 'LYB', 'CZR', 'ICE', 'AMGN', 'MCD', 'CHTR', 'BRO', 'DHI', 'TMUS', 'AMD',
                 'PGR', 'INVH', 'PSX', 'NUE', 'LYV', 'CMI', 'DLR', 'AMT', 'IVZ', 'SCHW', 'PFE', 'OKE',
                 'CAH', 'CL', 'F', 'ETN', 'IP', 'NCLH', 'ALK', 'HON', 'EW', 'SRE', 'AON', 'CTAS',
                 'SBNY', 'LH', 'HPE', 'KMB', 'IBM', 'RL', 'CPT', 'DGX', 'WRK', 'DVA', 'PM', 'MCK',
                 'ARE', 'FOXA', 'O' ,'CSX', 'JPM', 'PRU', 'CCI', 'GNRC', 'KHC', 'CME', 'HSY', 'DHR',
                 'LHX', 'EXR', 'TGT', 'AMCR', 'NVDA', 'GILD', 'LUV', 'EBAY', 'XOM', 'CRM', 'C', 'MU',
                 'BDX', 'BIIB', 'AMAT', 'HST', 'NI', 'EMN', 'DG', 'FE', 'OXY', 'EL', 'AIZ', 'EIX',
                 'EMR', 'DTE', 'TPR', 'UNP', 'WFC', 'MMM', 'D', 'SEE', 'EQIX', 'WYNN', 'SIVBQ',
                 'MRNA', 'SYK', 'COO', 'ED', 'DUK', 'VTR', 'WAB', 'SYY', 'DRI', 'CPB', 'K', 'CLX',
                 'DOW', 'APH', 'ADBE', 'TRGP', 'AFL', 'CAG', 'AAP', 'HPQ', 'ON', 'CI', 'CHRW', 'MDLZ',
                 'FRCB', 'ETR', 'ESS', 'TMO', 'VTRS', 'MCO', 'MNST', 'TDY', 'CF', 'CB', 'ORCL', 'VFC',
                 'ZBH', 'PAYX', 'CE', 'LRCX', 'BBY', 'SHW', 'FRT', 'NWSA', 'VRSK', 'BBWI', 'DLTR',
                 'MOS', 'PXD', 'SWK', 'BXP', 'PEAK', 'NOC', 'PCAR', 'TSCO', 'CHD', 'SEDG', 'TRV',
                 'SYF', 'HD', 'ACN', 'IDXX', 'URI', 'NRG', 'ROK', 'ADI', 'STX', 'GL', 'LOW', 'HSIC',
                 'TFX', 'WDC', 'POOL', 'PNC', 'BR', 'CSGP', 'WELL', 'BK', 'DISH', 'HIG', 'PPG', 'ECL',
                 'JNJ', 'YUM', 'CFG', 'AEP', 'RMD', 'KEY', 'AIG', 'EOG', 'MSI', 'AVY', 'MOH', 'AZO',
                 'ULTA', 'XYL', 'VRTX', 'STE', 'AMP', 'PANW', 'IEX', 'BMY', 'CMA', 'NTRS', 'KMX',
                 'MA', 'CRL', 'CNP', 'APTV', 'HBAN', 'NXPI', 'HES', 'ADSK','ISRG', 'RF', 'EVRG',
                 'FITB', 'EXC', 'AES', 'PEG', 'FTNT', 'INTU', 'TXT', 'AWK', 'MCHP', 'REG', 'APA',
                 'ZION', 'TFC']

In [75]:
# Empty df to store the data
all_stock_data = pd.DataFrame()

In [76]:
# Want to know the earliest and latest date in the original df so I can pull those dates information from the yahoo api

# Convert 'Date' column to datetime format to call min and max functions
df['pubDate_brief'] = pd.to_datetime(df['pubDate_brief'])

# Get the earliest and latest dates
earliest_date = df['pubDate_brief'].min()
latest_date = df['pubDate_brief'].max()

# Print results
print(f"Earliest Date: {earliest_date}")
print(f"Latest Date: {latest_date}")

Earliest Date: 2022-09-15 00:00:00
Latest Date: 2023-09-08 00:00:00


# Yahoo Finance API

In [77]:
# Loop through each stock symbol defined above and get data from the yahoo finance API data based on dates in previous code chunk
for i in stock_symbols:
    stock_data = yf.download(i, start='2022-09-15', end='2023-09-08')
    stock_data['Stock_Symbol'] = i

    # Reset the index to include 'Date' as a column
    stock_data.reset_index(inplace=True)

    all_stock_data = pd.concat([all_stock_data, stock_data])

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

In [78]:
# View stock data from yahoo api
all_stock_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Stock_Symbol
0,2022-09-15,33.950001,35.169998,33.880001,34.43,34.427162,5200300.0,MGM
1,2022-09-16,33.77,33.919998,32.919998,33.360001,33.35725,12743900.0,MGM
2,2022-09-19,32.84,34.450001,32.779999,34.360001,34.35717,4712200.0,MGM
3,2022-09-20,34.450001,34.889999,34.130001,34.57,34.56715,5708700.0,MGM
4,2022-09-21,34.540001,34.689999,32.759998,32.779999,32.777298,6233500.0,MGM


In [79]:
all_stock_data.shape

(86592, 8)

In [80]:
all_stock_data.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
       'Stock_Symbol'],
      dtype='object')

In [81]:
# Now calculate the daily returns

# Convert 'Date' column to datetime format
all_stock_data['Date'] = pd.to_datetime(all_stock_data['Date'])

# Sort the df by 'Stock_Symbol' and 'Date'
all_stock_data.sort_values(by=['Stock_Symbol', 'Date'], inplace=True)

# Calculate daily return
all_stock_data['Daily_Return'] = all_stock_data.groupby('Stock_Symbol')['Adj Close'].pct_change()

# Print the result
all_stock_data[['Date', 'Stock_Symbol', 'Adj Close', 'Daily_Return']]

Unnamed: 0,Date,Stock_Symbol,Adj Close,Daily_Return
0,2022-09-15,AAL,14.270000,
1,2022-09-16,AAL,13.750000,-0.036440
2,2022-09-19,AAL,14.210000,0.033455
3,2022-09-20,AAL,13.970000,-0.016889
4,2022-09-21,AAL,13.230000,-0.052971
...,...,...,...,...
241,2023-08-31,ZION,35.073166,0.014575
242,2023-09-01,ZION,36.001865,0.026479
243,2023-09-05,ZION,35.695595,-0.008507
244,2023-09-06,ZION,34.401344,-0.036258


# Merge Data

In [82]:
# Now combine the all_stock_data to the original df called df. Join based on date and stock symbol.

# Merge df on 'Date' and 'Stock_Symbol'
merged_df = pd.merge(df, all_stock_data, how='inner', left_on=['pubDate_brief', 'Ticker'], right_on=['Date', 'Stock_Symbol'])

# Print the resulting merged df
merged_df

Unnamed: 0,index,title & content,sentiment_perigon,summary,description,Ticker,Sector,Industry,Company,SASB,...,Unnamed: 28,Date,Open,High,Low,Close,Adj Close,Volume,Stock_Symbol,Daily_Return
0,12024,Delays Won‚Äö√Ñ√¥t Hurt Japan‚Äö√Ñ√¥s First Ca...,"{'positive': 0.026093118, 'negative': 0.923414...",Osaka Governor Hirofumi Yoshimura said that th...,Years of delay to plans for Japan‚Äö√Ñ√¥s firs...,MGM,Services,Casinos & Gaming,MGM Resorts International,{'Internal Controls on Money Laundering': 'By ...,...,,2023-05-18,42.110001,43.189999,42.029999,42.939999,42.939999,3588900.0,MGM,0.017777
1,20675,MetLife (MET) Could Be a Great Choice - Gettin...,"{'positive': 0.8553927, 'negative': 0.01334850...",MetLife (MET) is a Finance stock that has seen...,Dividends are one of the best benefits to bein...,MET,Financials,Insurance,Metlife Inc,{'Financed Emissions': 'Entities participating...,...,,2022-10-31,72.830002,73.570000,72.779999,73.209999,69.754471,4705700.0,MET,0.000957
2,33685,New York Cements Itself as the Gold Mining Cap...,"{'positive': 0.46477953, 'negative': 0.0338994...","This week, top-five producer AngloGold Ashanti...",(Bloomberg) -- The momentum has been building ...,NEM,Extractives & Minerals Processing,Metals & Mining,Newmont Corp,{'Tailings Storage Facilities Management': 'Th...,...,,2023-02-08,48.860001,49.139999,48.230000,48.349998,46.469631,5396900.0,NEM,-0.007594
3,54787,Newmont Corporation (NEM) Stock Sinks As Marke...,"{'positive': 0.105259955, 'negative': 0.783727...",Newmont Corporation (NEM) Stock Sinks As Marke...,"In the latest trading session, Newmont Corpora...",NEM,Extractives & Minerals Processing,Metals & Mining,Newmont Corp,{'Tailings Storage Facilities Management': 'Th...,...,,2023-02-08,48.860001,49.139999,48.230000,48.349998,46.469631,5396900.0,NEM,-0.007594
4,12072,"Shareholders v. Tesla, Nasdaq's diversity rule...","{'positive': 0.02043453, 'negative': 0.6323841...",\n\nThe case is In re Tesla Inc Securities Lit...,Some of the biggest securities cases of 2023 a...,NDAQ,Financials,Security & Commodity Exchanges,Nasdaq Inc,{'Managing Conflicts of Interest': 'Security a...,...,,2023-05-18,54.980000,55.299999,54.470001,55.220001,54.530140,1620200.0,NDAQ,0.005280
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4652,144179,Southwest Budgets $1 Billion To Overhaul Its A...,"{'positive': 0.012419085, 'negative': 0.940450...",In an email sent to members of its frequent fl...,Its holiday meltdown will cost the airline up ...,LUV,Transportation,Airlines,Southwest Airlines Co,{'Competitive Behaviour': 'The Airlines indust...,...,,2023-01-18,37.630001,37.709999,35.799999,36.299999,35.493191,9224300.0,LUV,-0.019978
4653,23476,American Airlines Pilots' Union Calls Strike A...,"{'positive': 0.837888, 'negative': 0.012910396...",American Airlines Pilots' Union (NASDAQ: AAL) ...,The union representing American Airlines Group...,AAL,Transportation,Airlines,American Airlines Group Inc.,{'Competitive Behaviour': 'The Airlines indust...,...,,2023-03-10,15.980000,16.000000,15.110000,15.460000,15.460000,27280200.0,AAL,-0.026448
4654,67995,Ford recalling over 1.2 MILLION cars over 'ser...,"{'positive': 0.019378478, 'negative': 0.849320...",Ford has announced it is recalling over 1.2 mi...,Just a day after Honda announced a half a mill...,F,Transportation,Automobiles,Ford Motor Co,{'Product Safety': 'Driving is a risky activit...,...,,2023-03-17,11.700000,11.730000,11.120000,11.300000,10.870924,249885100.0,F,-0.043993
4655,67085,Ford Making EVs Means Turning the Clock Back 1...,"{'positive': 0.08535305, 'negative': 0.3189886...",Ford CEO Jim Farley discussed the company's st...,Ford Making EVs Means Turning the Clock Back 1...,F,Transportation,Automobiles,Ford Motor Co,{'Product Safety': 'Driving is a risky activit...,...,,2023-05-23,11.530000,11.910000,11.530000,11.730000,11.425536,56843800.0,F,0.008598


In [89]:
# Double check that it joined correctly. Lets look at a specific ticker and date
ticker = 'MGM'
date = '2023-05-18'

# Condition for filtering the df
condition = (merged_df['Stock_Symbol'] == ticker) & (merged_df['Date'] == date)

# Retrieve rows based on the condition
result_df = merged_df[condition]

# Print the resulting df
result_df

# Getting merged_df data to cross reference with two other df's: all_stock_data and df

Unnamed: 0,index,title & content,sentiment_perigon,summary,description,Ticker,Sector,Industry,Company,SASB,...,Unnamed: 28,Date,Open,High,Low,Close,Adj Close,Volume,Stock_Symbol,Daily_Return
0,12024,Delays Won‚Äö√Ñ√¥t Hurt Japan‚Äö√Ñ√¥s First Ca...,"{'positive': 0.026093118, 'negative': 0.923414...",Osaka Governor Hirofumi Yoshimura said that th...,Years of delay to plans for Japan‚Äö√Ñ√¥s firs...,MGM,Services,Casinos & Gaming,MGM Resorts International,{'Internal Controls on Money Laundering': 'By ...,...,,2023-05-18,42.110001,43.189999,42.029999,42.939999,42.939999,3588900.0,MGM,0.017777


In [90]:
# Condition for filtering the df
condition = (df['Ticker'] == ticker) & (df['pubDate_brief'] == date)

# Retrieve rows based on the condition
result_df = df[condition]

# Print the resulting df
result_df

Unnamed: 0,index,title & content,sentiment_perigon,summary,description,Ticker,Sector,Industry,Company,SASB,...,pubDate_brief,pubDate,url,keywords,categories,entities,content,articleId,title,Unnamed: 28
0,12024,Delays Won‚Äö√Ñ√¥t Hurt Japan‚Äö√Ñ√¥s First Ca...,"{'positive': 0.026093118, 'negative': 0.923414...",Osaka Governor Hirofumi Yoshimura said that th...,Years of delay to plans for Japan‚Äö√Ñ√¥s firs...,MGM,Services,Casinos & Gaming,MGM Resorts International,{'Internal Controls on Money Laundering': 'By ...,...,2023-05-18,2023-05-18T21:25:29+00:00,https://www.dailymail.co.uk/health/article-121...,[{'name': 'identifiable user health informatio...,[{'name': 'Health'}],"[{'data': 'Chinese', 'type': 'NORP', 'mentions...",A popular fertility app used by women to track...,7be6da0f9313404ca02afd000b0e787a,Fertility app used by 500k women sold private ...,


In [91]:
# Condition for filtering the df
condition = (all_stock_data['Stock_Symbol'] == ticker) & (all_stock_data['Date'] == date)

# Retrieve rows based on the condition
result_df = all_stock_data[condition]

# Print the resulting df
result_df

# Data is the same so it merged correctly

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Stock_Symbol,Daily_Return
169,2023-05-18,42.110001,43.189999,42.029999,42.939999,42.939999,3588900.0,MGM,0.017777


In [92]:
# Check out earliest and latest dates in merged_df. Should be same as the original df

# Get the earliest and latest dates
earliest_date_pubDate = merged_df['pubDate_brief'].min()
latest_date_pubDate = merged_df['pubDate_brief'].max()

earliest_date_Date = merged_df['Date'].min()
latest_date_Date = merged_df['Date'].max()

# Print results
print(f"Earliest Date: {earliest_date_pubDate}")
print(f"Latest Date: {latest_date_pubDate}")
print('---------------------------------------')
print(f"Earliest Date: {earliest_date_Date}")
print(f"Latest Date: {latest_date_Date}")

Earliest Date: 2022-09-15 00:00:00
Latest Date: 2023-09-07 00:00:00
---------------------------------------
Earliest Date: 2022-09-15 00:00:00
Latest Date: 2023-09-07 00:00:00


# Export Merged Data

In [93]:
# Save this new merged_df to excel

# Define the file path and name
excel_file_path = '/content/drive/MyDrive/Vandy/Second Year/Spring semester/NLP Asset Management/Project 1/daily_returns.xlsx'

# Save the df to Excel
merged_df.to_excel(excel_file_path, index=False)