In [2]:
# Import necessary libraries
import pandas as pd
import yfinance as yf

In [3]:
# Load the dataset
stock_data= pd.read_csv('../data/raw_analyst_ratings.csv')

In [3]:
print(stock_data.head)

<bound method NDFrame.head of          Unnamed: 0                                           headline  \
0                 0            Stocks That Hit 52-Week Highs On Friday   
1                 1         Stocks That Hit 52-Week Highs On Wednesday   
2                 2                      71 Biggest Movers From Friday   
3                 3       46 Stocks Moving In Friday's Mid-Day Session   
4                 4  B of A Securities Maintains Neutral on Agilent...   
...             ...                                                ...   
1407323     1413844             Top Narrow Based Indexes For August 29   
1407324     1413845  Recap: Wednesday's Top Percentage Gainers and ...   
1407325     1413846  UPDATE: Oppenheimer Color on China Zenix Auto ...   
1407326     1413847  Oppenheimer Initiates China Zenix At Outperfor...   
1407327     1413848  China Zenix Auto International Opens For Tradi...   

                                                       url          publisher  \


In [4]:
# Prepare the Data
stock_data['date'] = pd.to_datetime(stock_data['date'], format='ISO8601')


In [5]:
print(stock_data)

         Unnamed: 0                                           headline  \
0                 0            Stocks That Hit 52-Week Highs On Friday   
1                 1         Stocks That Hit 52-Week Highs On Wednesday   
2                 2                      71 Biggest Movers From Friday   
3                 3       46 Stocks Moving In Friday's Mid-Day Session   
4                 4  B of A Securities Maintains Neutral on Agilent...   
...             ...                                                ...   
1407323     1413844             Top Narrow Based Indexes For August 29   
1407324     1413845  Recap: Wednesday's Top Percentage Gainers and ...   
1407325     1413846  UPDATE: Oppenheimer Color on China Zenix Auto ...   
1407326     1413847  Oppenheimer Initiates China Zenix At Outperfor...   
1407327     1413848  China Zenix Auto International Opens For Tradi...   

                                                       url          publisher  \
0        https://www.benzinga.

In [5]:
# Identify Stock Symbols and Date Ranges
date_ranges = stock_data.groupby('stock')['date'].agg(start_date='min', end_date='max')


In [6]:
print(date_ranges)

                     start_date                  end_date
stock                                                    
A     2009-04-29 00:00:00-04:00 2020-06-05 10:30:54-04:00
AA    2009-08-10 00:00:00-04:00 2020-06-09 10:52:15-04:00
AAC   2010-03-25 00:00:00-04:00 2019-10-25 16:09:59-04:00
AADR  2013-03-05 15:42:46-04:00 2013-09-06 15:06:35-04:00
AAL   2011-05-16 00:00:00-04:00 2020-06-10 11:21:01-04:00
...                         ...                       ...
ZTR   2009-08-10 00:00:00-04:00 2020-03-19 10:49:42-04:00
ZTS   2013-01-17 00:00:00-04:00 2020-06-11 10:22:31-04:00
ZU    2013-11-14 00:00:00-04:00 2020-01-09 14:34:23-04:00
ZUMZ  2009-11-16 00:00:00-04:00 2020-06-05 07:24:15-04:00
ZX    2011-05-12 00:00:00-04:00 2018-06-15 09:01:12-04:00

[6204 rows x 2 columns]


In [7]:
# Determine date ranges and fetch stock data
# Calculate the earliest and latest date for each stock to define the period for which to fetch stock prices
stock_date_ranges = stock_data.groupby('stock').agg({'date': ['min', 'max']})
stock_date_ranges.columns = ['earliest_date', 'latest_date']

In [9]:
print(stock_date_ranges)

                  earliest_date               latest_date
stock                                                    
A     2009-04-29 00:00:00-04:00 2020-06-05 10:30:54-04:00
AA    2009-08-10 00:00:00-04:00 2020-06-09 10:52:15-04:00
AAC   2010-03-25 00:00:00-04:00 2019-10-25 16:09:59-04:00
AADR  2013-03-05 15:42:46-04:00 2013-09-06 15:06:35-04:00
AAL   2011-05-16 00:00:00-04:00 2020-06-10 11:21:01-04:00
...                         ...                       ...
ZTR   2009-08-10 00:00:00-04:00 2020-03-19 10:49:42-04:00
ZTS   2013-01-17 00:00:00-04:00 2020-06-11 10:22:31-04:00
ZU    2013-11-14 00:00:00-04:00 2020-01-09 14:34:23-04:00
ZUMZ  2009-11-16 00:00:00-04:00 2020-06-05 07:24:15-04:00
ZX    2011-05-12 00:00:00-04:00 2018-06-15 09:01:12-04:00

[6204 rows x 2 columns]


In [None]:
# Use yfinance to download stock data for each stock based on the calculated date ranges
# Initialize an empty DataFrame to store all stock data

# Loop through date ranges and download stock data for each symbol
for index, row in stock_date_ranges.iterrows():
    stock_symbol = index
    start_date = row['earliest_date']
    end_date = row['latest_date']
    
    try:
        # Download stock data
        stock_data = yf.download(stock_symbol, start=start_date, end=end_date)
        
        # Add stock symbol as a column in the DataFrame
        stock_data['Symbol'] = stock_symbol
        
        # Append stock data to the DataFrame containing all stock data
        all_stock_data = pd.concat([all_stock_data, stock_data])
        
        
        print("Stock data for", stock_symbol, "downloaded successfully.")
    except Exception as e:
        print("Failed to download stock data for", stock_symbol, ":", str(e))

# Save all stock data to a single CSV file
all_stock_data.to_csv('all_stock_data.csv')
