In [1]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
import pytz

In [2]:
def removeSeconds(df):
    modified_dates=[]
    for date in df['Date']:
        format = '%Y-%m-%d %H:%M:%S%z'
        date = datetime.strptime(date, format)
        modified_date = date.replace(second=0)
        modified_dates.append(modified_date)
    df['Date']=modified_dates
    return df

In [3]:
def convertTimezone(timezone, df,name):
    """
    Converting timestamps in to a specified timezone. 

    Arguments:
        timezone (str): Target timezone to convert timestamps to.
        df : DataFrame containing a column of timestamp strings.
        name: name of the timestamp column

    Returns a dataFrame with timestamps converted to the specified timezone.
    """

    target_tz = pytz.timezone(timezone)

    if df.index is not None:
        # Convert the index to a timezone-aware index
        df.index = pd.to_datetime(df.index, utc=True).tz_convert(timezone)

    else:
        # Convert timestamp column to datetime objects
        df[name] = pd.to_datetime(df[name],utc=True)
        df[name] = df[name].dt.tz_convert(target_tz)
    return df

In [4]:
def getStockPrices(start_date,end_date):

    start_date = datetime.strptime(start_date, "%Y-%m-%d")
    end_date = datetime.strptime(end_date, "%Y-%m-%d")
    interval = "1m"                     

    current_date = start_date
    # Initialize an empty list to store data
    data_list = []  

    while current_date < end_date:
        remaining_days = (end_date - current_date).days
        
        if remaining_days < 7:
            # If remaining days are less than 7, get data till end_date
            data = yf.download("AAPL", start=current_date, end=end_date, interval=interval)
            data = convertTimezone('UTC', data, 'Datetime')
            # Append data to the list
            data_list.append(data)
            current_date = end_date
        else:
            # If remaining days are 7 or more, get data for the next 7 days
            next_date = current_date + timedelta(days=7)
            data = yf.download("AAPL", start=current_date, end=next_date, interval=interval)
            data = convertTimezone('UTC', data, 'Datetime')
            # Append data to the list
            data_list.append(data)
            current_date = next_date

    # Concatenate the list of dataframes into a single dataframe
    final_data = pd.concat(data_list)
    final_data.reset_index(inplace=True)
    return final_data


In [5]:
def addStockPrices(stockPrices,df):

    # df=removeSeconds(df)

    stockPrices['Datetime'] = pd.to_datetime(stockPrices['Datetime'])
    stockPrices['Datetime'] = stockPrices['Datetime'].dt.strftime('%Y-%m-%d %H:%M:%S')

    df['Date'] = pd.to_datetime(df['Date'])
    df['Date'] = df['Date'].dt.strftime('%Y-%m-%d %H:%M:%S')

    # store stock prices
    stock_prices = []

    # Iterate through each row in the News dataset
    for index, row in df.iterrows():
        publication_date = row['Date']
        # Check if the publication date exists in the stockPrice dataset
        if publication_date in stockPrices['Datetime'].values:
            # Retrieve the corresponding stock price
            stock_price = stockPrices.loc[stockPrices['Datetime'] == publication_date, 'Adj Close'].values[0]
            stock_prices.append(stock_price)
        else:
            stock_prices.append(None)  # If publication date not found, append None

    # Add the list of stock prices to the News dataset as a new column
    df['stock_price'] = stock_prices
    return df

In [6]:
def addStockPricesAfter(stockPrices,df,time):

    # df=removeSeconds(df)

    stockPrices['Datetime'] = pd.to_datetime(stockPrices['Datetime'])
    stockPrices['Datetime'] = stockPrices['Datetime'].dt.strftime('%Y-%m-%d %H:%M:%S')

    df['Date'] = pd.to_datetime(df['Date'])
    df['Date']+=timedelta(minutes=time)
    df['Date'] = df['Date'].dt.strftime('%Y-%m-%d %H:%M:%S')

    # store stock prices
    stock_prices = []

    # Iterate through each row in the News dataset
    for index, row in df.iterrows():
        publication_date = row['Date']
        # Check if the publication date exists in the stockPrice dataset
        if publication_date in stockPrices['Datetime'].values:
            # Retrieve the corresponding stock price
            stock_price = stockPrices.loc[stockPrices['Datetime'] == publication_date, 'Adj Close'].values[0]
            stock_prices.append(stock_price)
        else:
            stock_prices.append(None)  # If publication date not found, append None

    # Add the list of stock prices to the News dataset as a new column
    df[f'stock_price_after_{time}_mins'] = stock_prices
    return df

In [7]:
def turnToCSV(df,name):
    df.to_csv(fr'C:\Users\Legion\Desktop\FinalYearProject\data\{name}.csv')

In [8]:
News=pd.read_csv(r'C:\Users\Legion\Desktop\FinalYearProject\data\News.csv')


In [9]:
start_date = News['Date'].min().split()[0] 
end_date =  News['Date'].max().split()[0]  
stockPrices=getStockPrices(start_date,end_date)
turnToCSV(stockPrices,'stockPrices')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [10]:
News=removeSeconds(News)

In [11]:
News=addStockPrices(stockPrices,News)
News= News.dropna(subset=['stock_price'])
turnToCSV(News,'NewsWithStockPrice')

In [12]:
News

Unnamed: 0,Date,article_title,article,source_name,source_link,stock_price
13,2024-03-28 20:00:00,Mag 7 Stock Teases Breakout — While Nvidia Cli...,"As Mag 7 stocks like Nvidia and Meta soar, Goo...",Yahoo Finance,https://finance.yahoo.com/quote/AAPL/m/403395f...,171.479996
14,2024-03-28 19:33:00,Magnificent Seven Stocks To Buy And Watch: App...,"Magnificent Seven stocks, including AI leader ...",Yahoo Finance,https://finance.yahoo.com/quote/AAPL/m/4205eaa...,172.149994
15,2024-03-28 19:15:00,"Apple's new iPad Pro likely to launch in May, ...",(Reuters) - Apple's overseas suppliers have in...,Yahoo Finance,https://finance.yahoo.com/quote/AAPL/news/appl...,171.949997
16,2024-03-28 17:47:00,Apple Plans New iPad Pro for May as Production...,(Bloomberg) -- Apple Inc.’s overseas suppliers...,Yahoo Finance,https://finance.yahoo.com/quote/AAPL/news/appl...,171.264999
17,2024-03-28 17:45:00,16 Most Profitable Tech Stocks To Invest In,"In this piece, we will take a look at the 16 m...",Yahoo Finance,https://finance.yahoo.com/quote/AAPL/news/16-m...,171.229996
...,...,...,...,...,...,...
559,2024-03-05 17:10:00,"Goodbye, Project Titan","In this podcast, Motley Fool analyst Asit Shar...",Yahoo Finance,https://finance.yahoo.com/quote/AAPL/news/good...,170.503799
560,2024-03-05 16:59:00,Apple Stock Gloom Deepens as Pressure to Show ...,(Bloomberg) -- Apple Inc.’s move to shutter it...,Yahoo Finance,https://finance.yahoo.com/quote/AAPL/news/appl...,170.259201
561,2024-03-05 16:55:00,What the shakeup in Magnificent 7 stocks means...,Stock market indices have been propped up on t...,Yahoo Finance,https://finance.yahoo.com/quote/AAPL/video/sha...,170.160004
562,2024-03-05 16:46:00,Alphabet (GOOGL) Enhances Google Maps With New...,Alphabet’s GOOGL Google is gaining strong mome...,Yahoo Finance,https://finance.yahoo.com/quote/AAPL/news/alph...,169.865005


In [13]:
News=addStockPricesAfter(stockPrices,News,30)
News= News.dropna(subset=['stock_price_after_30_mins'])
turnToCSV(News,'NewsWithStockPrice')

In [14]:
News

Unnamed: 0,Date,article_title,article,source_name,source_link,stock_price,stock_price_after_30_mins
15,2024-03-28 19:45:00,"Apple's new iPad Pro likely to launch in May, ...",(Reuters) - Apple's overseas suppliers have in...,Yahoo Finance,https://finance.yahoo.com/quote/AAPL/news/appl...,171.949997,171.964996
16,2024-03-28 18:17:00,Apple Plans New iPad Pro for May as Production...,(Bloomberg) -- Apple Inc.’s overseas suppliers...,Yahoo Finance,https://finance.yahoo.com/quote/AAPL/news/appl...,171.264999,171.604996
17,2024-03-28 18:15:00,16 Most Profitable Tech Stocks To Invest In,"In this piece, we will take a look at the 16 m...",Yahoo Finance,https://finance.yahoo.com/quote/AAPL/news/16-m...,171.229996,171.705002
18,2024-03-28 15:52:00,Magnificent Seven Stocks To Buy And Watch: App...,"Magnificent Seven stocks, including AI leader ...",Yahoo Finance,https://finance.yahoo.com/quote/AAPL/m/4205eaa...,170.705002,170.994995
19,2024-03-28 15:19:00,"Apple-Supplier Foxconn Hits Record High, and O...",Taiwan's Foxconn hit a record closing high on ...,Yahoo Finance,https://finance.yahoo.com/quote/AAPL/m/d8d8576...,170.789993,170.789993
...,...,...,...,...,...,...,...
559,2024-03-05 17:40:00,"Goodbye, Project Titan","In this podcast, Motley Fool analyst Asit Shar...",Yahoo Finance,https://finance.yahoo.com/quote/AAPL/news/good...,170.503799,170.317596
560,2024-03-05 17:29:00,Apple Stock Gloom Deepens as Pressure to Show ...,(Bloomberg) -- Apple Inc.’s move to shutter it...,Yahoo Finance,https://finance.yahoo.com/quote/AAPL/news/appl...,170.259201,170.785004
561,2024-03-05 17:25:00,What the shakeup in Magnificent 7 stocks means...,Stock market indices have been propped up on t...,Yahoo Finance,https://finance.yahoo.com/quote/AAPL/video/sha...,170.160004,170.804993
562,2024-03-05 17:16:00,Alphabet (GOOGL) Enhances Google Maps With New...,Alphabet’s GOOGL Google is gaining strong mome...,Yahoo Finance,https://finance.yahoo.com/quote/AAPL/news/alph...,169.865005,170.692505
