In [64]:
## Authors: Samay Bhagat and Alexander Domilescu

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
from datetime import datetime

In [65]:
# Date bounds kept as ISO strings. NOTE(review): no cell visible here reads
# `startdate`/`enddate`; confirm where they are consumed.
startdate = '2023-01-01'
enddate = '2023-10-31'

# The CSV is read without a header row and every cell is flattened, in
# row-major order, into one plain Python list.
ticker_file = 'Tickers_Example.csv'
tickers = pd.read_csv(ticker_file, header=None).to_numpy().ravel().tolist()

In [66]:
def is_valid_currency(ticker):
    """Return True when the ticker's reported currency is USD or CAD.

    Parameters
    ----------
    ticker : str
        Symbol handed to ``yf.Ticker``.

    Returns
    -------
    bool
        ``True`` if the ``'currency'`` entry of the ticker's ``info`` mapping
        equals ``'USD'`` or ``'CAD'`` (compared case-insensitively);
        ``False`` otherwise, including when the entry is missing, empty or
        ``None``.
    """
    stock_info = yf.Ticker(ticker).info or {}
    # Coalesce before upper-casing: .get('currency', '') only covers a missing
    # key, so a key that is present but None would raise AttributeError.
    currency = str(stock_info.get('currency') or '').upper()
    return currency in ('USD', 'CAD')

In [67]:
def get_stock_data(ticker, start='2022-01-01', end='2023-01-01'):
    """Fetch close prices, daily volume and metadata for one USD/CAD ticker.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``is_valid_currency``, ``yf.Ticker`` and
        ``yf.download``.
    start, end : str, optional
        Forwarded to ``yf.download`` as its ``start``/``end`` arguments. The
        defaults are the window that used to be hard-coded here.

    Returns
    -------
    dict or None
        ``None`` (after printing a "Skipping" message) when
        ``is_valid_currency(ticker)`` is false. Otherwise a dict with keys
        ``'ticker'``, ``'historical_close_data'`` (``data['Close']`` as a
        dict), ``'info'`` (the ticker's ``info`` mapping) and ``'volume'``
        (per-calendar-day volume sums as a dict).
    """
    # Check the currency first so rejected tickers skip the lookups below.
    if not is_valid_currency(ticker):
        print(f"Skipping {ticker}: Invalid currency.")
        return None

    stock_info = yf.Ticker(ticker).info

    # download historical data for the requested window
    data = yf.download(ticker, start=start, end=end)

    # Resampling to calendar days yields one entry per day between the first
    # and last row; days without any row (e.g. weekends) sum to 0.
    daily_volume = data['Volume'].resample('D').sum()

    return {
        'ticker': ticker,
        'historical_close_data': data['Close'].to_dict(),
        'info': stock_info,
        'volume': daily_volume.to_dict(),
    }

def process_tickers(ticker_list):
    """Run ``get_stock_data`` on each ticker and keep the non-``None`` results.

    Tickers are processed one at a time in the order given; any ticker for
    which ``get_stock_data`` returns ``None`` is left out of the returned list.
    """
    fetched = (get_stock_data(symbol) for symbol in ticker_list)
    return [entry for entry in fetched if entry is not None]

# Demo run on a hand-picked symbol list. NOTE(review): this rebinds `tickers`,
# replacing the list that was loaded from `ticker_file` in an earlier cell.
tickers = ['AAPL', 'MSFT', 'XYZ', 'GSY.TO']
result_list = process_tickers(tickers)

# Bare expression: displays the whole list as the cell output.
result_list

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
Skipping XYZ: Invalid currency.
[*********************100%%**********************]  1 of 1 completed


[{'ticker': 'AAPL',
  'historical_close_data': {Timestamp('2022-01-03 00:00:00'): 182.00999450683594,
   Timestamp('2022-01-04 00:00:00'): 179.6999969482422,
   Timestamp('2022-01-05 00:00:00'): 174.9199981689453,
   Timestamp('2022-01-06 00:00:00'): 172.0,
   Timestamp('2022-01-07 00:00:00'): 172.1699981689453,
   Timestamp('2022-01-10 00:00:00'): 172.19000244140625,
   Timestamp('2022-01-11 00:00:00'): 175.0800018310547,
   Timestamp('2022-01-12 00:00:00'): 175.52999877929688,
   Timestamp('2022-01-13 00:00:00'): 172.19000244140625,
   Timestamp('2022-01-14 00:00:00'): 173.07000732421875,
   Timestamp('2022-01-18 00:00:00'): 169.8000030517578,
   Timestamp('2022-01-19 00:00:00'): 166.22999572753906,
   Timestamp('2022-01-20 00:00:00'): 164.50999450683594,
   Timestamp('2022-01-21 00:00:00'): 162.41000366210938,
   Timestamp('2022-01-24 00:00:00'): 161.6199951171875,
   Timestamp('2022-01-25 00:00:00'): 159.77999877929688,
   Timestamp('2022-01-26 00:00:00'): 159.69000244140625,
   Ti

In [68]:
def filter_stocks(result_list, min_avg_volume=150000, min_trading_days=18):
    """Keep the stocks whose average daily volume meets a minimum threshold.

    For every stock, the entries of ``stock_data['volume']`` (a mapping of
    date -> daily volume) are grouped by calendar month. Only days with a
    volume greater than zero count as trading days, so zero-volume filler
    days are ignored. Months with fewer than ``min_trading_days`` trading
    days are dropped. The stock's score is the mean, over the remaining
    months, of each month's average volume per trading day; it is 0 when no
    month qualifies.

    Parameters
    ----------
    result_list : list of dict
        Records that each carry a ``'volume'`` mapping whose keys provide
        ``strftime`` (e.g. ``datetime`` or pandas ``Timestamp`` objects).
    min_avg_volume : number, optional
        Minimum score a stock needs to be kept.
    min_trading_days : int, optional
        Minimum number of trading days a month needs to be included.

    Returns
    -------
    list of dict
        The records from ``result_list`` that pass, in their original order.
    """
    filtered_list = []

    for stock_data in result_list:
        # month label -> [total volume, number of trading days]
        monthly_totals = {}
        for date, volume in stock_data['volume'].items():
            # A non-positive (or NaN) volume marks a day without trading;
            # counting it would inflate the day count and dilute the average.
            if not volume > 0:
                continue
            totals = monthly_totals.setdefault(date.strftime('%Y-%m'), [0, 0])
            totals[0] += volume
            totals[1] += 1

        # Average only over the months that are also counted in the
        # denominator, i.e. those with enough trading days.
        monthly_means = [
            total / days
            for total, days in monthly_totals.values()
            if days >= min_trading_days
        ]
        avg_monthly_volume = sum(monthly_means) / len(monthly_means) if monthly_means else 0

        if avg_monthly_volume >= min_avg_volume:
            filtered_list.append(stock_data)

    return filtered_list

# Apply the volume filter, with its default thresholds, to the fetched records.
filtered_result_list = filter_stocks(result_list)

# Bare expression: displays the surviving records as the cell output.
filtered_result_list

[{'ticker': 'AAPL',
  'historical_close_data': {Timestamp('2022-01-03 00:00:00'): 182.00999450683594,
   Timestamp('2022-01-04 00:00:00'): 179.6999969482422,
   Timestamp('2022-01-05 00:00:00'): 174.9199981689453,
   Timestamp('2022-01-06 00:00:00'): 172.0,
   Timestamp('2022-01-07 00:00:00'): 172.1699981689453,
   Timestamp('2022-01-10 00:00:00'): 172.19000244140625,
   Timestamp('2022-01-11 00:00:00'): 175.0800018310547,
   Timestamp('2022-01-12 00:00:00'): 175.52999877929688,
   Timestamp('2022-01-13 00:00:00'): 172.19000244140625,
   Timestamp('2022-01-14 00:00:00'): 173.07000732421875,
   Timestamp('2022-01-18 00:00:00'): 169.8000030517578,
   Timestamp('2022-01-19 00:00:00'): 166.22999572753906,
   Timestamp('2022-01-20 00:00:00'): 164.50999450683594,
   Timestamp('2022-01-21 00:00:00'): 162.41000366210938,
   Timestamp('2022-01-24 00:00:00'): 161.6199951171875,
   Timestamp('2022-01-25 00:00:00'): 159.77999877929688,
   Timestamp('2022-01-26 00:00:00'): 159.69000244140625,
   Ti

In [None]:
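# process_tickers returns a list of dictionaries (filter_stocks returns a
# subset of that list); each dictionary holds:
# 1. the ticker name ('ticker'),
# 2. the historical data with only the CLOSE column ('historical_close_data'),
# 3. the stock info ('info'),
# 4. the daily volume ('volume').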