In [1]:
## Authors: Samay Bhagat and Alexander Domilescu

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
from datetime import datetime

In [2]:
# Analysis window as ISO date strings.
# NOTE(review): the cells shown below never reference these two names; get_stock_data
# downloads 2022-01-01..2023-01-01 rather than this window -- confirm which is intended.
startdate = '2023-01-01'
enddate = '2023-10-31'

# CSV of ticker symbols: read without a header row, then flattened into a plain Python list.
ticker_file = 'Tickers_Example.csv' 
tickers = pd.read_csv(ticker_file, header=None).values.flatten().tolist() 

In [3]:
# is_valid_currency takes a ticker name and ensures it's denominated in USD or CAD

def is_valid_currency(ticker, allowed_currencies=('USD', 'CAD')):
    """Return True if the ticker's reported currency is one of the allowed ones.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.Ticker``.
    allowed_currencies : iterable of str, optional
        Upper-case currency codes that are accepted. Defaults to USD and CAD.

    Returns
    -------
    bool
        False when the info mapping has no currency, or the currency is
        None/empty, or it is not in ``allowed_currencies``.
    """
    stock_info = yf.Ticker(ticker).info or {}
    # `.get('currency', '')` only covers a missing key; a key that is present
    # with value None would make `.upper()` raise, so normalise falsy values.
    currency = str(stock_info.get('currency') or '').upper()
    return currency in tuple(allowed_currencies)

In [4]:
def get_stock_data(ticker, start='2022-01-01', end='2023-01-01'):
    """Download price/volume history for one ticker if its currency is accepted.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.Ticker`` and ``yf.download``.
    start, end : str, optional
        Date range forwarded to ``yf.download``. The defaults reproduce the
        range that used to be hard-coded here; pass the notebook-level
        ``startdate`` / ``enddate`` to use the configured analysis window.

    Returns
    -------
    dict or None
        None (after printing a "Skipping" message) when ``is_valid_currency``
        rejects the ticker. Otherwise a dict with keys:
        ``'ticker'`` (the symbol), ``'info'`` (the ``yf.Ticker(...).info``
        mapping), ``'close'`` (DataFrame built from the ``Close`` column) and
        ``'volume'`` (DataFrame of the ``Volume`` column resampled to one row
        per calendar day).
    """
    # Fetched here so it can be stored in the result; note that
    # is_valid_currency performs its own info lookup for the same ticker.
    stock_info = yf.Ticker(ticker).info

    if not is_valid_currency(ticker):
        print(f"Skipping {ticker}: Invalid currency.")
        return None

    data = yf.download(ticker, start=start, end=end)

    # Daily (not monthly) volume: resample('D').sum() yields one row per
    # calendar day, so days without any data appear with a volume of 0.
    daily_volume = data['Volume'].resample('D').sum()

    return {
        'ticker': ticker,
        'info': stock_info,
        'close': pd.DataFrame(data['Close']),
        'volume': pd.DataFrame(daily_volume),
    }

# accumulates a list of dictionaries through get_stock_data    
def process_tickers(ticker_list):
    """Run get_stock_data over every ticker and keep the successful lookups.

    Tickers are processed in the order given; any ticker for which
    get_stock_data returns None is left out of the returned list.
    """
    # map() is lazy, so get_stock_data is still called one ticker at a time, in order.
    fetched = map(get_stock_data, ticker_list)
    return [entry for entry in fetched if entry is not None]

# Example override of the ticker list loaded from the CSV, used to demonstrate the filters.
# In the run shown below, XYZ is skipped by the currency check and GSY.TO is later removed
# by the volume filter, so only AAPL should remain at the end.
# TODO: delete this override for a real run so the tickers read from `ticker_file`
# (second code cell) are used instead.
tickers = ['AAPL', 'XYZ', 'GSY.TO']

# Download data for every ticker that passes the currency check.
result_list = process_tickers(tickers)

# Display the full result (this includes each ticker's complete info mapping).
result_list

[*********************100%%**********************]  1 of 1 completed
Skipping XYZ: Invalid currency.
[*********************100%%**********************]  1 of 1 completed


[{'ticker': 'AAPL',
  'info': {'address1': 'One Apple Park Way',
   'city': 'Cupertino',
   'state': 'CA',
   'zip': '95014',
   'country': 'United States',
   'phone': '408 996 1010',
   'website': 'https://www.apple.com',
   'industry': 'Consumer Electronics',
   'industryKey': 'consumer-electronics',
   'industryDisp': 'Consumer Electronics',
   'sector': 'Technology',
   'sectorKey': 'technology',
   'sectorDisp': 'Technology',
   'longBusinessSummary': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, and HomePod. It also provides AppleCare support and cloud services; and operates various platforms, including the App Store that allow customers to discover and download applications and digi

In [5]:
def _mean_daily_volume_over_valid_months(volume_data, min_trading_days):
    """Average, across qualifying months, of each month's mean volume per trading day."""
    volume = volume_data['Volume']

    # Only rows with positive volume count as trading days; the daily resample
    # upstream pads non-trading calendar days with a volume of 0.
    traded = volume[volume > 0]
    if traded.empty:
        return 0

    per_month = traded.groupby(traded.index.strftime('%Y-%m'))
    trading_days = per_month.count()
    mean_daily_volume = per_month.mean()

    # Months with too few trading days are dropped from BOTH the sum and the
    # divisor, so they can neither inflate nor dilute the result.
    valid = mean_daily_volume[trading_days >= min_trading_days]
    return valid.mean() if len(valid) else 0


def filter_stocks(result_list, min_avg_volume=150000, min_trading_days=18):
    """Keep only the stocks whose average volume meets the minimum.

    For each stock, the rows of ``stock_data['volume']`` (a DataFrame with a
    ``'Volume'`` column indexed by date) are grouped by calendar month. A
    month qualifies when it has at least ``min_trading_days`` rows with
    positive volume. The stock's score is the mean, over qualifying months,
    of that month's average volume per trading day; a stock with no
    qualifying month scores 0.

    Parameters
    ----------
    result_list : list of dict
        Entries as produced by get_stock_data; only the ``'volume'`` key is read.
    min_avg_volume : number, optional
        Minimum score a stock needs in order to be kept.
    min_trading_days : int, optional
        Minimum number of positive-volume days for a month to qualify.

    Returns
    -------
    list of dict
        The entries of ``result_list`` that pass, in their original order.
    """
    return [
        stock_data
        for stock_data in result_list
        if _mean_daily_volume_over_valid_months(stock_data['volume'], min_trading_days) >= min_avg_volume
    ]

# Apply the volume filter with its default thresholds
# (min_avg_volume=150000, min_trading_days=18).
filtered_list = filter_stocks(result_list)

# Display the entries that survived the filter.
filtered_list

[{'ticker': 'AAPL',
  'info': {'address1': 'One Apple Park Way',
   'city': 'Cupertino',
   'state': 'CA',
   'zip': '95014',
   'country': 'United States',
   'phone': '408 996 1010',
   'website': 'https://www.apple.com',
   'industry': 'Consumer Electronics',
   'industryKey': 'consumer-electronics',
   'industryDisp': 'Consumer Electronics',
   'sector': 'Technology',
   'sectorKey': 'technology',
   'sectorDisp': 'Technology',
   'longBusinessSummary': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, and HomePod. It also provides AppleCare support and cloud services; and operates various platforms, including the App Store that allow customers to discover and download applications and digi

In [6]:
# filter_stocks returns a list of dictionaries; each dictionary holds:
# 1. 'ticker' : the ticker name
# 2. 'info'   : the stock info mapping
# 3. 'close'  : pandas DataFrame of historical data with only the Close column
# 4. 'volume' : pandas DataFrame of daily volume

# Number of entries at each stage: input tickers, after the currency check, after the volume filter.
breakdown = [len(tickers), len(result_list), len(filtered_list)]
breakdown 

# The lengths go from 3 to 2 to 1. We start with a list of 3 tickers; XYZ is dropped first
# because it does not have a valid currency. GSY.TO is dropped next because it does not meet
# the volume requirement. The final list holds only the dictionary for AAPL, hence length 1.

[3, 2, 1]