In [2]:
#These are the libraries you can use.  You may add any libraries directly related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime

## Group Assignment
### Team Number: 15
### Team Member Names: Neil Zhang, Rahim Rehan, Krish Patel
### Team Strategy Chosen: Risk-Free (Market Beat, Market Meet, Risk-Free)

In [None]:
# Date window used for every price-history download below
# (passed as start=/end= to yfinance's Ticker.history).
start_date = '2024-01-01'
end_date = '2025-11-12'

# Read the ticker file with header=None, so every CSV row -- including the
# first one -- becomes a data row. get_ticker_list() below drops row 0.
tickers = pd.read_csv("Tickers_Example.csv", header=None)

def format_tickers(csv_file_path, ticker_column_name="Ticker"):
    """
    Build one long DataFrame of daily price history for every eligible ticker.

    Reads ticker symbols from a CSV, queries yfinance for each symbol, and
    keeps a ticker only if it passes all of these filters, applied in order:
      1. its exchange code is one of the NYSE / TSX codes listed below
      2. its price history is non-empty and has a "Volume" column
      3. after dropping every calendar month with fewer than 18 rows
         (trading days), at least one row is left
      4. the mean daily volume over the remaining rows is not below 5000

    The download window comes from the module-level `start_date` and
    `end_date` variables.

    Parameters
    ----------
    csv_file_path : anything accepted by `pd.read_csv`
        CSV containing the ticker symbols (read with pandas' default
        header handling).
    ticker_column_name : str, default "Ticker"
        Column holding the symbols. If no such column exists, the first
        column of the CSV is used instead.

    Returns
    -------
    pandas.DataFrame
        The surviving rows of every valid ticker, stacked in the order the
        tickers appear in the CSV. The original history index is moved into
        a column by `reset_index()`, and "Ticker", "Exchange" and "Industry"
        columns are added to every row. An empty DataFrame is returned when
        no ticker qualifies.
    """
    min_trading_days_per_month = 18
    min_average_daily_volume = 5000

    # Read the ticker list from the given CSV
    data_table = pd.read_csv(csv_file_path)

    # Choose which column contains the ticker symbols; fall back to the
    # first column when the requested name is not present.
    if ticker_column_name in data_table.columns:
        raw_ticker_column = data_table[ticker_column_name]
    else:
        raw_ticker_column = data_table.iloc[:, 0]

    # Build a clean, order-preserving list of unique tickers
    # (NaN cells and blank strings are skipped).
    ticker_list = []
    seen_tickers = set()
    for cell in raw_ticker_column.dropna():
        ticker_text = str(cell).strip()
        if ticker_text and ticker_text not in seen_tickers:
            seen_tickers.add(ticker_text)
            ticker_list.append(ticker_text)

    # Only these exchange codes are allowed (NYSE & TSX variants)
    allowed_exchange_codes = ("NYQ", "NYS", "NYSE", "TSX", "TOR", "TSE")

    # One DataFrame per valid ticker; they are concatenated once at the end.
    # (DataFrame.append no longer exists in pandas 2.x, and growing a frame
    # inside a loop re-copies all previous rows on every iteration.)
    valid_ticker_frames = []

    for ticker_symbol in ticker_list:
        yf_ticker_object = yf.Ticker(ticker_symbol)

        # Basic info for the stock (exchange and industry); tolerate a
        # missing payload instead of failing on the membership tests below.
        ticker_info = yf_ticker_object.info or {}

        # The first key that is present wins, even if its value is None
        exchange_code = None
        for info_key in ("exchange", "market", "fullExchangeName"):
            if info_key in ticker_info:
                exchange_code = ticker_info[info_key]
                break
        industry_name = ticker_info.get("industry")

        # Keep only NYSE / TSX tickers (checked before downloading prices)
        if exchange_code not in allowed_exchange_codes:
            continue

        # Download daily price + volume history
        price_history = yf_ticker_object.history(start=start_date, end=end_date)
        if len(price_history) == 0 or "Volume" not in price_history.columns:
            continue

        # Drop months with fewer than the required number of trading days
        month_of_each_row = price_history.index.to_period("M")
        days_per_month = pd.Series(month_of_each_row).value_counts()
        months_with_enough_days = days_per_month[
            days_per_month >= min_trading_days_per_month
        ].index
        price_history = price_history[month_of_each_row.isin(months_with_enough_days)]
        if len(price_history) == 0:
            continue

        # Filter by average daily volume, measured on the rows that survived
        # the month filter. A NaN mean does not compare below the threshold,
        # so such a ticker is kept.
        if price_history["Volume"].mean() < min_average_daily_volume:
            continue

        # Ticker passed every filter: attach ticker-level info to each row
        valid_ticker_frames.append(
            price_history.assign(
                Ticker=ticker_symbol,
                Exchange=exchange_code,
                Industry=industry_name,
            )
        )

    if not valid_ticker_frames:
        return pd.DataFrame()

    # Stack everything and turn the date index into a regular column
    all_tickers_data = pd.concat(valid_ticker_frames).reset_index()
    if "Date" not in all_tickers_data.columns and "index" in all_tickers_data.columns:
        # An unnamed index comes out of reset_index() as "index"
        all_tickers_data = all_tickers_data.rename(columns={"index": "Date"})

    return all_tickers_data

#--------------------#

# Function to return a list of all tickers (first column elements)
def get_ticker_list(tickers_df):
    """
    Return the values of the first column of `tickers_df` as a plain list,
    leaving out the very first row.
    """
    first_column_values = tickers_df.iloc[:, 0].tolist()
    # Row 0 is dropped -- presumably it holds the CSV header text because the
    # file is read with header=None; verify against the ticker file.
    return first_column_values[1:]

# Ticker symbols taken from the first column of the ticker file
ticker_list = get_ticker_list(tickers_df=tickers)


# Gets weekly closes of all the stocks in a list of tickers
def get_weekly_closes(ticker_lst, start_date, end_date):
    """
    Download daily prices for each ticker and return their weekly closes.

    For every symbol the daily "Close" series is resampled into weeks ending
    on Friday, taking the last available close of each week.

    Parameters
    ----------
    ticker_lst : iterable of str
        Ticker symbols to download.
    start_date, end_date
        Passed straight through to yfinance's `Ticker.history(start=, end=)`.

    Returns
    -------
    pandas.DataFrame
        One column per ticker named "Close <ticker>", indexed by the
        week-ending date formatted as 'YYYY-MM-DD' (chronological order).
        The index is the union of all tickers' weeks, so a ticker with no
        data for a given week holds NaN there. A ticker with no usable
        history yields an all-NaN column. An empty DataFrame is returned
        when `ticker_lst` is empty.
    """
    # Collect one weekly Series per ticker and join them once at the end.
    # Assigning columns one by one into an empty DataFrame would lock the
    # index to the first ticker's weeks and silently drop any other week.
    weekly_by_column = {}

    for symbol in ticker_lst:
        column_name = f'Close {symbol}'
        data = yf.Ticker(symbol).history(start=start_date, end=end_date)

        if len(data) == 0 or 'Close' not in data.columns:
            # No usable history: keep the column so the output shape is
            # predictable, but leave it empty (NaN after the join).
            weekly_by_column[column_name] = pd.Series(
                dtype='float64', index=pd.DatetimeIndex([])
            )
            continue

        close_prices = data['Close'].copy()
        daily_index = pd.to_datetime(close_prices.index)  # ensure datetime index
        if daily_index.tz is not None:
            # Drop the timezone but keep local wall-clock dates so tickers
            # from different exchanges line up on the same calendar weeks.
            daily_index = daily_index.tz_localize(None)
        close_prices.index = daily_index

        # last() takes the last trading price of the week ending Friday
        weekly_by_column[column_name] = close_prices.resample('W-FRI').last()

    if not weekly_by_column:
        return pd.DataFrame()

    # Outer join on the week-ending dates, kept in chronological order
    weekly_closes = pd.concat(weekly_by_column, axis=1).sort_index()
    # Strip time
    weekly_closes.index = weekly_closes.index.strftime('%Y-%m-%d')
    return weekly_closes

# Weekly (Friday) closes for every ticker over the configured date window
weekly_closes = get_weekly_closes(
    ticker_lst=ticker_list,
    start_date=start_date,
    end_date=end_date,
)

# Creates a df with the (weekly) %change for each column
def get_percent_change(closes, start_date, end_date):
    """
    Turn a table of closes into period-over-period percentage changes.

    Each column of `closes` produces a column named "% Change <name>", where
    <name> is the original column label with its first six characters (the
    "Close " prefix) removed. `start_date` and `end_date` are accepted but
    not used by the computation.
    """
    prefix_length = 6  # len("Close ")
    # fill_method=None: gaps (e.g. delisted stocks) stay NaN instead of
    # being forward-filled before the change is computed.
    changes_by_column = {
        f'% Change {label[prefix_length:]}': closes[label].pct_change(fill_method=None) * 100
        for label in closes
    }
    return pd.DataFrame(changes_by_column)

# Week-over-week percentage change of every ticker's close
weekly_percent_change = get_percent_change(
    closes=weekly_closes,
    start_date=start_date,
    end_date=end_date,
)


# Calculate covariance, correlation, variance, standard deviation of the
# weekly percentage changes. 'Covariance' and 'Correlation' are
# ticker-by-ticker tables; 'Variance' and 'Std_Dev' hold one value per ticker.
covariance_matrix = {
    'Covariance': weekly_percent_change.cov(),
    'Correlation': weekly_percent_change.corr(),
    'Variance': weekly_percent_change.var(),
    'Std_Dev': weekly_percent_change.std(),
}

# Access each piece like:
#   display(covariance_matrix['Covariance'])
#   display(covariance_matrix['Std_Dev'])
# (Kept as comments: a bare triple-quoted string at the end of a cell is an
# expression, so Jupyter would echo it as the cell's output.)

## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Insert Names Here.