In [2]:
#These are the libraries you can use.  You may add any libraries directly related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime

## Group Assignment
### Team Number: 15
### Team Member Names: Neil Zhang, Rahim Rehan, Krish Patel
### Team Strategy Chosen: Risk-Free (Market Beat, Market Meet, Risk-Free)

In [1]:
# TODO: change comments back to original

# Date window handed to yfinance's history(start=..., end=...) call inside
# format_tickers; both values are plain "YYYY-M-D" strings.
START_DATE = "2024-10-1"
END_DATE   = "2025-9-30"

def _load_unique_tickers(csv_file_path, ticker_column_name):
    """Return the distinct, whitespace-stripped tickers from a CSV, in file order."""
    data_table = pd.read_csv(csv_file_path)

    if ticker_column_name in data_table.columns:
        raw_ticker_column = data_table[ticker_column_name]
    elif data_table.shape[1] > 0:
        # the requested column is missing, so assume the first column has the tickers
        raw_ticker_column = data_table.iloc[:, 0]
    else:
        # a CSV with no columns at all simply has no tickers
        return []

    cleaned_text = (str(cell).strip() for cell in raw_ticker_column if not pd.isna(cell))
    # dict.fromkeys removes duplicates while keeping first-seen order
    return list(dict.fromkeys(text for text in cleaned_text if text != ""))


def _drop_short_months(price_history, min_trading_days):
    """Return only the rows that fall in calendar months with enough trading days."""
    row_dates = price_history.index
    if row_dates.tz is not None:
        # work on a timezone-naive copy of the index; the frame itself is untouched
        row_dates = row_dates.tz_localize(None)

    row_months = row_dates.to_period("M")

    # one pass to count rows per month instead of re-scanning every row per month
    days_per_month = pd.Series(row_months).value_counts()
    has_enough_days = (days_per_month >= min_trading_days).to_numpy()
    months_with_enough_days = days_per_month.index[has_enough_days]

    return price_history[row_months.isin(months_with_enough_days)]


def format_tickers(csv_file_path, ticker_column_name="Ticker",
                   min_trading_days=18, min_average_volume=5000):
    """
    Reads a CSV of tickers, checks each one with yfinance, and returns a
    summary table of the tickers that pass every filter.

    A ticker is kept only if:
      - its exchange code is one of the NYSE / TSX codes listed below
      - it has price history with a Volume column between START_DATE and END_DATE
      - at least one month in that history has min_trading_days or more rows
      - the average Volume over the months that were kept is >= min_average_volume

    Parameters:
      csv_file_path      - path of the CSV file holding the tickers
      ticker_column_name - column with the ticker symbols; if it is missing,
                           the first column of the CSV is used instead
      min_trading_days   - fewest rows a month needs to be kept (default 18)
      min_average_volume - lowest acceptable average daily volume (default 5000)

    Returns:
      DataFrame with one row per valid ticker (in CSV order) and the columns
      "Ticker", "Sector", "Currency", "Exchange". Sector / Currency are None
      when yfinance does not report them. If nothing passes, the DataFrame
      is empty but still has those four columns.

    Reads the module-level START_DATE / END_DATE for the download window.
    """

    ticker_list = _load_unique_tickers(csv_file_path, ticker_column_name)

    # only allow these exchange codes (NYSE & TSX variants)
    allowed_exchange_codes = ["NYQ", "NYS", "NYSE", "TSX", "TOR", "TSE"]

    # info keys that may hold the exchange code, in order of preference
    exchange_info_keys = ("exchange", "market", "fullExchangeName")

    # one list per output column; rows are appended together so they stay aligned
    valid_tickers  = []
    valid_sectors  = []
    valid_currency = []
    valid_exchange = []

    # Processing each ticker one by one
    for ticker_symbol in ticker_list:

        # Getting basic info for each stock (exchange, sector, currency)
        try:
            yf_ticker_object = yf.Ticker(ticker_symbol)
            ticker_info = yf_ticker_object.info or {}
        except Exception:
            # Deliberately broad: a ticker that cannot be looked up (bad symbol,
            # delisted, failed request) is rejected instead of stopping the
            # whole scan.
            continue

        # the first key that is present wins, even if its value is None
        exchange_code = next(
            (ticker_info[key] for key in exchange_info_keys if key in ticker_info),
            None,
        )

        # keep only NYSE / TSX tickers
        if exchange_code not in allowed_exchange_codes:
            continue

        # Downloading daily price + volume history
        # NOTE(review): yfinance documents `end` as exclusive, so END_DATE itself
        # is presumably not part of the download - confirm this is intended.
        try:
            price_history = yf_ticker_object.history(start=START_DATE, end=END_DATE)
        except Exception:
            # same reasoning as above: no usable history means the ticker fails
            continue

        # price_history must not be empty and must have a Volume column
        if price_history is None or len(price_history) == 0:
            continue
        if "Volume" not in price_history.columns:
            continue

        # Dropping months with too few trading days
        price_history = _drop_short_months(price_history, min_trading_days)
        if len(price_history) == 0:
            continue

        # Filter by average daily volume; written as "not >=" so that a NaN
        # average (no usable volume data) is rejected instead of slipping through
        average_daily_volume = price_history["Volume"].mean()
        if not (average_daily_volume >= min_average_volume):
            continue

        # Ticker passed all filters
        valid_tickers.append(ticker_symbol)
        valid_sectors.append(ticker_info.get("sector"))
        valid_currency.append(ticker_info.get("currency"))
        valid_exchange.append(exchange_code)

    if len(valid_tickers) == 0:
        return pd.DataFrame(columns=["Ticker", "Sector", "Currency", "Exchange"])

    return pd.DataFrame({
        "Ticker":   valid_tickers,
        "Sector":   valid_sectors,
        "Currency": valid_currency,
        "Exchange": valid_exchange
    })

## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Insert Names Here.