In [189]:
# Provides ways to work with large multidimensional arrays
import numpy as np
# Allows for further data manipulation and analysis
import pandas as pd
from pandas_datareader import data as web # Reads stock data
import yfinance as yf
import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates # Styling dates
%matplotlib inline

import datetime as dt # For defining dates
import mplfinance as mpf # Matplotlib finance

# Used to get data from a directory
import os
from os import listdir
from os.path import isfile, join

# Default Values

In [190]:
PATH = "/Users/jwiegand/Dev/jrwiegand/data/files"

# Start date defaults
S_YEAR = 2021
S_MONTH = 6
S_DAY = 1
S_DATE_STR = f"{S_YEAR}-{S_MONTH}-{S_DAY}"
S_DATE_DATETIME = dt.datetime(S_YEAR, S_MONTH, S_DAY)

# Start date defaults
E_YEAR = 2024
E_MONTH = 6
E_DAY = 1
E_DATE_STR = f"{E_YEAR}-{E_MONTH}-{E_DAY}"
E_DATE_DATETIME = dt.datetime(E_YEAR, E_MONTH, E_DAY)


# Manually Download the Screener CSV from Nasdaq
# https://www.nasdaq.com/market-activity/stocks/screener
CSV_DATA_FILE = "nasdaq_screener_1717356628553.csv"

# Get Stock File Names in a List

In [191]:
files = [x for x in listdir(PATH + "/stocks") if isfile(join(PATH + "/stocks", x))]
tickers = [os.path.splitext(x)[0] for x in files]

# Function that Returns a Dataframe from a CSV

In [192]:
def get_df_from_csv(ticker):
    try:
        df = pd.read_csv(PATH + "/stocks/" + ticker + ".csv")
    except FileNotFoundError:
        print("File Does Not Exist")
    else:
        return df

# Function that Saves Dataframe to CSV

In [193]:
def save_dataframe_to_csv(df, ticker):
    df.to_csv(PATH + "/stocks/" + ticker + ".csv")

# Return Valid Dates in Dataframe

In [194]:
def get_valid_dates(df, sdate, edate):
    try:
        mask = (df["Date"] > sdate) & (df["Date"] <= edate)
        sm_df = df.loc[mask]
        sm_df = sm_df.set_index(["Date"])
        sm_date = sm_df.index.min()
        last_date = sm_df.index.max()

        print("VALUES", sm_date, last_date)

        date_leading = "-".join(("0" if len(x) < 2 else "") + x for x in sm_date.split("-"))
        date_ending = "-".join(("0" if len(x) < 2 else "") + x for x in last_date.split("-"))

    except Exception as e:
        print("Date Corrupted" + e)
    else:
        return date_leading, date_ending

# Returns Return on Investment over Time

In [195]:
def roi_between_dates(df, sdate, edate):
    try:
        start_val = df.loc[sdate, "Adj Close"]
        end_val = df.loc[edate, "Adj Close"]
        roi = ((end_val - start_val) / start_val)
    except Exception:
        print("Data Corrupted")
    else:
        return roi

# Get Mean Between Dates

In [196]:
def get_mean_between_dates(df, sdate, edate):
    mask = (df["Date"] > sdate) & (df["Date"] <= edate)
    return df.loc[mask]["Adj Close"].mean()

# Get Standard Deviation Between Dates

In [197]:
def get_sd_between_dates(df, sdate, edate):
    mask = (df["Date"] > sdate) & (df["Date"] <= edate)
    return df.loc[mask]["Adj Close"].std()

# Get Coefficient of Variation Between Dates

In [198]:
def get_cov_between_dates(df, sdate, edate):
    mean = get_mean_between_dates(df, sdate, edate)
    sd = get_sd_between_dates(df, sdate, edate)
    return sd / mean

# Test Functions

In [199]:
print(tickers[1000])
stock_a = get_df_from_csv(tickers[1000])
print(get_valid_dates(stock_a, S_DATE_STR, E_DATE_STR))
sdate, edate = get_valid_dates(stock_a, S_DATE_STR, E_DATE_STR)
print("Adj Close Mean: ", get_mean_between_dates(stock_a, sdate, edate))
print("Adj Close SD: ", get_sd_between_dates(stock_a, sdate, edate))
print("Adj Close COV: ", get_cov_between_dates(stock_a, sdate, edate))
stock_a = stock_a.set_index("Date")
print("Return on Investment: ", roi_between_dates(stock_a, sdate, edate))

SMMT
VALUES 2022-01-03 2024-05-31
('2022-01-03', '2024-05-31')
VALUES 2022-01-03 2024-05-31
Adj Close Mean:  2.2986247927689356
Adj Close SD:  1.2035238354840803
Adj Close COV:  0.5235842923430357
Return on Investment:  2.079787455749891


# COV & ROI for All Stocks Over Defined Period

In [200]:
def get_cov_ror(tickers, sdate, edate):
    col_names = ["Tickers", "COV", "ROI"]
    df = pd.DataFrame(columns = col_names)
    index = 0
    for ticker in tickers:
        print("Working on: ", ticker, index)
        s_df = get_df_from_csv(ticker)
        sdate, edate = get_valid_dates(s_df, sdate, edate)
        cov = get_cov_between_dates(s_df, sdate, edate)
        s_df = s_df.set_index(["Date"])
        roi = roi_between_dates(s_df, sdate, edate)
        df.loc[len(df.index)] = [ticker, cov, roi]
        index += 1
    return df

In [201]:
market_df = get_cov_ror(tickers, S_DATE_STR, E_DATE_STR)

Working on:  RIV 0
VALUES 2022-01-03 2024-05-31
Working on:  ANTE 1
VALUES 2022-01-04 2024-05-31
Working on:  BRZE 2
VALUES 2022-01-05 2024-05-31
Working on:  CSCO 3
VALUES 2022-01-06 2024-05-31
Working on:  PRI 4
VALUES 2022-01-07 2024-05-31
Working on:  TRAK 5
VALUES 2022-01-10 2024-05-31
Working on:  NZF 6
VALUES 2022-01-11 2024-05-31
Working on:  TYRA 7
VALUES 2022-01-12 2024-05-31
Working on:  AIRC 8
VALUES 2022-01-13 2024-05-31
Working on:  WTO 9
VALUES 2022-01-14 2024-05-31
Working on:  HLNE 10
VALUES 2022-01-18 2024-05-31
Working on:  HUBS 11
VALUES 2022-01-19 2024-05-31
Working on:  NVCT 12
VALUES 2022-02-04 2024-05-31
Working on:  SBGI 13
VALUES 2022-02-07 2024-05-31
Working on:  HTLFP 14
VALUES 2022-02-08 2024-05-31
Working on:  UFCS 15
VALUES 2022-02-09 2024-05-31
Working on:  TEAF 16
VALUES 2022-02-10 2024-05-31
Working on:  AIRT 17
VALUES 2022-02-11 2024-05-31
Working on:  SITC 18
VALUES 2022-02-14 2024-05-31
Working on:  TLIS 19
VALUES 2022-02-15 2024-05-31
Working on:  

TypeError: can only concatenate str (not "AttributeError") to str