In [57]:
# Provides ways to work with large multidimensional arrays
import numpy as np
# Allows for further data manipulation and analysis
import pandas as pd
from pandas_datareader import data as web # Reads stock data
import yfinance as yf
import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates # Styling dates
%matplotlib inline

import datetime as dt # For defining dates
import mplfinance as mpf # Matplotlib finance

# Used to get data from a directory
import os
from os import listdir
from os.path import isfile, join

# Default Values

In [58]:
PATH = "/Users/jwiegand/Dev/jrwiegand/data/files"

# Start date defaults
S_YEAR = 2021
S_MONTH = 6
S_DAY = 1
S_DATE_STR = f"{S_YEAR}-{S_MONTH}-{S_DAY}"
S_DATE_DATETIME = dt.datetime(S_YEAR, S_MONTH, S_DAY)

# Start date defaults
E_YEAR = 2024
E_MONTH = 6
E_DAY = 1
E_DATE_STR = f"{E_YEAR}-{E_MONTH}-{E_DAY}"
E_DATE_DATETIME = dt.datetime(E_YEAR, E_MONTH, E_DAY)


# Manually Download the Screener CSV from Nasdaq
# https://www.nasdaq.com/market-activity/stocks/screener
CSV_DATA_FILE = "nasdaq_screener_1717356628553.csv"

# Get Stock File Names in a List

In [59]:
files = [x for x in listdir(PATH + "/stocks") if isfile(join(PATH + "/stocks", x))]
tickers = [os.path.splitext(x)[0] for x in files]

# Function that Returns a Dataframe from a CSV

In [60]:
def get_df_from_csv(ticker):
    try:
        df = pd.read_csv(PATH + "/stocks/" + ticker + ".csv")
    except FileNotFoundError:
        print("File Does Not Exist")
    else:
        return df

# Function that Saves Dataframe to CSV

In [61]:
def save_dataframe_to_csv(df, ticker):
    df.to_csv(PATH + "/stocks/" + ticker + ".csv")

# Return Valid Dates in Dataframe

In [62]:
def get_valid_dates(df, sdate, edate):
    try:
        mask = (df["Date"] > sdate) & (df["Date"] <= edate)
        sm_df = df.loc[mask]
        sm_df = sm_df.set_index(["Date"])
        sm_date = sm_df.index.min()
        last_date = sm_df.index.max()

        date_leading = "-".join(("0" if len(x) < 2 else "") + x for x in sm_date.split("-"))
        date_ending = "-".join(("0" if len(x) < 2 else "") + x for x in last_date.split("-"))

    except Exception as e:
        print("Date Corrupted" + e)
    else:
        return date_leading, date_ending

# Returns Return on Investment over Time

In [63]:
def roi_between_dates(df, sdate, edate):
    try:
        start_val = df.loc[sdate, "Adj Close"]
        end_val = df.loc[edate, "Adj Close"]
        roi = ((end_val - start_val) / start_val)
    except Exception:
        print("Data Corrupted")
    else:
        return roi

# Get Mean Between Dates

In [64]:
def get_mean_between_dates(df, sdate, edate):
    mask = (df["Date"] > sdate) & (df["Date"] <= edate)
    return df.loc[mask]["Adj Close"].mean()

# Get Standard Deviation Between Dates

In [65]:
def get_sd_between_dates(df, sdate, edate):
    mask = (df["Date"] > sdate) & (df["Date"] <= edate)
    return df.loc[mask]["Adj Close"].std()

# Get Coefficient of Variation Between Dates

In [66]:
def get_cov_between_dates(df, sdate, edate):
    mean = get_mean_between_dates(df, sdate, edate)
    sd = get_sd_between_dates(df, sdate, edate)
    return sd / mean

# Test Functions

In [67]:
print(tickers[7000])
stock_a = get_df_from_csv(tickers[7000])
print(get_valid_dates(stock_a, S_DATE_STR, E_DATE_STR))
sdate, edate = get_valid_dates(stock_a, S_DATE_STR, E_DATE_STR)
print("Adj Close Mean: ", get_mean_between_dates(stock_a, sdate, edate))
print("Adj Close SD: ", get_sd_between_dates(stock_a, sdate, edate))
print("Adj Close COV: ", get_cov_between_dates(stock_a, sdate, edate))
stock_a = stock_a.set_index("Date")
print("Return on Investment: ", roi_between_dates(stock_a, sdate, edate))

PGR
('2022-01-03', '2024-05-31')
Adj Close Mean:  140.2723646936338
Adj Close SD:  29.601901390659616
Adj Close COV:  0.2110315988135836
Return on Investment:  1.0724063086097624


# COV & ROI for All Stocks Over Defined Period

In [68]:
def get_cov_ror(tickers, sdate, edate):
    col_names = ["Tickers", "COV", "ROI"]
    df = pd.DataFrame(columns = col_names)
    for ticker in tickers:
        print("Working on: ", ticker)
        s_df = get_df_from_csv(ticker)
        sdate, edate = get_valid_dates(s_df, sdate, edate)
        cov = get_cov_between_dates(s_df, sdate, edate)
        s_df = s_df.set_index(["Date"])
        roi = roi_between_dates(s_df, sdate, edate)
        df.loc[len(df.index)] = [ticker, cov, roi]
    return df

In [70]:
market_df = get_cov_ror(tickers, S_DATE_STR, E_DATE_STR)

Working on:  RIV
Working on:  ANTE
Working on:  BRZE
Working on:  CSCO
Working on:  PRI
Working on:  TRAK
Working on:  NZF
Working on:  TYRA
Working on:  AIRC
Working on:  WTO
Working on:  HLNE
Working on:  HUBS
Working on:  NVCT
Working on:  COF^N


TypeError: can only concatenate str (not "AttributeError") to str