In [2]:
# Provides ways to work with large multidimensional arrays
import numpy as np
# Allows for further data manipulation and analysis
import polars as pd
from pandas_datareader import data as web # Reads stock data
import yfinance as yf
import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates # Styling dates
%matplotlib inline

import datetime as dt # For defining dates
import mplfinance as mpf # Matplotlib finance

# Used to get data from a directory
import os
from os import listdir
from os.path import isfile, join

# Default Values

In [3]:
# Base directory for data files; the stock CSVs are read from PATH/stocks
PATH = "/Users/jwiegand/Dev/jrwiegand/data/files"

# Start date defaults
S_YEAR, S_MONTH, S_DAY = 2021, 6, 1
S_DATE_DATETIME = dt.datetime(S_YEAR, S_MONTH, S_DAY)
# Month and day are not zero-padded here, e.g. "2021-6-1"
S_DATE_STR = f"{S_YEAR}-{S_MONTH}-{S_DAY}"

# End date defaults
E_YEAR, E_MONTH, E_DAY = 2024, 6, 1
E_DATE_DATETIME = dt.datetime(E_YEAR, E_MONTH, E_DAY)
# Month and day are not zero-padded here, e.g. "2024-6-1"
E_DATE_STR = f"{E_YEAR}-{E_MONTH}-{E_DAY}"


# Screener CSV that has to be downloaded manually from Nasdaq:
# https://www.nasdaq.com/market-activity/stocks/screener
CSV_DATA_FILE = "nasdaq_screener_1717356628553.csv"

# Get Stock File Names in a List

In [4]:
# Keep only regular files ending in ".csv": stray entries in the folder (for
# example macOS ".DS_Store") would otherwise become bogus tickers that
# get_df_from_csv() cannot load. The result is sorted because listdir()
# returns entries in arbitrary order.
stocks_dir = join(PATH, "stocks")
files = sorted(
    name
    for name in listdir(stocks_dir)
    if os.path.splitext(name)[1].lower() == ".csv" and isfile(join(stocks_dir, name))
)
# Ticker symbol = file name without its extension
tickers = [os.path.splitext(name)[0] for name in files]

# Function that Returns a Dataframe from a CSV

In [5]:
def get_df_from_csv(ticker):
    """Load the saved CSV for ``ticker`` from ``PATH/stocks``.

    Args:
        ticker: File name stem (without ``.csv``) of the file to read.

    Returns:
        The frame produced by ``pd.read_csv``, or ``None`` (after printing
        "File Does Not Exist") when the file is missing.
    """
    # NOTE(review): the import cell binds `pd` to polars, yet the other helpers
    # in this notebook use pandas-only accessors (.loc, .set_index) - confirm
    # which library is intended.
    csv_path = "".join((PATH, "/stocks/", ticker, ".csv"))
    try:
        return pd.read_csv(csv_path)
    except FileNotFoundError:
        print("File Does Not Exist")
        return None

# Function that Saves Dataframe to CSV

In [6]:
def save_dataframe_to_csv(df, ticker):
    """Write ``df`` to ``PATH/stocks/<ticker>.csv`` using ``df.to_csv``.

    Args:
        df: Frame to persist; it must provide a ``to_csv(path)`` method.
        ticker: File name stem (without ``.csv``) for the output file.
    """
    target = "".join((PATH, "/stocks/", ticker, ".csv"))
    df.to_csv(target)

# Return Valid Dates in Dataframe

In [7]:
def get_valid_dates(df, sdate, edate):
    """Return the first and last dates present in ``df`` within ``(sdate, edate]``.

    The window is exclusive of ``sdate`` and inclusive of ``edate``, the same
    window that get_mean_between_dates and get_sd_between_dates apply, so the
    dates reported here always fall inside the range those helpers aggregate.

    Args:
        df: Frame with a "Date" column of "-"-separated date strings.
        sdate: Lower bound (exclusive), compared against the "Date" column.
        edate: Upper bound (inclusive), compared against the "Date" column.

    Returns:
        A ``(first_date, last_date)`` tuple of strings in which every
        single-character part is left-padded with "0" (e.g. "2021-6-1"
        becomes "2021-06-01"), or ``None`` after printing "Date Corrupted"
        when anything goes wrong.
    """
    def _zero_pad(date_str):
        # Prefix "0" to every part shorter than two characters
        return "-".join(("0" if len(part) < 2 else "") + part for part in date_str.split("-"))

    try:
        # Inclusive upper bound keeps this helper consistent with the
        # mean / standard-deviation helpers, which also use "<= edate".
        in_window = (df["Date"] > sdate) & (df["Date"] <= edate)
        dates = df.loc[in_window, "Date"]
        padded = _zero_pad(dates.min()), _zero_pad(dates.max())
    except Exception:
        # Best-effort: e.g. a missing "Date" column, an empty window or
        # non-string dates all end up here.
        print("Date Corrupted")
        return None
    return padded

# Return on Investment Between Two Dates

In [8]:
def roi_between_dates(df, sdate, edate):
    """Compute the relative change of "Adj Close" from ``sdate`` to ``edate``.

    Args:
        df: Frame whose index labels include ``sdate`` and ``edate`` and
            which has an "Adj Close" column.
        sdate: Index label of the starting row.
        edate: Index label of the ending row.

    Returns:
        ``(end - start) / start`` for the two "Adj Close" values, or ``None``
        after printing "Data Corrupted" when the lookup or the arithmetic
        raises.
    """
    try:
        first_price, last_price = (df.loc[day, "Adj Close"] for day in (sdate, edate))
        gain = (last_price - first_price) / first_price
    except Exception:
        print("Data Corrupted")
        return None
    return gain

# Get Mean Between Dates

In [9]:
def get_mean_between_dates(df, sdate, edate):
    """Mean of "Adj Close" over rows whose "Date" lies in ``(sdate, edate]``.

    Args:
        df: Frame with "Date" and "Adj Close" columns.
        sdate: Lower bound (exclusive) for the "Date" column.
        edate: Upper bound (inclusive) for the "Date" column.

    Returns:
        The result of ``.mean()`` on the selected "Adj Close" values.
    """
    after_start = df["Date"] > sdate
    up_to_end = df["Date"] <= edate
    window = df.loc[after_start & up_to_end]
    return window["Adj Close"].mean()

# Get Standard Deviation Between Dates

In [10]:
def get_sd_between_dates(df, sdate, edate):
    """Standard deviation of "Adj Close" for "Date" values in ``(sdate, edate]``.

    Args:
        df: Frame with "Date" and "Adj Close" columns.
        sdate: Lower bound (exclusive) for the "Date" column.
        edate: Upper bound (inclusive) for the "Date" column.

    Returns:
        The result of ``.std()`` on the selected "Adj Close" values.
    """
    after_start = df["Date"] > sdate
    up_to_end = df["Date"] <= edate
    window = df.loc[after_start & up_to_end]
    return window["Adj Close"].std()