# CSE 881 Project

In [1]:
import yfinance as yf

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

In [2]:
def get_yfinance_data(ticker_list : list[str], start_date : str, end_date : str):
    """ Get data from yfinance for a list of tickers.
    
    It includes Open, High, Low, Close, Adj. Close, and Volume.
    
    Args:
        ticker_list : List of ticker symbols
        start_date : Start date of the data
        end_date : End date of the data

    Returns:
        A dict mapping ticker symbols to dataframes containing the data
        fetched from yfinance
    """
    
    data_dict = {}

    for ticker_symbol in ticker_list:
        data = yf.download(ticker_symbol, start=start_date, end=end_date)
        data_dict[ticker_symbol] = data

    return data_dict

In [3]:
# Example tickers and dates
aapl = "AAPL"
msft = "MSFT"
tsla = "TSLA"

tickers = [aapl, msft, tsla]

start_date = "2022-10-01"
end_date = "2023-12-31"

yfinance_data_dict = get_yfinance_data(tickers, start_date, end_date)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [4]:
# Returns
for ticker_df in yfinance_data_dict.values():
    ticker_df["Returns"] = ticker_df["Close"].pct_change()

In [5]:
# SMA
sma_period = 5
for ticker_df in yfinance_data_dict.values():

    ticker_df['SMA'] = ticker_df['Close'].rolling(window=sma_period).mean()

In [6]:
# EMA
ema_period = 5
for ticker_df in yfinance_data_dict.values():

    ticker_df['EMA'] = ticker_df['Close'].ewm(span=ema_period, adjust=False).mean()

In [7]:
# VWAP
for ticker_df in yfinance_data_dict.values():
    value = ticker_df['Close'] * ticker_df['Volume']
    cumulative_value = value.cumsum()
    cumulative_volume = ticker_df['Volume'].cumsum()
    ticker_df['VWAP'] = cumulative_value / cumulative_volume

In [8]:
# CCI
cci_period = 5
for ticker_df in yfinance_data_dict.values():

    ticker_df['Typical Price'] = (ticker_df['High'] + ticker_df['Low'] + ticker_df['Close']) / 3
    mean_typical_price = ticker_df['Typical Price'].rolling(window=cci_period).mean()
    mean_deviation = (ticker_df['Typical Price'] - mean_typical_price).abs().rolling(window=cci_period).mean()
    ticker_df['CCI'] = (ticker_df['Typical Price'] - mean_typical_price) / (0.015 * mean_deviation)


In [9]:
# Volatility
volatility_period  = 5
for ticker_df in yfinance_data_dict.values():
    ticker_df['Volatility'] = ticker_df['Returns'].rolling(window=volatility_period).std()

In [10]:
# RoC
roc_period  = 5
for ticker_df in yfinance_data_dict.values():
    ticker_df['RoC'] = (ticker_df['Close'] / ticker_df['Close'].shift(roc_period) - 1) * 100

In [11]:
# Drop rows with NAs that resulted from rolling window calculations
for ticker_df in yfinance_data_dict.values():
    ticker_df.dropna(inplace=True)

In [12]:
yfinance_data_dict["AAPL"].head(10)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns,SMA,EMA,VWAP,Typical Price,CCI,Volatility,RoC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-10-13,134.990005,143.589996,134.369995,142.990005,142.146744,113224000,0.033613,140.163998,141.164317,142.434252,140.316666,9.177564,0.025305,-1.677774
2022-10-14,144.309998,144.520004,138.190002,138.380005,137.563919,88598000,-0.03224,139.822,140.236213,142.016642,140.363337,16.977952,0.023856,-1.220638
2022-10-17,141.070007,142.899994,140.270004,142.410004,141.57016,85250900,0.029123,140.220001,140.96081,142.052114,141.860001,72.732056,0.027816,1.417181
2022-10-18,145.490005,146.699997,140.610001,143.75,142.902252,99136600,0.009409,141.174002,141.89054,142.213263,143.686666,121.101315,0.026823,3.432152
2022-10-19,141.690002,144.949997,141.5,143.860001,143.011597,61758300,0.000765,142.278003,142.547027,142.305193,143.436666,75.821696,0.026342,3.990172
2022-10-20,143.020004,145.889999,142.649994,143.389999,142.544388,64522000,-0.003267,142.358002,142.828018,142.364976,143.976664,57.548817,0.022274,0.279736
2022-10-21,142.869995,147.850006,142.649994,147.270004,146.401489,86548600,0.027059,144.136002,144.30868,142.702611,145.923335,77.179454,0.014867,6.424338
2022-10-24,147.190002,150.229996,146.0,149.449997,148.568619,75981900,0.014803,145.544,146.022452,143.087122,148.559998,103.808663,0.011994,4.943468
2022-10-25,150.089996,152.490005,149.360001,152.339996,151.441589,74732300,0.019338,147.262,148.1283,143.578215,151.396667,120.156273,0.012722,5.97565
2022-10-26,150.960007,151.990005,148.039993,149.350006,148.469223,88194300,-0.019627,148.360001,148.535536,143.918424,149.793335,45.996914,0.018892,3.816214


In [13]:
# TODO: Preprocess the data

In [14]:
# TODO: Implement model prototype

In [15]:
# TODO: Implement Markowitz mean-variance model
def markowitz_mean_variance(returns, covariance_matrix):
    return np.zeros(3)