In [1]:
import yfinance as yf

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

from cvxopt import matrix 
from cvxopt import solvers

from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.metrics import mean_squared_error

In [2]:
def get_yfinance_data(ticker_list : list[str], start_date : str, end_date : str):
    """ Get data from yfinance for a list of tickers.
    
    It includes Open, High, Low, Close, Adj. Close, and Volume.
    
    Args:
        ticker_list: List of ticker symbols
        start_date: Start date of the data
        end_date: End date of the data

    Returns:
        A dict mapping ticker symbols to dataframes containing the data
        fetched from yfinance
    """
    data_dict = {}

    for ticker_symbol in ticker_list:
        data = yf.download(ticker_symbol, start=start_date, end=end_date)
        data_dict[ticker_symbol] = data

    return data_dict

In [3]:
# Example tickers and dates
aapl = "AAPL"
msft = "MSFT"
tsla = "TSLA"

tickers = [aapl, msft, tsla]

start_date = "2021-01-01"
end_date = "2024-01-01"

yfinance_data_dict = get_yfinance_data(tickers, start_date, end_date)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [4]:
# Example dataframe
example_df = yfinance_data_dict["AAPL"]
example_df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-10-30,169.020004,171.169998,168.869995,170.289993,170.289993,51131000
2023-10-31,169.350006,170.899994,167.899994,170.770004,170.770004,44846000
2023-11-01,171.0,174.229996,170.119995,173.970001,173.970001,56934900
2023-11-02,175.520004,177.779999,175.460007,177.570007,177.570007,77334800
2023-11-03,174.240005,176.820007,173.350006,176.649994,176.649994,79763700


In [5]:
# Returns
for ticker_df in yfinance_data_dict.values():
    ticker_df["Returns"] = (ticker_df["Close"] - ticker_df["Open"]) / ticker_df["Open"]

In [6]:
# Log Returns
for ticker_df in yfinance_data_dict.values():
    ticker_df["Log Returns"] = np.log(ticker_df["Close"]).diff()

In [7]:
# SMA
SMA_PERIOD = 10
for ticker_df in yfinance_data_dict.values():
    ticker_df["SMA"] = ticker_df["Close"].rolling(window=SMA_PERIOD).mean()

In [8]:
# EMA
EMA_PERIOD = 10
for ticker_df in yfinance_data_dict.values():
    ticker_df["EMA"] = ticker_df["Close"].ewm(span=EMA_PERIOD, adjust=False).mean()

In [9]:
# VWAP
for ticker_df in yfinance_data_dict.values():
    value = ticker_df["Close"] * ticker_df["Volume"]
    cumulative_value = value.cumsum()
    cumulative_volume = ticker_df["Volume"].cumsum()
    ticker_df["VWAP"] = cumulative_value / cumulative_volume

In [10]:
# CCI
CCI_PERIOD = 10
for ticker_df in yfinance_data_dict.values():
    typical_price = (ticker_df["High"] + ticker_df["Low"] + ticker_df["Close"]) / 3
    mean_typical_price = typical_price.rolling(window=CCI_PERIOD).mean()
    mean_deviation = (typical_price - mean_typical_price).abs().rolling(window=CCI_PERIOD).mean()
    ticker_df["CCI"] = (typical_price - mean_typical_price) / (0.015 * mean_deviation)

In [11]:
# Volatility
VOLATILITY_PERIOD  = 10
for ticker_df in yfinance_data_dict.values():
    ticker_df["Volatility"] = ticker_df["Returns"].rolling(window=VOLATILITY_PERIOD).std()

In [12]:
# RoC
ROC_PERIOD  = 10
for ticker_df in yfinance_data_dict.values():
    ticker_df["RoC"] = (ticker_df["Close"] / ticker_df["Close"].shift(ROC_PERIOD) - 1) * 100

In [13]:
for ticker_df in yfinance_data_dict.values():
    ticker_df.drop(["Open", "High", "Low", "Close", "Adj Close"], axis = 1, inplace=True)
    ticker_df.dropna(inplace=True)

In [14]:
# Example dataframe after feature construction
example_df.tail()

Unnamed: 0_level_0,Volume,Returns,Log Returns,SMA,EMA,VWAP,CCI,Volatility,RoC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-10-30,51131000,0.007514,0.01223,172.427,171.871979,151.941438,-46.545218,0.010096,-4.71688
2023-10-31,44846000,0.008385,0.002815,171.789001,171.67162,151.956118,-36.778702,0.010464,-3.601462
2023-11-01,56934900,0.017368,0.018565,171.602002,172.089507,151.977886,22.918323,0.011866,-1.063464
2023-11-02,77334800,0.01168,0.020482,171.813002,173.085962,152.012214,89.490143,0.012157,1.202554
2023-11-03,79763700,0.013831,-0.005195,172.190001,173.733968,152.046254,64.258348,0.01103,2.180697


In [15]:
# TODO: Construct labels and join data frames for all stocks
# TODO: One-hot encode target column (stock with highest returns)
# TODO: Preprocess the data

In [16]:
# TODO: Create classifier model