In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Put the parent directory on the module search path *before* importing the
# project-local helper module (presumably `utilities` lives one level up —
# TODO confirm). The append must stay ahead of the import.
import sys
sys.path.append('../')
import utilities


# Globally silence FutureWarning messages so they do not clutter cell output.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Read the ticker table and keep only its first 10 rows (local-testing subset).
tickers = pd.read_csv('../data_cleaning/stocks/dense_95.csv').head(10)

In [None]:
# Work on a single symbol for now: AAPL.
# `ticker` is reused at the end of the notebook to name the output CSV.

ticker = "AAPL"
df = utilities.load_stock(ticker)

In [None]:
# Preview the first rows of df.
df.head()

In [None]:
# Technical indicators. Each utilities helper receives the frame and its return
# value replaces df (presumably adding the rsiN / macd / emaN columns that the
# normalizing cells read — TODO confirm against utilities).
for rsi_period in (14, 28, 56):
    df = utilities.RSI(df, rsi_period)

df = utilities.MACD(df)

for ema_period in (10, 50, 200):
    df = utilities.EMA(df, ema_period)

# Applied last, so every indicator above is computed on the unfiltered frame.
df = utilities.market_hours_only(df)

In [None]:
# Indicator normalizing

# Rescale the minute column so that 570 maps to 0 and 960 maps to 1.
MINUTE_OFFSET = 570
MINUTE_SPAN = 390
df['minute'] = (df['minute'] - MINUTE_OFFSET) / MINUTE_SPAN

# Min-max scale the date: the earliest date becomes 0, the latest becomes 1.
first_date = df['date'].min()
last_date = df['date'].max()
df['date'] = (df['date'] - first_date) / (last_date - first_date)

# Scale every RSI and EMA column down by a factor of 100.
for scaled_col in ('rsi14', 'rsi28', 'rsi56', 'ema10', 'ema50', 'ema200'):
    df[scaled_col] /= 100

In [None]:
# Preview df again after the indicator columns have been rescaled.
df.head()

In [None]:
# Price normalizing

# Columns that are min-max scaled independently within each calendar day.
PER_DAY_COLUMNS = ['high', 'low', 'volume', 'macd', 'signal', 'histogram',
                   'rsi14', 'rsi28', 'rsi56', 'ema10', 'ema50', 'ema200']

# One independent copy per calendar day, so the per-day writes below never
# touch a view of df. (The former utilities.list_of_day_dfs(df) call was
# removed: its result was overwritten on the very next line.)
days = [group.copy() for _, group in df.groupby(df['timestamp'].dt.date)]

# Scale each column to 0-1 per day: 0 is that day's minimum, 1 its maximum.
# No sorting takes place; row order inside each day is left as it was.
# NOTE(review): a day's min/max include bars later than the current row, so
# these features carry intraday look-ahead information.
for day in days:
    for col in PER_DAY_COLUMNS:
        day_min = day[col].min()
        day_range = day[col].max() - day_min
        # A column that is constant for the whole day has a zero range; 0/0
        # would turn the entire column into NaN. Dividing by 1 instead maps
        # such a column to 0.0. (A NaN range still propagates NaN.)
        day[col] = (day[col] - day_min) / (day_range if day_range != 0 else 1)

# Recombine the days; the timestamp was only needed for the grouping above.
df = pd.concat(days)
df = df.drop(columns=['timestamp'])

df.head()

In [None]:
# Label construction: 'enter' is built from future prices (forward-looking).
#
# enter = 1 if the close HORIZON_BARS rows ahead is higher than the current
# close, 0 otherwise. HORIZON_BARS rows equal 5 minutes only when the bars are
# contiguous one-minute bars.
HORIZON_BARS = 5

# Shift inside each trading day so that the last bars of a day are never
# compared with the opening bars of the following day.
# NOTE(review): assumes df['date'] holds one distinct value per trading day —
# TODO confirm against utilities.load_stock. dropna=False keeps rows whose
# date is NaN in a group of their own instead of discarding them here.
future_close = df.groupby('date', dropna=False)['close'].shift(-HORIZON_BARS)

# NaN > x evaluates to False, so rows without a bar HORIZON_BARS ahead would be
# labelled 0 ("lower") although their outcome is unknown. Those rows are
# dropped instead of being written out with a fabricated label.
df['enter'] = (future_close > df['close']).astype(int)
df = df.loc[future_close.notna()].drop(columns=['open', 'close'])

In [None]:
# Summary statistics of the final columns, as a sanity check before saving.
df.describe()

In [None]:
# Write the processed frame to ../cleaned_data/<ticker>.csv.
# to_csv is called with its defaults, so the row index is written as well.
output_path = f'../cleaned_data/{ticker}.csv'
df.to_csv(output_path)