In [1]:
import sys
import os

sys.path.append('/Users/raphaelravinet/Code')
import plotly.graph_objects as go
import pandas as pd
import numpy as np
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine, select
from datetime import datetime, timedelta
import logging
import matplotlib.pyplot as plt
from dotenv import load_dotenv
import ta
from algo_trading.Data.log_config import setup_logging
from Fin_Database.Data.connect import engine, DailyStockData, HourlyStockData, OneMinuteStockData, FiveMinuteStockData,FifteenMinuteStockData, StockSplits, StockNews, CompanyFinancials
from algo_trading.Pre_Processing.pre_processing import PreProcessing

In [2]:
# Load the raw AAPL minute and daily bar exports from CSV.
# NOTE(review): these are absolute, user-specific paths; consider a
# configurable data directory so the notebook runs on other machines.
aapl_min, aapl_daily = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/Users/raphaelravinet/Code/algo_trading/aapl_minute.csv',
        '/Users/raphaelravinet/Code/algo_trading/aapl_daily.csv',
    )
)

In [3]:
# Run both raw frames through the same PreProcessing chain
# (filter_market_hours -> setting_index) and keep the resulting `.df`.
# NOTE(review): the saved output of final_min_df further down still lists
# 04:00 bars; confirm filter_market_hours() really drops pre-market rows.
df_daily, df_min = (
    PreProcessing(raw_bars).filter_market_hours().setting_index().df
    for raw_bars in (aapl_daily, aapl_min)
)


In [4]:
# Look-back windows (in bars) for the moving averages and their slopes.
fast_periods = [5, 10, 30]
slow_periods = [50, 100, 200, 300]
# Fast windows first, then slow ones, in a single flat list.
periods = [*fast_periods, *slow_periods]

In [5]:
class TechnicalIndicators:
    """Append technical-indicator columns to a price/volume DataFrame.

    The frame handed to the constructor is stored by reference and mutated in
    place: each ``calculate_*`` method writes one or more new columns onto
    ``self.df``. Between them the methods read the ``close``, ``high``,
    ``low`` and ``volume`` columns.

    Most ``calculate_*`` methods return ``self`` so calls can be chained.
    Exceptions: ``calculate_log_return`` and ``add_technical_indicators``
    return the DataFrame, and ``calculate_slope`` returns a Series.
    """

    def __init__(self, df):
        # Kept by reference (no copy): the caller's frame gains the new columns.
        self.df = df

    def calculate_log_return(self):
        """Add ``log_ret`` = log(close[t]) - log(close[t-1]).

        The first row has no predecessor and is NaN.

        Returns:
            The mutated DataFrame (not ``self``), unlike the other
            ``calculate_*`` methods; kept that way for existing callers.
        """
        # diff() subtracts positionally. The earlier form sliced the shifted
        # series with [1:], which forces a label-based re-alignment; with
        # repeated index labels that can mis-pair rows or raise.
        self.df['log_ret'] = np.log(self.df['close']).diff()
        return self.df

    def calculate_rsi(self, window=14):
        """Add ``RSI_{window}`` from ``ta.momentum.RSIIndicator(close).rsi()``."""
        self.df[f'RSI_{window}'] = ta.momentum.RSIIndicator(self.df['close'], window=window).rsi()
        return self

    def calculate_macd(self):
        """Add ``MACD`` using ``ta.trend.MACD`` with the library defaults.

        Note that the stored value is ``macd_diff()``, not ``macd()``.
        """
        self.df['MACD'] = ta.trend.MACD(self.df['close']).macd_diff()
        return self

    def calculate_roc(self, window=14):
        """Add ``ROC`` from ``ta.momentum.roc`` on ``close``.

        The column name does not include ``window``; a second call with a
        different window overwrites the first.
        """
        self.df['ROC'] = ta.momentum.roc(self.df['close'], window=window)
        return self

    def calculate_stoch(self, window=14):
        """Add ``Stoch`` from ``ta.momentum.stoch`` on high/low/close.

        The column name does not include ``window``; a second call with a
        different window overwrites the first.
        """
        self.df['Stoch'] = ta.momentum.stoch(self.df['high'], self.df['low'], self.df['close'], window=window)
        return self

    def calculate_adx(self, window=14):
        """Add ``ADX``, ``ADX_pos`` and ``ADX_neg`` from ``ta.trend.ADXIndicator``."""
        # Build the indicator once and read all three outputs from it instead
        # of constructing three identical indicators.
        adx = ta.trend.ADXIndicator(self.df['high'], self.df['low'], self.df['close'], window=window)
        self.df['ADX'] = adx.adx()
        self.df['ADX_pos'] = adx.adx_pos()
        self.df['ADX_neg'] = adx.adx_neg()
        return self

    def calculate_hl_mean(self, window=25):
        """Add ``rolling_H-L_{window}``: rolling mean of ``high - low``."""
        self.df[f'rolling_H-L_{window}'] = (self.df['high'] - self.df['low']).rolling(window=window).mean()
        return self

    def calculate_lower_band(self, window=10, hl_window=25, multiplier=2.5):
        """Add ``lower_band`` = rolling max of ``high`` minus a multiple of the mean range.

        Args:
            window: Look-back for the rolling maximum of ``high``.
            hl_window: Window of the ``rolling_H-L_{hl_window}`` column to
                subtract. The column is computed via ``calculate_hl_mean`` if
                it is not on the frame yet.
            multiplier: Factor applied to the rolling high-low mean.

        Returns:
            ``self``, for chaining.
        """
        hl_col = f'rolling_H-L_{hl_window}'
        if hl_col not in self.df.columns:
            # Previously this was a KeyError unless calculate_hl_mean() had
            # been called first with the matching window.
            self.calculate_hl_mean(window=hl_window)
        # `window` used to be ignored in favour of a hard-coded 10.
        highest_high = self.df['high'].rolling(window=window).max()
        self.df['lower_band'] = highest_high - multiplier * self.df[hl_col]
        return self

    def calculate_atr(self, window=14):
        """Add ``ATR`` from ``ta.volatility.AverageTrueRange``."""
        self.df['ATR'] = ta.volatility.AverageTrueRange(self.df['high'], self.df['low'], self.df['close'], window=window).average_true_range()
        return self

    def calculate_ibs(self):
        """Add ``IBS`` = (close - low) / (high - low).

        Rows where ``high == low`` divide by zero, so the result there is
        NaN or infinite rather than a number in [0, 1].
        """
        self.df['IBS'] = (self.df['close'] - self.df['low']) / (self.df['high'] - self.df['low'])
        return self

    def calculate_obv(self):
        """Add ``OBV`` from ``ta.volume.OnBalanceVolumeIndicator`` on close/volume."""
        self.df['OBV'] = ta.volume.OnBalanceVolumeIndicator(self.df['close'], self.df['volume']).on_balance_volume()
        return self

    def calculate_moving_averages(self, periods):
        """Add ``sma_{p}`` and ``ema_{p}`` of ``close`` for every ``p`` in ``periods``."""
        for period in periods:
            self.df[f'sma_{period}'] = ta.trend.sma_indicator(self.df['close'], window=period)
            self.df[f'ema_{period}'] = ta.trend.ema_indicator(self.df['close'], window=period)
        return self

    def calculate_slope(self, series, n):
        """Rolling linear-regression slope of ``series``, as a percentage.

        For each window of ``n`` values a degree-1 ``np.polyfit`` is run
        against 0..len-1; the fitted slope is divided by the window's first
        value and multiplied by 100.

        Args:
            series: Series to measure.
            n: Rolling window length.

        Returns:
            A Series of percentage slopes. Nothing is written to ``self.df``.
        """
        def series_slope(y):
            # `y` is a plain ndarray because apply() is called with raw=True.
            x = np.arange(len(y))
            slope, _ = np.polyfit(x, y, 1)
            pct_slope = (slope / y[0]) * 100
            return pct_slope

        slopes = series.rolling(window=n).apply(series_slope, raw=True)
        return slopes

    def calculate_moving_average_slopes(self, periods, slope_period):
        """Add ``sma_slope_{p}`` and ``ema_slope_{p}`` for every ``p`` in ``periods``.

        Requires the ``sma_{p}`` / ``ema_{p}`` columns written by
        ``calculate_moving_averages`` to be present already.
        """
        for period in periods:
            self.df[f'sma_slope_{period}'] = self.calculate_slope(self.df[f'sma_{period}'], slope_period)
            self.df[f'ema_slope_{period}'] = self.calculate_slope(self.df[f'ema_{period}'], slope_period)
        return self

    def add_technical_indicators(self, periods, slope_period=5):
        """Run the standard indicator set and return the enriched DataFrame.

        ``calculate_lower_band`` is not part of this set.

        Args:
            periods: Windows for the moving averages and their slopes.
            slope_period: Rolling window used for the slope regression.

        Returns:
            The mutated DataFrame (``self.df``).
        """
        # The call order fixes the order of the appended columns.
        self.calculate_rsi()
        self.calculate_rsi(window=2)
        self.calculate_macd()
        self.calculate_log_return()
        self.calculate_roc()
        self.calculate_stoch()
        self.calculate_adx()
        self.calculate_hl_mean()
        self.calculate_atr()
        self.calculate_ibs()
        self.calculate_obv()
        self.calculate_moving_averages(periods)
        self.calculate_moving_average_slopes(periods, slope_period)
        return self.df

In [6]:
class features:
    """Derive calendar features from the ``date`` column of a DataFrame."""

    def __init__(self, df):
        # Held by reference; date_features() adds columns to this same frame.
        self.df = df

    def date_features(self):
        """Add ``day``, ``month`` and ``day_of_week`` columns read from ``date``.

        ``date`` must support the ``.dt`` accessor.

        Returns:
            The mutated DataFrame.
        """
        stamps = self.df['date'].dt
        column_sources = (
            ('day', 'day'),
            ('month', 'month'),
            ('day_of_week', 'dayofweek'),
        )
        for column, attribute in column_sources:
            self.df[column] = getattr(stamps, attribute)
        return self.df
    #add date/time to next corporate announcement
    
    

In [7]:
# Enrich the daily and minute frames with the full indicator set.
# add_technical_indicators() returns the frame it was given, mutated in
# place, so final_daily_df / final_min_df are the same objects as
# df_daily / df_min.
final_daily_df, final_min_df = (
    TechnicalIndicators(bars).add_technical_indicators(periods)
    for bars in (df_daily, df_min)
)

In [8]:
# Bare last expression so the notebook renders the enriched minute frame
# (the saved output below shows it truncated to its first and last rows).
final_min_df

Unnamed: 0_level_0,id,date,timestamp,ticker,open,high,low,close,volume,vwap,...,sma_slope_30,ema_slope_30,sma_slope_50,ema_slope_50,sma_slope_100,ema_slope_100,sma_slope_200,ema_slope_200,sma_slope_300,ema_slope_300
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-06 04:00:00,1,2020-01-06 04:00:00,1578301200000,AAPL,73.8575,73.8775,73.5500,73.5500,7752.0,73.7602,...,,,,,,,,,,
2020-01-06 04:01:00,2,2020-01-06 04:01:00,1578301260000,AAPL,73.5500,73.5500,73.5500,73.5500,1388.0,73.5683,...,,,,,,,,,,
2020-01-06 04:02:00,3,2020-01-06 04:02:00,1578301320000,AAPL,73.6950,73.6950,73.6925,73.6925,1492.0,73.6957,...,,,,,,,,,,
2020-01-06 04:05:00,4,2020-01-06 04:05:00,1578301500000,AAPL,73.6925,73.7375,73.6925,73.7375,2400.0,73.7165,...,,,,,,,,,,
2020-01-06 04:06:00,5,2020-01-06 04:06:00,1578301560000,AAPL,73.6800,73.7500,73.6800,73.7500,2736.0,73.7330,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08-01 11:04:00,914404,2024-08-01 11:04:00,1722524640000,AAPL,220.5200,220.6199,220.1500,220.1501,118141.0,220.3381,...,-0.013568,-0.023907,-0.026616,-0.021039,-0.017485,-0.016950,-0.009530,-0.011382,-0.004153,-0.008188
2024-08-01 11:05:00,914405,2024-08-01 11:05:00,1722524700000,AAPL,220.1800,220.4000,220.1500,220.1850,108400.0,220.2574,...,-0.016056,-0.025519,-0.027322,-0.022129,-0.017811,-0.017579,-0.009875,-0.011752,-0.004215,-0.008456
2024-08-01 11:06:00,914406,2024-08-01 11:06:00,1722524760000,AAPL,220.1700,220.6400,220.1200,220.5400,93030.0,220.4027,...,-0.014628,-0.023621,-0.026419,-0.021110,-0.017525,-0.017155,-0.009903,-0.011597,-0.004173,-0.008374
2024-08-01 11:07:00,914407,2024-08-01 11:07:00,1722524820000,AAPL,220.5200,220.5400,220.2600,220.3000,74328.0,220.4553,...,-0.013186,-0.022108,-0.025061,-0.020290,-0.017309,-0.016820,-0.009911,-0.011484,-0.004086,-0.008319


In [None]:
# Persist both enriched frames; index=True writes the index as the first CSV column.
# NOTE(review): absolute, user-specific output paths; consider a configurable directory.
final_min_df.to_csv(
    path_or_buf='/Users/raphaelravinet/Code/algo_trading/aapl_minute_final.csv',
    index=True,
)
final_daily_df.to_csv(
    path_or_buf='/Users/raphaelravinet/Code/algo_trading/aapl_daily_final.csv',
    index=True,
)


In [None]:
# def calculate_realized_variance(df):
#     realized_variance_per_day = df.groupby(df['date'].dt.date)['log_ret'].apply(lambda x: (x ** 2).sum())
#     return realized_variance_per_day

# def calculate_daily_ewma_volatility(df, lambda_=0.94):
#     df['ewma_volatility'] = np.nan
#     ewma_volatility = 0
#     for i in range(len(df)):
#         if i == 0:
#             ewma_volatility = df['log_ret'].iloc[i] ** 2
#         else:
#             ewma_volatility = (lambda_ * ewma_volatility) + ((1 - lambda_) * (df['log_ret'].iloc[i] ** 2))
#         df['ewma_volatility'].iloc[i] = np.sqrt(ewma_volatility)
#     return df['ewma_volatility']


# def intraday_vol_profile(df):
    
#     df['time_bucket'] = df['date'].dt.time
#     intraday_vol = df.groupby(df['time_bucket'])['log_ret'].apply(lambda x: np.sqrt((x ** 2).sum()))
    
#     intraday_vol /= intraday_vol.mean()
    
#     return intraday_vol

# def calculate_intraday_volatility(df, intraday_vol_profile):
#     # Map the intraday volatility profile to each row in the dataframe
#     df['intraday_vol'] = df['date'].dt.time.apply(lambda x: intraday_vol_profile[x])

#     # Sum of intraday volatilities for each day
#     daily_intraday_vol = df.groupby(df['date'].dt.date)['intraday_vol'].sum()

#     return daily_intraday_vol