# In [1]:
# Import Packages

import pandas as pd
import numpy as np

# Functions

def read_data(ticker):
    """Load a price CSV and return it without its date column.

    Args:
        ticker: Path (or any source accepted by ``pandas.read_csv``) of a
            CSV file with exactly seven columns.

    Returns:
        A DataFrame whose columns are renamed positionally to
        ``Open, High, Low, Close, Adj Close, Volume`` (the first column is
        labelled ``Date`` and then removed).
    """
    frame = pd.read_csv(ticker)
    # Column labels are assigned by position, whatever the file's header says.
    frame.columns = [
        'Date',
        'Open',
        'High',
        'Low',
        'Close',
        'Adj Close',
        'Volume',
    ]
    return frame.drop(columns='Date')

def transformation(data):
    """Add intraday, lagged-close and next-row features to ``data`` in place.

    Columns added, in this order:
        * ``diff_Daily``: ``Close - Open``.
        * ``fluc_Daily``: ``High - Low``.
        * ``Close_lag_{k}day`` for k = 1..5: ``Close`` shifted down k rows.
        * ``diff_Close_lag_{k}_{k-1}day`` for k = 5..1: the k-row lag minus
          the (k-1)-row lag, where lag 0 is the current ``Close``.
        * ``Close_next_1day``: ``Close`` shifted up one row.
        * ``diff_Close_next_1day``: next row's close minus the current close.

    Args:
        data: DataFrame with ``Open``, ``High``, ``Low`` and ``Close`` columns.

    Returns:
        The same DataFrame object, with the new columns appended.
    """
    close = data['Close']

    data['diff_Daily'] = close - data['Open']
    data['fluc_Daily'] = data['High'] - data['Low']

    # Lag 0 is the unshifted close so every difference below uses one rule.
    lagged = {0: close}
    for back in range(1, 6):
        lagged[back] = close.shift(back)
        data[f"Close_lag_{back}day"] = lagged[back]

    # Older lag minus the adjacent, more recent one: 5-4, 4-3, 3-2, 2-1, 1-0.
    for back in range(5, 0, -1):
        data[f"diff_Close_lag_{back}_{back - 1}day"] = lagged[back] - lagged[back - 1]

    upcoming = close.shift(-1)
    data['Close_next_1day'] = upcoming
    data['diff_Close_next_1day'] = upcoming - close

    return data
 

# RSI (Relative Strength Index)
def calculate_rsi(data, windows=range(7, 15)):
    """Add simple-moving-average RSI columns to ``data`` in place.

    For every window length ``n`` in ``windows`` a column ``RSI_{n}`` is
    added, computed as ``100 - 100 / (1 + RS)`` where ``RS`` is the rolling
    mean of the gains divided by the rolling mean of the losses over the
    last ``n`` close-to-close changes.

    Args:
        data: DataFrame with a ``Close`` column.
        windows: Iterable of window lengths. Defaults to 7 through 14, which
            yields the columns ``RSI_7`` ... ``RSI_14``.

    Returns:
        The same DataFrame object, with the RSI columns appended.

    Notes:
        * A window needs ``n`` real price changes, i.e. ``n + 1`` closes, so
          the first ``n`` rows of ``RSI_{n}`` are NaN.
        * When a window has gains but no losses the value is 100; when it
          has neither (flat prices) the value is NaN (0 / 0).
    """
    delta = data['Close'].diff()

    # The gain/loss split does not depend on the window, so do it once.
    # ``clip`` keeps the leading NaN (the first row has no previous close);
    # replacing it with 0 would let a fabricated "no change" observation
    # leak into the first window of every RSI column.
    gains = delta.clip(lower=0)
    losses = (-delta).clip(lower=0)

    for window in windows:
        avg_gain = gains.rolling(window=window).mean()
        avg_loss = losses.rolling(window=window).mean()
        relative_strength = avg_gain / avg_loss
        data[f'RSI_{window}'] = 100 - (100 / (1 + relative_strength))

    return data

# MACD (Moving Average Convergence Divergence)
def calculate_macd(data):
    """Add five MACD parameterisations to ``data`` in place.

    For offsets 1..5 the fast/slow/signal spans are ``10 + offset``,
    ``24 + offset`` and ``7 + offset``. Each combination contributes three
    columns, all suffixed ``f{fast}_s{slow}_p{signal}``:

        * ``MACD_...``: fast EMA of ``Close`` minus slow EMA of ``Close``.
        * ``MACD_Signal_...``: EMA of the MACD line over the signal span.
        * ``MACD_Hist_...``: MACD line minus signal line.

    Args:
        data: DataFrame with a ``Close`` column.

    Returns:
        The same DataFrame object, with the 15 MACD columns appended.
    """
    close = data['Close']

    for offset in range(1, 6):
        fast, slow, smooth = 10 + offset, 24 + offset, 7 + offset
        suffix = f'f{fast}_s{slow}_p{smooth}'

        # Both EMAs wait for ``slow`` observations, so they start together.
        fast_ema = close.ewm(span=fast, min_periods=slow).mean()
        slow_ema = close.ewm(span=slow, min_periods=slow).mean()

        macd_line = fast_ema - slow_ema
        signal_line = macd_line.ewm(span=smooth, min_periods=smooth).mean()

        data[f'MACD_{suffix}'] = macd_line
        data[f'MACD_Signal_{suffix}'] = signal_line
        data[f'MACD_Hist_{suffix}'] = macd_line - signal_line

    return data

def drop_data(data):
    """Remove the raw price columns and the raw close lags from ``data`` in place.

    Drops ``Open``, ``High``, ``Low``, ``Volume``, ``Adj Close`` and
    ``Close_lag_1day`` ... ``Close_lag_5day``; every other column is kept.

    Args:
        data: DataFrame containing all of the columns listed above.

    Returns:
        The same DataFrame object, with those columns removed.
    """
    raw_columns = ['Open', 'High', 'Low', 'Volume', 'Adj Close']
    lag_columns = [f'Close_lag_{back}day' for back in range(1, 6)]
    data.drop(columns=raw_columns + lag_columns, inplace=True)
    return data

def main_function(ticker="CEB.csv"):
    """Run the full feature pipeline for one price CSV.

    The steps are, in order: ``read_data``, ``transformation``,
    ``calculate_rsi``, ``calculate_macd``, ``drop_data``, and finally the
    removal of every row that still contains a NaN.

    Args:
        ticker: Path of the CSV file to process. Defaults to ``"CEB.csv"``,
            the file this function previously had hard-coded.

    Returns:
        The resulting feature DataFrame, with incomplete rows removed.
    """
    data = read_data(ticker)
    data = transformation(data)
    data = calculate_rsi(data)
    data = calculate_macd(data)
    data = drop_data(data)

    # Shifted columns and rolling/EWM indicators leave NaN at the edges of
    # the table; keep only rows where every feature is available.
    data.dropna(inplace=True)

    return data

# Run the pipeline and write the resulting table to data.csv, omitting the
# row index from the output file.
df = main_function()
df.to_csv("data.csv",index=False)