# Create indicator data files

In [1]:
import pandas as pd
import glob
import os

NEW_FOLDER = '../indicator-data/'

## Function to load data

In [2]:
def load_data(filepath, keep_cols, label_col=None):
    """Read a CSV file and return the requested columns with complete rows only.

    Args:
        filepath: Path (or file-like object) handed to ``pd.read_csv``.
        keep_cols: List of column names to retain, in the desired order.
        label_col: Currently unused; kept so existing callers that pass it
            keep working.

    Returns:
        A new DataFrame holding only ``keep_cols``, without any row that has
        a missing value in those columns, and with a fresh 0..n-1 index.

    Raises:
        KeyError: If any name in ``keep_cols`` is not a column of the file.
    """
    df = pd.read_csv(filepath)

    # Use the non-inplace dropna: calling dropna(inplace=True) on a column
    # selection of another frame can trigger SettingWithCopyWarning.
    df = df[keep_cols].dropna()

    # Dropping rows leaves gaps in the row labels; renumber them so that
    # positional and label-based access agree for downstream code.
    return df.reset_index(drop=True)

In [3]:
# Calculate proportion change from one column to another
def p_change(df, col1, col2):
    """Return the row-wise proportional change from ``col1`` to ``col2``.

    Each value is ``(df[col2] - df[col1]) / df[col1]``.

    Args:
        df: DataFrame containing both columns.
        col1: Name of the baseline column (the denominator).
        col2: Name of the column being compared against the baseline.

    Returns:
        A list of floats, one per row, in the row order of ``df``.
    """
    base = df[col1]
    # Vectorised arithmetic pairs rows by their shared index, so this works
    # even when the index is not 0..n-1 (e.g. after rows were dropped).
    # Looking values up with a positional counter would raise KeyError or
    # pair the wrong rows in that case.
    return ((df[col2] - base) / base).tolist()

## Load all of the files in the labeled data folder

In [4]:
input_files = glob.glob('../labeled-data/*.csv')

# Make sure the output folder exists before writing any file into it.
os.makedirs(NEW_FOLDER, exist_ok=True)

for filepath in input_files:
    df = load_data(filepath, keep_cols = ['symbol', 'Date', 'High', 'Low','Open','Close','signal'])

    # Simple moving averages of the close price; min_periods=1 means the
    # first rows average over however many values are available so far.
    sma15c = df.Close.rolling(15, 1).mean()
    sma30c = df.Close.rolling(30, 1).mean()

    # Proportional difference between the 15-day and 30-day SMA, relative to
    # the 30-day SMA. Vectorised so rows are paired by index rather than by
    # a positional counter, which breaks once load_data has dropped rows.
    df['pc_sma15c_sma30c'] = (sma30c - sma15c) / sma30c

    # Per-row proportional changes between price columns.
    df['pc_open_close'] = p_change(df, 'Open', 'Close')
    df['pc_high_low'] = p_change(df, 'High', 'Low')
    df['pc_low_close'] = p_change(df, 'Low', 'Close')

    # Write the result under the same file name in the output folder.
    new_file_name = os.path.join(NEW_FOLDER, os.path.basename(filepath))
    df.to_csv(new_file_name, index=False)