In [6]:
import pandas as pd
import math
from datetime import date
import glob
import json
import numpy as np
import nbimporter
import sys
import numpy as np


In [13]:
def load_data(file):
    """Read one CSV and normalise its timestamp columns.

    Args:
        file: Anything ``pd.read_csv`` accepts (path or file-like object).

    Returns:
        DataFrame sorted ascending by 'Date', with two derived string columns
        ('Day' as mm/dd/YYYY and 'Time' as HH:MM), rows containing any NaN
        removed, and a fresh 0..n-1 index.
    """
    frame = pd.read_csv(file)

    if 'Date' not in frame.columns:
        # No 'Date' column: parse 'Datetime' as UTC and express it in US/Eastern.
        stamps = pd.to_datetime(frame['Datetime'], utc=True).dt.tz_convert('US/Eastern')
    else:
        stamps = pd.to_datetime(frame['Date'])
    frame['Date'] = stamps

    frame = frame.sort_values(by='Date', ascending=True)
    frame = frame.assign(
        Day=frame['Date'].dt.strftime('%m/%d/%Y'),
        Time=frame['Date'].dt.strftime('%H:%M'),
    )
    return frame.dropna().reset_index(drop=True)

def load_all_5min_spx(files=None):
    """Load a set of SPX CSV files and merge them into one frame.

    Args:
        files: Optional iterable of CSV paths (anything ``load_data`` accepts).
            When omitted, the notebook's built-in list of local files is used.

    Returns:
        DataFrame of all rows whose 'Day' falls after 08/31/2022, with exact
        duplicate rows removed, sorted ascending by 'Date'. An empty DataFrame
        is returned when `files` is empty.
    """
    if files is None:
        files = [
            "/Users/partha/Downloads/0-DTE/indexes/SPX-Dec-13.csv",
            "/Users/partha/Downloads/0-DTE/indexes/SPX-Dec-16.csv",
            "/Users/partha/Downloads/0-DTE/indexes/SPX-Dec-12.csv",
            "/Users/partha/Downloads/0-DTE/indexes/SPX-Dec-9.csv",
            "/Users/partha/Downloads/0-DTE/indexes/SPX-Dec-8.csv",
            "/Users/partha/Downloads/0-DTE/indexes/SPX-DEC-6.csv",
            "/Users/partha/Downloads/0-DTE/indexes/SPX-DEC-7.csv",
            "/Users/partha/Downloads/0-DTE/indexes/spx-oct-28-snapshot.csv",
            "/Users/partha/Downloads/0-DTE/indexes/SPX-Dec-20.csv",
            "/Users/partha/Downloads/0-DTE/indexes/SPX-Dec-19.csv",
        ]

    frames = []
    for file in files:
        print ('Loading file {}'.format(file))
        frames.append(load_data(file))
    if not frames:
        # pd.concat refuses an empty list; nothing to load means nothing to return.
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; one concat also avoids
    # re-copying the accumulated frame on every iteration.
    df = pd.concat(frames)

    # 'Day' holds 'mm/dd/YYYY' text, so comparing the strings orders by month
    # first and goes wrong across a year boundary. Compare parsed dates instead.
    day = pd.to_datetime(df['Day'], format='%m/%d/%Y')
    df = df[day > pd.Timestamp('2022-08-31')]
    return df.drop_duplicates().sort_values(by='Date', ascending=True)

In [17]:
def build_seq(segment, split_size=10):
    """Encode a price segment as a list of integer levels.

    The segment's rows are cut into `split_size` consecutive chunks (chunk
    sizes follow ``np.array_split``). A price is mapped to
    ``int((price - lowest Low) * split_size / (highest High - lowest Low))``.
    The first entry appended uses its chunk's first 'Open'; every later entry
    uses its chunk's last 'Close'.

    Args:
        segment: DataFrame with 'Open', 'High', 'Low' and 'Close' columns.
        split_size: Number of chunks, and the scale of the level grid.

    Returns:
        List of ints, one per chunk that could be encoded. A chunk that fails
        (for example an empty chunk when the segment has fewer rows than
        `split_size`) is reported and skipped, so the list can be shorter than
        `split_size`. A segment whose price range is zero or NaN yields [].
    """
    floor = segment['Low'].min()
    rge = segment['High'].max() - floor
    if pd.isna(rge) or rge == 0:
        # Every level would be a division by zero; bail out once instead of
        # reporting the same failure for each chunk.
        return []

    # Split row positions rather than the frame itself: np.array_split applied
    # directly to a DataFrame relies on DataFrame.swapaxes, which pandas has
    # deprecated.
    chunk_positions = np.array_split(np.arange(len(segment)), split_size)

    amap = []
    for positions in chunk_positions:
        try:
            chunk = segment.iloc[positions]
            open_level = int((chunk['Open'].iloc[0] - floor) * split_size / rge)
            close_level = int((chunk['Close'].iloc[-1] - floor) * split_size / rge)
            # Only the very first recorded entry is anchored on an Open.
            amap.append(close_level if amap else open_level)
        except Exception as exc:
            print ("Exception while building Sequence {}".format(exc))
    return amap
def train_segments(chart, seq_map, split_size=10):
    """Slide a `split_size`-row window over `chart` and record each window's sequence.

    Args:
        chart: DataFrame with the columns `build_seq` needs plus 'Day' and 'Time'.
        seq_map: Dict that is updated in place; keys are ``str(sequence)``, so a
            later window with the same sequence overwrites an earlier one.
        split_size: Window length in rows, also passed to `build_seq`.

    Returns:
        The same `seq_map` object that was passed in.
    """
    bars = chart.reset_index(drop=True)

    for start in range(len(bars)):
        window = bars[start:start + split_size].reset_index(drop=True)
        if len(window) < split_size:
            # Windows running off the end of the chart are too short to use.
            continue

        levels = build_seq(window, split_size)
        if len(levels) != split_size:
            # build_seq could not encode every chunk; ignore this window.
            continue

        # Store levels relative to the first one.
        relative = [level - levels[0] for level in levels]

        first_bar = window.iloc[0]
        last_bar = window.iloc[-1]
        seq_map[str(relative)] = {
            "Sequence": relative,
            'Start date': first_bar['Day'],
            'End Date': last_bar['Day'],
            'Start Time': first_bar['Time'],
            'End Time': last_bar['Time'],
        }
    return seq_map

In [20]:


def to_pst(time1):
    """Shift an 'HH:MM' string back by three hours.

    The hour is written without zero padding and is not wrapped, so an input
    earlier than 03:00 yields a negative hour. The minutes text is copied
    through unchanged.
    """
    parts = time1.split(":")
    shifted_hour = int(parts[0]) - 3
    return '{}:{}'.format(shifted_hour, parts[1])

def to_est(time1):
    """Shift an 'H:MM' / 'HH:MM' string forward by three hours.

    The result always has a two-digit hour and wraps past midnight, so it has
    the same shape as the '%H:%M' strings that `load_data` writes to the 'Time'
    column. (Previously only an hour of exactly 9 was zero-padded, so results
    such as '7:05' could never equal a '%H:%M' value.)

    Args:
        time1: Time text whose first ':'-separated field is the hour and whose
            second is the minutes.

    Returns:
        'HH:MM' string three hours later; minutes text is copied unchanged.
    """
    parts = time1.split(":")
    hour = (int(parts[0]) + 3) % 24
    return '{:02d}:{}'.format(hour, parts[1])

def test_one_segment(dft, model, window):
    """Run `test_a_segment` on `dft` against `model` and return its result.

    `window` is forwarded as `test_a_segment`'s `split_size` keyword.
    """
    return test_a_segment(dft, model, split_size=window)


        
def test_a_day(thisday, window, horizon, offset=25, reference='11:30', close='13:00'):
    """Match the bars leading up to `reference` on `thisday` against a saved model.

    Reads the names `df5min_test`, `classified` and `cdf` from outside this
    function; none of them is assigned here.

    Args:
        thisday: 'Day' string selecting the rows of `df5min_test` to test.
        window: Number of trailing sample rows to match; also selects the model
            file 'SPX-model-{window}.json'.
        horizon: Iterable of 'HH:MM' strings. Each is converted with `to_est`
            and produces an 'At<HH_MM>' entry in the response. A falsy value
            skips this step.
        offset: Not used by this function.
        reference: Only rows whose 'Time' string is <= this value are sampled.
        close: 'Time' used as a fallback when a horizon lookup in `cdf` fails.

    Returns:
        dict with the test window, the matched window reported by
        `test_one_segment`, 'Predicted Outcome' and 'Actual' ('Up'/'Down'), and
        one 'At<HH_MM>' entry per horizon ('Up', 'Down' or 'Error').
    """
    up = 'Up'
    down = 'Down'
    model_file = '/Users/partha/Downloads/0-DTE/models/SPX-model-{}.json'.format(window)
    # Context manager so the file handle is closed as soon as the model is parsed.
    with open(model_file) as model_fh:
        model = json.load(model_fh)
    print ("Loaded model {} successfully ({})".format(model_file, len(model)))
    daydf = df5min_test[(df5min_test['Day'] == thisday)]

    # Keep the last `window` rows at or before the reference time.
    test_samples = daydf[daydf['Time'] <= reference]
    test_samples = test_samples[-window:]

    pred = test_one_segment(test_samples, model, window)

    # Rows used to judge what followed the matched pattern.
    # NOTE(review): 'Day' is compared as 'mm/dd/YYYY' text and the 'Time' filter
    # applies to every selected day, so this can span many days — confirm that
    # is the intended "after the match" window.
    pdf = df5min_test[(df5min_test['Day'] >= pred['End Date']) & (df5min_test['Time'] >= pred['End Time'])]

    predicted_outcome = up if pdf.iloc[-1]['Close'] > pdf.iloc[0]['Close'] else down
    response = {
                'Test Day': thisday, 'TestTime0': to_pst(test_samples.iloc[0]['Time']),
                'TestTime1':  to_pst(test_samples.iloc[-1]['Time']),
                'Start': pred['Start date'], 'End':pred['End Date'],
                'Time0': to_pst(pred['Start Time']), 'Time1': to_pst(pred['End Time']),
                'Distance': pred['Distance'],
                'predicted Until' : pdf.iloc[-1]['Time'],
                'Actual Until' : daydf.iloc[-1]['Time'],
                'Tags' : classified.get(pred['Start date'], None),
                'Predicted Outcome' : predicted_outcome
               }

    # Actual direction: last close of the test day versus the last sampled close.
    response['Actual'] = up if daydf.iloc[-1]['Close'] > test_samples.iloc[-1]['Close'] else down

    # NOTE(review): `cdf` is not assigned in this function or in any visible
    # cell; unless a global of that name exists this line raises NameError.
    # Presumably it is the test-day rows from `reference` onward — confirm.
    if len(cdf) > 0 and horizon:
        for h in horizon:
            h = to_est(h)
            key = 'At{}'.format(h.replace(':', '_'))
            try:
                response[key] = up if cdf.iloc[0]['Close'] - cdf.loc[cdf['Time'] == h].iloc[0]['Close'] < 0 else down
            except Exception:
                # No usable row at this horizon: fall back to the `close` time,
                # and report 'Error' if that lookup fails as well.
                value = 'Error'
                try:
                    value = up if cdf.iloc[0]['Close'] - cdf.loc[cdf['Time'] == close].iloc[0]['Close'] < 0 else down
                except Exception:
                    pass
                response[key] = value
    return response


# Module-level list of horizon times ('HH:MM'). run_test has a parameter of the
# same name, which shadows this list inside that function.
horizons = ['12:00', '12:55']
def run_test(horizons, windows, test_day = '12/09/2022', offset=25, reference='11:30'):
    """Run `test_a_day` once per window size and tabulate the responses.

    Args:
        horizons: Forwarded to `test_a_day` as its `horizon` argument.
        windows: Iterable of window sizes (for example the integers 10 to 25).
        test_day: 'Day' string of the day under test.
        offset: Forwarded to `test_a_day`.
        reference: Forwarded to `test_a_day`.

    Returns:
        DataFrame with one row per window: the `test_a_day` response plus a
        'Window' column.
    """
    rows = []
    for size in windows:
        outcome = test_a_day(test_day, size, horizons, offset, reference=reference)
        outcome['Window'] = size
        # No filtering: matches that span two days, or that start on the test
        # day itself, are kept in the results.
        rows.append(outcome)
    return pd.DataFrame(rows)


In [16]:
# Load every configured SPX CSV into one frame; a 'Loading file ...' line is
# printed for each file read.
spx = load_all_5min_spx()


Loading file /Users/partha/Downloads/0-DTE/indexes/SPX-Dec-13.csv




Loading file /Users/partha/Downloads/0-DTE/indexes/SPX-Dec-16.csv




Loading file /Users/partha/Downloads/0-DTE/indexes/SPX-Dec-12.csv




Loading file /Users/partha/Downloads/0-DTE/indexes/SPX-Dec-9.csv




Loading file /Users/partha/Downloads/0-DTE/indexes/SPX-Dec-8.csv




Loading file /Users/partha/Downloads/0-DTE/indexes/SPX-DEC-6.csv




Loading file /Users/partha/Downloads/0-DTE/indexes/SPX-DEC-7.csv




Loading file /Users/partha/Downloads/0-DTE/indexes/spx-oct-28-snapshot.csv




Loading file /Users/partha/Downloads/0-DTE/indexes/SPX-Dec-20.csv




Loading file /Users/partha/Downloads/0-DTE/indexes/SPX-Dec-19.csv




NameError: name 'build_seq' is not defined

In [19]:
# Build the sequence map from rows whose 'Day' string compares <= '11/15/2022',
# using 25-row windows and starting from an empty dict.
# NOTE(review): 'Day' holds 'mm/dd/YYYY' text, so this is a lexicographic
# comparison — confirm it selects the intended dates.
model = train_segments(spx[spx['Day'] <= '11/15/2022'], {}, split_size=25)

In [None]:
def run_experiment(df, window=25):
    model_file = '/Users/partha/Downloads/0-DTE/models/SPX-model-{}.json'.format(window)
    pred = test_one_segment(df[-25:], model, window)
    