# We want to create an algorithm that will select the best strategy at any given datapoint based on the profitablility over the last 1 hour.
## To do this we'll need to create many dataframes and compare them.
##
Do all of our imports up top

In [137]:
import numpy as np
import os
import pickle
import pandas as pd
import itertools
import matplotlib.pyplot as plt
import pandas.io.data as web
%matplotlib inline

In [138]:
def get_dataframe():
    mydata = None
    try:
        mydata = pickle.load(open('modelling/stats_pickle5', 'rb'))
    except:
        mydata = pickle.load(open('../resources/stats_pickle5', 'rb'))
    df = pd.DataFrame(mydata, columns=["Time", "Value"])
    df = df.set_index('Time')
    return df

In [143]:
class Name():
    def __init__(self, means, sd):
        self.mean1 = str(means[0])+"hr"
        self.mean1_val = means[0]
        self.mean2 = str(means[1])+"hr"
        self.mean2_val = means[1]
        self.name = "{}-{}-{}".format(self.mean1,self.mean2,sd)
        self.sd = sd
        self.strategy = "Strategy-{}-%.1f".format(self.name) % sd
        self.regime = "Regime-{}-%.1f".format(self.name) % sd
        self.cumsum = "Cumulative-{}-%.1f".format(self.name) % sd
    
    @classmethod
    def from_strategy_name(cls, strat):
        parts = strat.split('-')
        return cls([p.rstrip("hr") for p in parts[1:3]],float(parts[3]))

In [140]:
class StrategySelector():
    def __init__(self, dataframe, name, means, sds):
        self.dataframe = dataframe
        self.current_strategy=None
        self.means = means
        self.create_means()
        self.set_market()
        self.available_stategies = [Name(means, sd) for means in list(itertools.combinations(means, 2)) for sd in SDs]
        for name in regime_and_strategy_names:
            self.create_diff(name)
            self.create_regime(name)
            self.create_strategy(name)
    
    def strategies(self):
        return self.dataframe[[n.strategies for n in self.available_stategies]]
    def create_means(self):
        for mean in self.means:
            self.dataframe[str(mean)+"hr"] = np.round(pd.rolling_mean(df['Value'], window=mean*60), 2)

    def create_diff(self, name):
            self.dataframe[name.name] = self.dataframe[name.mean1] - self.dataframe[name.mean2]

    def create_regime(self, name):
        self.dataframe[name.regime] = np.where(self.dataframe[name.name] > name.sd, 1, 0)
        self.dataframe[name.regime] = np.where(self.dataframe[name.name] < -name.sd, -1, self.dataframe[name.regime])

    def set_market(self):
        self.dataframe['Market'] = np.log(self.dataframe['Value']/ self.dataframe['Value'].shift(1))
        self.dataframe['Market-cumulative'] = self.dataframe['Market'].cumsum().apply(np.exp)

    def create_strategy(self, name):
        self.dataframe[name.strategy] = self.dataframe[name.regime].shift(5) * self.dataframe['Market']
    
    def select_on_time_performance(hours=1):
        dataframe2= self.strategies.cumsum().apply(np.exp) - self.strategies.cumsum().apply(np.exp).shift(hours*60)
        
        #self.dataframe['best_{}_hours'.format(hours)] = self.dataframe[]
        last_24 = df[-2880:][[a.strategy for a in regime_and_strategy_names]].cumsum().apply(np.exp)[-1:].transpose().sort([df.index.tolist()[-1]],ascending=[0])

In [144]:
class Dataset():
    def __init__(self, loc):
        mydata = None
        try:
            mydata = pickle.load(open('modelling/{}'.format(loc), 'rb'))
        except:
            mydata = pickle.load(open('../resources/{}'.format(loc), 'rb'))
        self.dataset = pd.DataFrame(mydata, columns=["Time", "Value"]).drop_duplicates()
        self.dataset["Time"] = pd.to_datetime(self.dataset["Time"])
        self.dataset = self.dataset.set_index('Time')
        
        self.dataset['Market'] = np.log(self.dataset['Value']/ self.dataset['Value'].shift(1))
        
        
    
class Strategy():
    def __init__(self, dataset, means=None, sd=None, name=None):
        
        if name is None: 
            self.name = Name(means, sd)
            cols = list(dataset.columns.values)
            if self.name.mean1 not in cols:
                dataset[self.name.mean1] = np.round(pd.rolling_mean(dataset['Value'], window=self.name.mean1_val*60), 2)
            if self.name.mean2 not in cols:
                dataset[self.name.mean2] = np.round(pd.rolling_mean(dataset['Value'], window=self.name.mean2_val*60), 2)

            self.dataset = pd.DataFrame(dataset[['Market',self.name.mean1, self.name.mean2]])
            # Find our Diff
            self.dataset['Difference']= self.dataset[self.name.mean1] - self.dataset[self.name.mean2]
            # Set our Regime
            self.dataset['Regime'] = np.where(self.dataset['Difference'] > self.name.sd, 1, 0)
            self.dataset['Regime'] = np.where(self.dataset['Difference'] < -self.name.sd, -1, self.dataset['Regime'])
        else:
            self.dataset = pd.DataFrame(dataset[['Market','Regime']])
        self.dataset['Strategy'] = self.dataset['Regime'].shift(1) * self.dataset['Market']
        self.dataset['hourly_gain'] = self.dataset['Strategy'].cumsum().apply(np.exp) - self.dataset['Strategy'].shift(60).cumsum().apply(np.exp)
    


In [166]:
df = Dataset('stats_pickle5')
means = range(3,11)
SDs = np.arange(0.1,0.3,0.1)
strategies = [Strategy(df.dataset, means, sd) for means in list(itertools.combinations(means, 2)) for sd in SDs]
items = {s.name.name:s.dataset for s in strategies}
pan = pd.Panel(items)

hourly_gains = pan.transpose(2,0,1)['hourly_gain'].transpose().dropna()
regimes = pan.transpose(2,0,1)['Regime'].transpose().dropna()

regimes['winning_strategy_name'] = hourly_gains.idxmax(axis=1).dropna()
regimes["winning_strategy_name"] = np.where(pd.isnull(regimes["winning_strategy_name"]), 0, regimes["winning_strategy_name"])
#regimes["winning_strategy_index"] = np.where(pd.isnull(regimes["winning_strategy_name"]), 0, regimes.columns.get_loc(regimes["winning_strategy_name"]))
#regimes["winning_strategy_index"].value_counts()
for name in strategies:
    regimes["winning_strategy"] = np.where(regimes['winning_strategy_name'] == name.name.name, regimes[name.name.name], 0)
    regimes["winning_strategy_index"] = np.where(regimes['winning_strategy_name'] == name.name.name, regimes.columns.tolist().index(name.name.name), -1)
    #print(regimes.columns.get_loc(name.name.name), name.name.name)
    #print(regimes.columns.tolist().index(name.name.name))

#regimes['winning_strategy_index'].value_counts()
df.dataset['Regime'] = regimes['winning_strategy']
s = Strategy(df.dataset, name="Hourly Best")
regimes['winning_strategy_index']#.resample("H",fill_method="ffill")
#s.dataset[['Regime']].plot()
#s.dataset[['Strategy', 'Market']].cumsum().apply(np.exp).plot(grid=True, figsize=(20,12))




Time
2015-06-09 00:14:00   -1
2015-06-09 00:15:00   -1
2015-06-09 00:17:00   -1
2015-06-09 00:18:00   -1
2015-06-09 00:19:00   -1
2015-06-09 00:20:00   -1
2015-06-09 00:21:00   -1
2015-06-09 00:22:00   -1
2015-06-09 00:23:00   -1
2015-06-09 00:25:00   -1
2015-06-09 00:26:00   -1
2015-06-09 00:27:00   -1
2015-06-09 00:29:00   -1
2015-06-09 00:30:00   -1
2015-06-09 00:31:00   -1
2015-06-09 00:33:00   -1
2015-06-09 00:34:00   -1
2015-06-09 00:35:00   -1
2015-06-09 00:36:00   -1
2015-06-09 00:37:00   -1
2015-06-09 00:38:00   -1
2015-06-09 00:39:00   -1
2015-06-09 00:40:00   -1
2015-06-09 00:41:00   -1
2015-06-09 00:42:00   -1
2015-06-09 00:43:00   -1
2015-06-09 00:44:00   -1
2015-06-09 00:45:00   -1
2015-06-09 00:46:00   -1
2015-06-09 00:47:00   -1
                      ..
2015-06-24 06:04:00   -1
2015-06-24 06:05:00   -1
2015-06-24 06:06:00   -1
2015-06-24 06:07:00   -1
2015-06-24 06:08:00   -1
2015-06-24 06:09:00   -1
2015-06-24 06:10:00   -1
2015-06-24 06:11:00   -1
2015-06-24 06:12:00 