# Bitcoin Financial Modelling
## Based on previous experimentation, I'm going to break down what I've found into classes and explain each one individually

In [None]:
import numpy as np
import os
import pickle
import pandas as pd
import itertools
import matplotlib.pyplot as plt
import pandas.io.data as web
%matplotlib inline

### Step 1: Setting up our dataset

In [None]:
class Dataset():
    """Price history loaded from a pickled file into a pandas DataFrame.

    After construction ``self.dataset`` is indexed by ``Time`` and holds two
    columns:

    * ``Value``  -- the second field of each unpickled record.
    * ``Market`` -- the natural log of each ``Value`` divided by the previous
      row's ``Value`` (a log return, not a percentage); the first row is NaN.
    """

    def __init__(self, loc):
        """Load the pickled records named ``loc`` and build ``self.dataset``.

        The file is looked up as ``modelling/<loc>`` first and, if that
        cannot be opened, as ``../resources/<loc>``. Both paths are relative
        to the current working directory.

        Raises:
            IOError/OSError: if neither path can be opened.
        """
        # Only a failure to *open* the first path triggers the fallback; a
        # file that opens but fails to unpickle is reported as-is rather than
        # being masked by a "file not found" error for the second path.
        try:
            handle = open('modelling/{}'.format(loc), 'rb')
        except (IOError, OSError):
            handle = open('../resources/{}'.format(loc), 'rb')

        # NOTE(security): unpickling can execute arbitrary code, so this must
        # only ever be pointed at trusted data files.
        with handle:
            mydata = pickle.load(handle)

        # Create the dataframe, label the columns and drop exact duplicate rows.
        self.dataset = pd.DataFrame(mydata, columns=["Time", "Value"]).drop_duplicates()

        # Convert the time column to pandas datetimes (easier to manipulate)
        # and use it as the index.
        self.dataset["Time"] = pd.to_datetime(self.dataset["Time"])
        self.dataset = self.dataset.set_index('Time')

        # Log return relative to the previous sample.
        self.dataset['Market'] = np.log(self.dataset['Value'] / self.dataset['Value'].shift(1))

### Step 2: A class to create strategies

In [None]:
class Strategy():
    """A named trading strategy: a regime signal applied to market returns.

    Attributes set by ``__init__``:
        name: label identifying the strategy.
        dataset: DataFrame with ``Market``, ``Regime`` and ``Strategy``
            columns, plus ``hourly_gain`` when built with ``stop=False``.
    """

    def __init__(self, dataset, name, stop=False, gain_window=60):
        """Build the strategy columns from ``dataset``.

        Args:
            dataset: DataFrame containing at least ``Market`` and ``Regime``
                columns. It is not modified.
            name: label for this strategy.
            stop: when True, only positive regimes earn a return (every other
                row contributes 0) and no ``hourly_gain`` column is produced.
                When False, the previous row's regime is multiplied by the
                market return, so negative regimes count as well.
            gain_window: number of rows to look back when computing
                ``hourly_gain``. The default of 60 is an hour only if the data
                is sampled once per minute -- TODO confirm against the data.
        """
        self.name = name
        # Only the market and regime columns matter. Coercing them to numeric
        # up front replaces the removed ``convert_objects(convert_numeric=True)``
        # and also yields an independent frame, so adding columns below never
        # writes through to (or warns about) the caller's data.
        self.dataset = dataset[['Market', 'Regime']].apply(pd.to_numeric, errors='coerce')

        market = self.dataset['Market']
        # The regime decided on one row is applied to the next row's return.
        held = self.dataset['Regime'].shift(1)

        if stop:
            # No shorting: anything that is not a positive regime earns 0.
            self.dataset['Strategy'] = np.where(held > 0, held * market, 0)
        else:
            # Shorting included: useful for comparing strategy performance.
            self.dataset['Strategy'] = held * market
            # Growth factor now minus the growth factor ``gain_window`` rows
            # ago. The column keeps the name 'hourly_gain' whatever the
            # window, because other code in this file reads it by that name.
            returns = self.dataset['Strategy']
            growth_now = np.exp(returns.cumsum())
            growth_before = np.exp(returns.shift(gain_window).cumsum())
            self.dataset['hourly_gain'] = growth_now - growth_before

    # Alternative constructors.

    @classmethod
    def from_regime(cls, dataset, name):
        """Build a ``stop=True`` strategy from a frame that already has a regime.

        Args:
            dataset: DataFrame with ``Market`` and ``Regime`` columns.
            name: label for the strategy.
        """
        return cls(dataset[['Market', 'Regime']].copy(), name, True)

    @classmethod
    def from_means(cls, dataset, mean1, mean2, sd):
        """Build a strategy from the difference of two rolling means.

        The regime is 1 where ``mean1 - mean2`` exceeds ``sd``, -1 where it is
        below ``-sd`` and 0 otherwise (including rows where either rolling
        mean is still NaN).

        Args:
            dataset: DataFrame with ``Value`` and ``Market`` columns. Rolling
                means are cached on it as columns keyed by the window length,
                so this argument IS modified when a window is seen for the
                first time.
            mean1: window length (in rows) of the first rolling mean.
            mean2: window length (in rows) of the second rolling mean.
            sd: threshold the difference must exceed to leave the 0 regime.

        Returns:
            A strategy named ``"<mean1>-<mean2>-<sd>"``.
        """
        name = "{}-{}-{}".format(mean1, mean2, sd)

        # Compute each rolling mean only once per dataset to save time.
        for window in (mean1, mean2):
            if window not in dataset.columns:
                rolling_mean = dataset['Value'].rolling(window=window).mean()
                dataset[window] = np.round(rolling_mean, 2)

        # Work on a separate frame so the shared dataset is not cluttered.
        new_dataset = dataset[['Market']].copy()
        new_dataset['Difference'] = dataset[mean1] - dataset[mean2]

        # Buy/sell regime implied by these means and this threshold.
        difference = new_dataset['Difference']
        new_dataset['Regime'] = np.where(difference > sd, 1, 0)
        new_dataset['Regime'] = np.where(difference < -sd, -1, new_dataset['Regime'])
        return cls(new_dataset, name)

### Step 3: Next, let's create a class to work out which strategy is performing best

In [None]:
class Winners():
    """Per-timestamp record of which strategy has the highest ``hourly_gain``.

    ``self.dataset`` has one column per strategy (holding that strategy's
    ``Regime``), a ``Market`` column and a ``Winner`` column naming the
    strategy whose ``hourly_gain`` is largest on that row.
    """

    def __init__(self, strategies, market_series, max_mean):
        """Compare ``strategies`` row by row.

        Args:
            strategies: iterable of objects with a ``name`` and a ``dataset``
                frame containing ``Regime`` and ``hourly_gain`` columns.
            market_series: Series stored (index-aligned) as ``Market``.
            max_mean: number of leading rows ``get_strategy`` skips.

        Raises:
            ValueError: if ``strategies`` is empty.
        """
        self.max_mean = max_mean
        frames = {s.name: s.dataset for s in strategies}
        if not frames:
            raise ValueError("at least one strategy is required")

        # One column per strategy; keep only the rows where every strategy
        # has a value. (Plain DataFrames replace the removed ``pd.Panel``.)
        hourly_gains = pd.DataFrame(
            {name: frame['hourly_gain'] for name, frame in frames.items()}
        ).dropna()
        table = pd.DataFrame(
            {name: frame['Regime'] for name, frame in frames.items()}
        ).dropna()

        table['Market'] = market_series
        # Rows without a complete set of gains get no winner...
        table['Winner'] = hourly_gains.idxmax(axis=1)
        # ...and are dropped.
        self.dataset = table[table['Winner'].notnull()].copy()

    def get_strategy(self, minutes):
        """Return a ``Strategy`` that re-picks the winner every ``minutes``.

        The winner is sampled every ``minutes`` minutes and held until the
        next sample; on each row the held winner's regime becomes the regime
        of the returned strategy, which is named ``"<minutes> minute"``.
        """
        winners = self._select_regime(minutes)
        return Strategy.from_regime(winners, "{} minute".format(minutes))

    def _select_regime(self, minutes):
        """Build the frame whose ``Regime`` follows the periodically re-picked winner."""
        strategy_names = [
            column for column in self.dataset.columns
            if column not in ('Market', 'Winner')
        ]
        # Skip the first ``max_mean`` rows (by position).
        winners = self.dataset.iloc[self.max_mean:].copy()

        # Sample the winner on a regular grid, then carry each pick forward
        # until the next grid point.
        sampled = winners['Winner'].asfreq(pd.DateOffset(minutes=minutes))
        winners['resample'] = sampled
        winners['resample'] = winners['resample'].ffill()

        # Follow the regime of whichever strategy is currently held. This
        # keys on the resampled pick -- keying on the raw 'Winner' column
        # would make ``minutes`` have no effect.
        held = winners['resample']
        winners['Regime'] = 0
        for name in strategy_names:
            winners['Regime'] = np.where(held == name, winners[name], winners['Regime'])
        return winners
    
    

### Step 4: Let's actually use these things and get some input data

In [None]:
# Rolling-mean window lengths to try: 60, 120, ..., 1440 rows.
means = range(60, 1500, 60)
# Thresholds on the difference between the two rolling means: 0.0 to 0.9.
sds = np.arange(0, 1, 0.1)
dataset = Dataset('stats_pickle5')

# One strategy for every pair of distinct windows and every threshold.
strategies = []
for first_window, second_window in itertools.combinations(means, 2):
    for sd in sds:
        strategies.append(
            Strategy.from_means(dataset.dataset, first_window, second_window, sd)
        )

In [None]:
# Rank the strategies against each other; the largest window length is
# passed as the number of leading rows to skip later on.
market_returns = dataset.dataset['Market']
largest_window = means[-1]
w = Winners(strategies, market_returns, largest_window)

In [None]:
# Build the 45-minute strategy, then plot the cumulative growth of the
# market against that of the strategy (exp of the summed log returns).
chosen = w.get_strategy(45)
log_returns = chosen.dataset[['Market', 'Strategy']]
growth = log_returns.cumsum().apply(np.exp)
growth.plot(grid=True, figsize=(20, 12))