In [1]:
from pandas_datareader import data
import numpy as np
import pandas as pd
import os
%matplotlib inline

In [2]:
# Cleaned data sets live in 'data/Clean/' below the current working directory.
CWD = os.getcwd()
DATA_CLEAN_DIR = os.path.join(CWD, 'data/Clean/')

# Date range requested when downloading prices.
start_date, end_date = '2000-01-01', '2017-01-01'

# Base ticker symbols; the Yahoo symbols and the prediction column names are
# the same symbols with a fixed suffix appended.
stocks = ['ABBN', 'CSGN', 'NESN', 'NOVN']
stocks_yahoo = [ticker + '.VX' for ticker in stocks]
stocks_pred = [ticker + '_pred' for ticker in stocks]

## Getting Yahoo Data

In [3]:
# Download the adjusted close of every ticker in `stocks_yahoo` into one frame.
# The first ticker of the list -- whichever it is -- creates the frame, so the
# cell does not depend on a hard-coded symbol or on a `data_yahoo` left over
# from an earlier run of the notebook.
data_yahoo = None
for stock in stocks_yahoo:
    adj_close = data.DataReader(stock, 'yahoo', start_date, end_date)['Adj Close']
    if data_yahoo is None:
        data_yahoo = adj_close.to_frame(name=stock)
    else:
        # Column assignment aligns on the existing index, i.e. on the dates
        # of the first ticker.
        data_yahoo[stock] = adj_close

# Rebase every series so that its first row equals 100.
# NOTE(review): a ticker without a price in the first row becomes NaN for the
# whole column -- confirm this is acceptable.
data_yahoo = data_yahoo / data_yahoo.iloc[0] * 100

# Replace the Yahoo symbols by the plain tickers; relies on `stocks` and
# `stocks_yahoo` listing the tickers in the same order.
data_yahoo.columns = stocks
data_yahoo.to_csv(os.path.join(DATA_CLEAN_DIR, 'yahoo.csv.gz'), compression='gzip')

## Load Prediction and Yahoo Data

In [4]:
def _read_clean(file_name, index_col):
    '''Read one gzip-compressed CSV from DATA_CLEAN_DIR, using `index_col` as the index.'''
    return pd.read_csv(os.path.join(DATA_CLEAN_DIR, file_name), compression='gzip', index_col=index_col)


# Normalised prices, indexed by the 'Date' column.
data_yahoo = _read_clean('yahoo.csv.gz', 'Date')

# One prediction file per year, each indexed by its 'Unnamed: 0' column.
cumsum_preds_2004 = _read_clean('cumsum_preds_2004.csv.gz', 'Unnamed: 0')
cumsum_preds_2008 = _read_clean('cumsum_preds_2008.csv.gz', 'Unnamed: 0')
cumsum_preds_2012 = _read_clean('cumsum_preds_2012.csv.gz', 'Unnamed: 0')
cumsum_preds_2016 = _read_clean('cumsum_preds_2016.csv.gz', 'Unnamed: 0')

In [5]:
def verifier(real_data, prediction, rows_per_sample=25):
    '''
    Verifies how much was won or lost during the prediction period, by computing the pointwise
    product between the sign of the prediction and the difference of the real price, then
    summing up.

    Neither input dataframe is modified.

    Parameters
    ----------
    real_data:
        Dataframe containing the real prices. Its index must be convertible with
        ``pd.to_datetime``.
    prediction:
        Dataframe containing the predictions; only their sign is used. Its index must be
        convertible with ``pd.to_datetime``. Its columns are paired with the columns of
        ``real_data`` by position, and the column names of the two frames must not overlap.
    rows_per_sample: int, optional
        Only the last row of every block of ``rows_per_sample`` consecutive prediction rows
        is kept (default 25).
        NOTE(review): presumably 25 is the number of prediction rows per day -- confirm.

    Returns
    -------
    result: float
        sum of pointwise multiplication
    '''
    # Work on a copy so the caller's price frame keeps its original index.
    prices = real_data.copy()
    prices.index = pd.to_datetime(prices.index)

    # Keep the last row of every block and reduce its timestamp to the calendar day.
    keep = slice(rows_per_sample - 1, None, rows_per_sample)
    sample_days = pd.to_datetime(prediction.index)[keep].date
    # np.sign returns a new frame, so re-indexing it leaves `prediction` untouched.
    signs = np.sign(prediction.iloc[keep, :])
    # Explicit DatetimeIndex so the join below matches the price index by dtype
    # instead of relying on implicit promotion of date objects.
    signs.index = pd.DatetimeIndex(sample_days)

    # New dataframe with index of prediction
    verification = signs.join(prices, how='left')
    price_cols = prices.columns
    # Price change between consecutive sampled days.
    verification[price_cols] = verification[price_cols].diff()
    # Drops the first row (no previous price) and any row with a missing value.
    verification = verification.dropna()
    # `.values` makes the product positional: i-th prediction column times i-th price column.
    result = (verification[signs.columns].values * verification[price_cols]).sum().sum()
    return result

In [6]:
# Score every prediction set against the Yahoo prices, one result per line.
for preds in (cumsum_preds_2004, cumsum_preds_2008, cumsum_preds_2012, cumsum_preds_2016):
    print(verifier(data_yahoo, preds))

-82.46556960125604
194.2200104688197
84.67447512688608
297.94742116537395
