## Analysis

This notebook contains the code for the analysis of the final dataset. While rigorous econometric analysis is limited (due to the nature of the dataset), some basic tests for market efficiency can reveal a lot of insight as to how the machine learning traders affected the market.

The tests we will conduct will look at a variety of factors, including 

In [33]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import csv
import glob

In [34]:
def unpack_data(filepath): #Returns a dictionary where each item is a df containing one run. One dict per generation.
    all_files = glob.glob(filepath + "/*.csv")
    name_list = []
    datadict = {}
    for f in all_files:
        name_list.append(f[77:-4])
    for i,n in enumerate(name_list):
        datadict[n] = pd.read_csv(all_files[i], header=0)
#    for k, v in datadict.items():
#        v.drop(columns=["Unnamed: 0", "volume", "spread", "10_MA", "50_MA"], inplace=True)
    return datadict

In [35]:
gen0 = unpack_data("/Users/karangarg/Documents/Year 3 Modules/EC331/Code/rae_repo/gen0_sims/data") #Load gen0 data

In [36]:
def compute_mean_squared_diff(datadict): #Returns average mean squared difference, i.e. the square of the average distance between the true price and trading price for any given period in any given simlation
    msd_list = []
    msd_dict = {}
    for k, v in datadict.items():
        diff_list = []
        for i in range(len(v)):
            squared_diff = (v.iloc[i]["trading_price"] - v.iloc[i]["true_price"])**2
            diff_list.append(squared_diff)
        msd = sum(diff_list)/len(diff_list)
        msd_list.append(msd)
        msd_dict[k] = msd
    avg_msd = sum(msd_list)/len(msd_list)
    return avg_msd, msd_dict

In [37]:
#gen0_msd, gen0_msd_dict = compute_mean_squared_diff(gen0)

In [38]:
def reversion_rate(datadict): #For any given run of the market, how many times does price deviate and how long does it take to return to the true price
    for k, v in datadict.items():
        run_count = 0
        dev_return_list = []
        for i in range(1, len(v)):
            if abs(v.iloc[i]["trading_price"] - v.iloc[i]["true_price"]) > 1 and abs(v.iloc[i-1]["trading_price"] - v.iloc[i-1]["true_price"]) <= 1:
                for t in range(1, len(v)-i+1):
                    if abs(v.iloc[i+t-1]["trading_price"] - v.iloc[i+t-1]["true_price"]) > 1 and t == len(v)-i:
                        dev_return_list.append(("nr", t))
                        break
                    elif abs(v.iloc[i+t]["trading_price"] - v.iloc[i+t]["true_price"]) <= 1:
                        dev_return_list.append(("r", t))
                        break
    return dev_return_list

In [32]:
reversion_rate(gen0)

[('r', 3),
 ('r', 4),
 ('r', 6),
 ('r', 7),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 3),
 ('r', 3),
 ('r', 3),
 ('r', 2),
 ('r', 8),
 ('r', 14),
 ('r', 4),
 ('r', 3),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 3),
 ('r', 4),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2),
 ('r', 2)]

In [41]:
def inefficient_proportion(datadict): #Computes the proportion of time that the true and trading price deviate (1%)
    prop_list = []
    for k, v in datadict.items():
        prop_temp = 0
        for i in range(len(v)-1):
            if abs(v.iloc[i]["trading_price"] - v.iloc[i]["true_price"]) > 1:
                prop_temp +=1
        prop_list.append((prop_temp/len(v)))
    prop = sum(prop_list)/len(prop_list)
    return prop

In [42]:
inefficient_proportion(gen0)

0.5210179168669219