## Statistical Analysis for the Data from a Heterogeneous Market with a Rapidly Changing Equilibrium

In [None]:
import scipy as sp
from scipy import stats
import seaborn as sns 
import matplotlib.pyplot as plt 
import statsmodels.api as sm
import pandas as pd
import numpy as np
import glob
import os

In [None]:
# Read the results table. Column labels are supplied explicitly through
# `names`, so the first line of the file is returned as an ordinary row;
# the positional slice discards that first row and keeps every row after it.
# NOTE(review): the dropped row is presumably the file's own header line --
# confirm against dhets_3.csv.
df = pd.read_csv(
    'dhets_3.csv',
    names=['avg_profit1', 'run_n', 'k', 'F'],
).iloc[1:, :]

### Normality

In [None]:
# Shapiro-Wilk normality check of avg_profit1 within each k level and,
# separately, within each F level.
k_range = np.logspace(1, 10, num=10, base=2)
k_range = k_range[1:10]  # drop 2**1, leaving 4, 8, ..., 1024
k_range = k_range.astype(int)
F_range = np.linspace(0.0, 2.0, 10)

# Coerce the columns to numbers once so the group selection below can be a
# vectorized comparison; this works whether df holds numbers or their string
# form, and raises if a value cannot be parsed.
profit = pd.to_numeric(df['avg_profit1'])
k_values = pd.to_numeric(df['k'])
F_values = pd.to_numeric(df['F'])


def _report_shapiro(label, sample):
    """Run Shapiro-Wilk on ``sample`` and print the outcome for ``label``.

    Parameters:
        label: value printed after "Condition " to identify the group.
        sample: one-dimensional numeric array of observations.

    Prints one line per call and returns None. Groups with fewer than three
    observations are reported and skipped instead of being passed to
    ``stats.shapiro``.
    """
    if len(sample) < 3:
        print("Condition {}. Fewer than 3 observations; "
              "Shapiro-Wilk test skipped.".format(label))
        return
    _, pvalue = stats.shapiro(sample)
    # NOTE(review): failing to reject the null is not proof of normality; the
    # wording of the second message is kept as it was.
    if pvalue < 0.05:
        template = ("Condition {}. We can reject the null hypothesis "
                    "(p={:.2f}). Therefore, data is not normally distributed.")
    else:
        template = ("Condition {}. We cannot reject the null hypothesis "
                    "(p={:.2f}). Therefore, data is normally distributed.")
    print(template.format(label, pvalue))


for k in k_range:
    _report_shapiro(k, profit[(k_values == k).to_numpy()].to_numpy())

for f in F_range:
    # Rows are matched on the first three characters of str(f)
    # (e.g. 0.2222... -> "0.2"), the same key the string comparison used.
    f_key = float(str(f)[0:3])
    _report_shapiro(f, profit[np.isclose(F_values, f_key)].to_numpy())

In [None]:
# Shapiro-Wilk normality check of avg_profit1 for every (k, F) combination,
# counting how many combinations do / do not reject normality at alpha = 0.05.
normal_count = 0
not_normal_count = 0

# Coerce once so each group can be selected with a vectorized mask instead of
# walking the frame row by row for all len(k_range) * len(F_range) conditions.
profit = pd.to_numeric(df['avg_profit1'])
k_values = pd.to_numeric(df['k'])
F_values = pd.to_numeric(df['F'])

for k in k_range:
    k_mask = (k_values == k).to_numpy()
    for f in F_range:
        # Rows are matched on the first three characters of str(f),
        # which is also the label printed for the F level.
        f_label = str(f)[0:3]
        sample = profit[k_mask & np.isclose(F_values, float(f_label))].to_numpy()
        if len(sample) < 3:
            # stats.shapiro cannot run on fewer than three observations;
            # such a combination is reported and left out of both counters.
            print("Condition {} {}. Fewer than 3 observations; "
                  "Shapiro-Wilk test skipped.".format(k, f_label))
            continue
        _, pvalue = stats.shapiro(sample)
        if pvalue < 0.05:
            print("Condition {} {}. We can reject the null hypothesis "
                  "(p={:.2f}). Therefore, data is not normally "
                  "distributed.".format(k, f_label, pvalue))
            not_normal_count += 1
        else:
            print("Condition {} {}. We cannot reject the null hypothesis "
                  "(p={:.2f}). Therefore, data is normally "
                  "distributed.".format(k, f_label, pvalue))
            normal_count += 1
print(normal_count)
print(not_normal_count)

### Hypothesis testing

In [None]:
# Build a wide table with one column per k level holding that level's
# avg_profit1 observations, for use by the group-comparison tests.
by_ks = pd.DataFrame(columns=k_range)
print(by_ks)

# Select each group with a vectorized mask and store the profits as numbers,
# so rank-based tests order them numerically rather than as text.
profit = pd.to_numeric(df['avg_profit1'])
k_values = pd.to_numeric(df['k'])
for k in k_range:
    # Assigning a plain list keeps the requirement that every k level has
    # the same number of observations (a length mismatch raises ValueError).
    by_ks[k] = profit[(k_values == k).to_numpy()].tolist()

by_ks.head()

In [None]:
# Build a wide table with one column per F level holding that level's
# avg_profit1 observations, for use by the group-comparison tests.
# F levels are keyed by the first three characters of their string form
# (e.g. 0.2222... -> 0.2); re-running this line on the result is a no-op.
F_range = [float(str(j)[0:3]) for j in F_range]
by_fs = pd.DataFrame(columns=F_range)
print(by_fs)

# Select each group with a vectorized mask and store the profits as numbers,
# so rank-based tests order them numerically rather than as text.
profit = pd.to_numeric(df['avg_profit1'])
F_values = pd.to_numeric(df['F'])
for f in F_range:
    # Assigning a plain list keeps the requirement that every F level has
    # the same number of observations (a length mismatch raises ValueError).
    by_fs[f] = profit[np.isclose(F_values, f)].tolist()

by_fs.head()

##### Kruskal-Wallis

In [None]:
# Kruskal-Wallis H-test across all k levels (one sample per by_ks column).
# Samples are coerced to numbers first so ranking is numeric even if the
# table holds string values. The test's null hypothesis concerns population
# medians, so the messages say "median" rather than "mean".
k_samples = [pd.to_numeric(by_ks[col]).to_numpy() for col in by_ks.columns]
_, pvalue = stats.kruskal(*k_samples)
if pvalue < 0.05:
    print("KW: (p=" + "{:.2f}".format(pvalue) +
          " < 0.05). Reject null. The groups have a " +
          "different population median.")
else:
    print("KW: (p=" + "{:.2f}".format(pvalue) +
          " > 0.05). Cannot reject null hypothesis " +
          "that groups have same population median.")

In [None]:
# Kruskal-Wallis H-test across all F levels (one sample per by_fs column).
# Samples are coerced to numbers first so ranking is numeric even if the
# table holds string values. The test's null hypothesis concerns population
# medians, so the messages say "median" rather than "mean".
F_samples = [pd.to_numeric(by_fs[col]).to_numpy() for col in by_fs.columns]
_, pvalue = stats.kruskal(*F_samples)
if pvalue < 0.05:
    print("KW: (p=" + "{:.2f}".format(pvalue) +
          " < 0.05). Reject null. The groups have a " +
          "different population median.")
else:
    print("KW: (p=" + "{:.2f}".format(pvalue) +
          " > 0.05). Cannot reject null hypothesis " +
          "that groups have same population median.")