## Visualisation - Static Heterogeneous Sessions
In this notebook I first reformat the raw session data so that it can be visualised and analysed, and then apply visualisation techniques to extract insights from it.

In [None]:
import scipy as sp
from scipy import stats
import seaborn as sns 
import matplotlib.pyplot as plt 
import statsmodels.api as sm
import pandas as pd
import numpy as np
import glob
import os

### Preprocessing

In [None]:
# Directory holding the raw per-session CSV dumps.
path = r'/home/karolina/ieft_cw/data/shet'
all_files = glob.glob(os.path.join(path, '*.csv'))

# Column layout applied to every 'avg_bal' file: four session-level fields
# followed by four fields for each of six trader types (suffixes 0-5).
AVG_BAL_COLUMNS = ['run', 'trade', 'best_bid', 'best_offer'] + [
    field + str(trader)
    for trader in range(6)
    for field in ('trader_name', 'total_profit', 'traders_number', 'avg_profit')
]


def parse_run(run):
    """Split a run description string into ``(run_n, k, F)``.

    The string is read positionally:
      * the character at index 5 is the run number (a single digit, kept as str),
      * k is the integer that starts at index 7 and ends before the next '_',
      * F is the float formed by the last three characters.
    Presumably the strings look like 'shet_3_16_0.4' -- confirm against the data.

    Raises ValueError/IndexError when the string does not follow that layout.
    """
    run = str(run)
    k_end = run.index('_', 7)
    return run[5], int(run[7:k_end]), float(run[-3:])


# Read every 'avg_bal' file as header-less data. Reading with header=None keeps
# the first line of each file as an ordinary row; letting pandas treat it as a
# header would rename duplicate or empty cells and so alter the data.
# sorted() makes the combined file independent of the directory listing order.
frames = [
    pd.read_csv(f, header=None, names=AVG_BAL_COLUMNS)
    for f in sorted(all_files)
    if 'avg_bal' in f
]
if frames:
    df = pd.concat(frames, ignore_index=True)
else:
    print("No 'avg_bal' CSV files found in " + path)
    df = pd.DataFrame(columns=AVG_BAL_COLUMNS)

# shet_1.csv: the combined static heterogeneous sessions, raw rows only (no header).
df.to_csv('shet_1.csv', index=False, header=False)

# The run number, k and F are extracted from the run string and added as columns;
# the run string itself is then dropped. Resetting the index after the sort makes
# the index equal to the row's position in (run_n, k, F) order.
parsed = [parse_run(run) for run in df['run']]
df = (
    df.assign(
        run_n=[p[0] for p in parsed],
        k=[p[1] for p in parsed],
        F=[p[2] for p in parsed],
    )
    .drop(columns='run')
    .sort_values(by=['run_n', 'k', 'F'])
    .reset_index(drop=True)
)

# shet_2.csv: every remaining column plus run_n, k and F.
df.to_csv('shet_2.csv')

# shet_3.csv: only the average profit of trader type 1 with run_n, k and F.
# NOTE(review): the original cell also wrote a wider version (with trader_name1,
# total_profit1, traders_number1) to this same file and overwrote it right away;
# if that was meant to be a separate file, give it its own name.
df = df[['avg_profit1', 'run_n', 'k', 'F']]
df.to_csv('shet_3.csv')

In [None]:
# Summarise all runs: one row per (k, F) pair holding the mean of avg_profit1
# over every run that has that pair.
k_range = np.logspace(1, 10, num=10, base=2)[1:10]
F_range = [float(str(j)[0:3]) for j in np.linspace(0.0, 2.0, 10)]

# Group on the actual k and F values instead of relying on the row index and on
# fixed counts (90 combinations, 10 runs): the index of df is not guaranteed to
# match the sorted row position, so index arithmetic can add a profit to the
# wrong (k, F) bucket. groupby returns the rows sorted by k, then F.
df_sum = (
    df.groupby(['k', 'F'], as_index=False)['avg_profit1']
    .mean()
    .rename(columns={'avg_profit1': 'avg_profit'})
)

df_sum.to_csv('shet_sum.csv', columns=['k', 'F', 'avg_profit'])

### Visualisation

In [None]:
# Mean average profit against k, one line per value of F.
F_range = np.linspace(0.0, 2.0, 10)
for j in F_range:
    # Truncate to three characters (e.g. 0.222... -> 0.2) so the value has the
    # same form as the three-character F values stored in df_sum.
    F_value = float(str(j)[0:3])
    dfx = df_sum.loc[df_sum['F'] == F_value]
    plt.plot(dfx['k'], dfx['avg_profit'], label='F = ' + str(F_value))

# Without labels and a legend the lines cannot be told apart.
plt.xlabel('k')
plt.ylabel('mean average profit per trader')
plt.legend()
plt.show()

In [None]:
# Mean average profit against F, one line per value of k (4, 8, ..., 1024).
k_range = np.logspace(1, 10, num=10, base=2)
k_range = k_range[1:10]
for j in k_range:
    dfx = df_sum.loc[df_sum['k'] == j]
    plt.plot(dfx['F'], dfx['avg_profit'], label='k = ' + str(int(j)))

# Without labels and a legend the lines cannot be told apart.
plt.xlabel('F')
plt.ylabel('mean average profit per trader')
plt.legend()
plt.show()

In [None]:
# Distribution of avg_profit1 over all rows of df, one box per value of k.
boxplot = df.boxplot(column='avg_profit1', by='k')
# Blank the automatic axes title and figure suptitle that the grouped boxplot adds.
boxplot.set_title('')
boxplot.get_figure().suptitle('')
boxplot.set_ylabel('average profit per trader')
# Keep `boxplot` bound to the Axes; do not overwrite it with plt.show()'s result.
plt.show()

In [None]:
# Distribution of avg_profit1 for the rows with F == 0.8, one box per value of k.
dfF = df.loc[df['F'] == 0.8]
boxplot = dfF.boxplot(column='avg_profit1', by='k')
# Blank the automatic axes title and figure suptitle that the grouped boxplot adds.
boxplot.set_title('')
boxplot.get_figure().suptitle('')
boxplot.set_ylabel('average profit per trader')
# Keep `boxplot` bound to the Axes; do not overwrite it with plt.show()'s result.
plt.show()

In [None]:
# Distribution of avg_profit1 over all rows of df, one box per value of F.
boxplot = df.boxplot(column='avg_profit1', by='F')
# Blank the automatic axes title and figure suptitle that the grouped boxplot adds.
boxplot.set_title('')
boxplot.get_figure().suptitle('')
boxplot.set_ylabel('average profit per trader')
# Keep `boxplot` bound to the Axes; do not overwrite it with plt.show()'s result.
plt.show()

In [None]:
# Distribution of avg_profit1 for the rows with k == 4, one box per value of F.
dfk = df.loc[df['k'] == 4]
boxplot = dfk.boxplot(column='avg_profit1', by='F')
# Blank the automatic axes title and figure suptitle that the grouped boxplot adds.
boxplot.set_title('')
boxplot.get_figure().suptitle('')
boxplot.set_ylabel('average profit per trader')
# Keep `boxplot` bound to the Axes; do not overwrite it with plt.show()'s result.
plt.show()

In [None]:
# Rank the per-run rows and the per-(k, F) summary rows by profit, highest
# first, and print the first five rows of each ranking.
sorted_df = df.sort_values(by='avg_profit1', ascending=False)
sorted_df_sum = df_sum.sort_values(by='avg_profit', ascending=False)
for ranked in (sorted_df, sorted_df_sum):
    print(ranked.head())