# Imports

In [156]:
from pprint import pprint
from os import listdir
from os.path import join, isfile
import json

import numpy as np
from scipy.spatial import distance_matrix
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

# Analysis

## Retrieving the path of each saved simulation

In [157]:
# Directory whose non-file entries are treated as saved simulations.
data_path = join(".", "data")

# `listdir` returns bare entry names, so each one must be joined with `data_path`
# before asking `isfile` about it; testing the bare name would resolve it against
# the current working directory and let regular files slip through the filter.
# The explicit name exclusions are kept as an extra safety net.
# Sorting makes the order reproducible: `listdir` returns entries in arbitrary order.
simulations_names = sorted(
    entry for entry in listdir(data_path)
    if not isfile(join(data_path, entry)) and entry not in (".gitkeep", ".gitignore")
)

## Retrieving causality strength for each symbol in each simulation

In [158]:
# Load the "pc.json" file of every simulation into a DataFrame whose rows are
# sorted by index label; simulations without that file are reported and skipped.
causalities_strengths = []
for simulation_name in simulations_names:
    simulation_path = join(data_path, simulation_name)
    pc_path = join(simulation_path, "pc.json")
    # Test for the file directly instead of listing the directory: this avoids
    # reading the whole listing and does not raise when `simulation_path` turns
    # out not to be a directory.
    if not isfile(pc_path):
        print(f"Simulation {simulation_path} has no pc.json file")
        continue
    with open(pc_path, "r") as pc_file:
        causalities_strengths.append(
            pd.DataFrame.from_dict(json.load(pc_file)).sort_index()
        )

print("Causalities' strengths")
pprint(causalities_strengths)

Simulation ./data/realistic_scenario123456 has no pc.json file
Causalities' strengths
[           ETF      SYM1      SYM2      SYM3
ETF        NaN  0.235376  0.105850  0.077994
SYM1  0.235376       NaN -0.050139 -0.002786
SYM2  0.105850 -0.050139       NaN  0.032033
SYM3  0.077994 -0.002786  0.032033       NaN,
            ETF      SYM1      SYM2      SYM3
ETF        NaN  0.222841  0.036212  0.045961
SYM1  0.222841       NaN -0.029248  0.047354
SYM2  0.036212 -0.029248       NaN  0.027855
SYM3  0.045961  0.047354  0.027855       NaN,
            ETF      SYM1      SYM2      SYM3
ETF        NaN  0.243733  0.139276  0.026462
SYM1  0.243733       NaN  0.002786  0.023677
SYM2  0.139276  0.002786       NaN  0.050139
SYM3  0.026462  0.023677  0.050139       NaN,
            ETF      SYM1      SYM2      SYM3
ETF        NaN  0.247911  0.093315  0.005571
SYM1  0.247911       NaN -0.075209 -0.020891
SYM2  0.093315 -0.075209       NaN  0.033426
SYM3  0.005571 -0.020891  0.033426       NaN,
      

## Stats about causalities

In [159]:
# Labels for the summary tables are taken from the first loaded DataFrame and are
# used for both rows and columns.
symbols_names = list(causalities_strengths[0].columns)

# Stack the per-simulation matrices into a single array; axis 0 indexes simulations.
# NOTE(review): this rebinds `causalities_strengths` from a list of DataFrames to an
# ndarray, so re-running this cell without re-running the loading cell fails on the
# `.columns` access above.
causalities_strengths = np.array(
    [frame.values.tolist() for frame in causalities_strengths]
)

# Element-wise statistics across simulations, using NumPy's defaults
# (so variance and standard deviation are computed with ddof=0).
summaries = (
    ("Mean", causalities_strengths.mean),
    ("Variance", causalities_strengths.var),
    ("Standard deviation", causalities_strengths.std),
)
for heading, reduce_over_simulations in summaries:
    print(f"\n{heading}")
    print(pd.DataFrame(data=reduce_over_simulations(axis=0),
                       columns=symbols_names, index=symbols_names))




Mean
           ETF      SYM1      SYM2      SYM3
ETF        NaN  0.252089  0.106778  0.066852
SYM1  0.252089       NaN -0.014856  0.009749
SYM2  0.106778 -0.014856       NaN  0.019499
SYM3  0.066852  0.009749  0.019499       NaN

Variance
           ETF      SYM1      SYM2      SYM3
ETF        NaN  0.000774  0.001686  0.002463
SYM1  0.000774       NaN  0.002214  0.000480
SYM2  0.001686  0.002214       NaN  0.000588
SYM3  0.002463  0.000480  0.000588       NaN

Standard deviation
           ETF      SYM1      SYM2      SYM3
ETF        NaN  0.027820  0.041062  0.049627
SYM1  0.027820       NaN  0.047050  0.021904
SYM2  0.041062  0.047050       NaN  0.024244
SYM3  0.049627  0.021904  0.024244       NaN
