# Confirmatory Analysis of Stimuli Data

In [1]:
import os
import re
import pandas as pd

In [2]:
# Directory that holds the stimuli CSV files read by the cells below.
path = "data/"

## Batch solution

In [3]:
result = []

# Sort the listing: os.listdir() returns entries in arbitrary order, which would
# make the row order of the summary table (and of result.csv) vary between runs.
dir_list = sorted(os.listdir(path))
for filename in dir_list:
    if not filename.endswith(".csv"):
        continue

    # Extract the metadata encoded in the file name, e.g.
    # "P_d50_70_120_120.csv" -> ['P', 'd50', '70', '120', '120'].
    fields = filename[:-4].split("_")
    if len(fields) != 5:
        # Same exception type as a failed tuple unpacking, but naming the file.
        raise ValueError(
            "Unexpected file name {!r}: expected "
            "<posneg>_d<DeltaSigma>_<axisB>_<axisC>_<axisD>.csv".format(filename)
        )
    posneg, deltasigma, axB, axC, axD = fields

    # Load the data; os.path.join works whether or not `path` ends with a separator.
    df = pd.read_csv(os.path.join(path, filename))

    result.append({
        'filename': filename,
        'posneg': posneg,
        'DeltaSigma': deltasigma[1:],  # drop the leading 'd' prefix
        'axisB': axB,
        'axisC': axC,
        'axisD': axD,
        # Correlation (pandas default: Pearson) of column A with each other column.
        'corrB': df.A.corr(df.B),
        'corrC': df.A.corr(df.C),
        'corrD': df.A.corr(df.D),
    })
    


In [4]:
# Collect the per-file records into one table (one row per CSV file), then display it.
resultDf = pd.DataFrame(result)    
resultDf

Unnamed: 0,filename,posneg,DeltaSigma,axisB,axisC,axisD,corrB,corrC,corrD
0,N_d100_170_170_70.csv,N,100,170,170,70,-0.825209,-0.815277,-0.949681
1,N_d100_170_70_170.csv,N,100,170,70,170,-0.820468,-0.950088,-0.822562
2,N_d100_70_170_170.csv,N,100,70,170,170,-0.945015,-0.827237,-0.826369
3,N_d10_70_80_80.csv,N,10,70,80,80,-0.949758,-0.938910,-0.941194
4,N_d10_80_70_80.csv,N,10,80,70,80,-0.943131,-0.945697,-0.936103
...,...,...,...,...,...,...,...,...,...
115,P_d90_160_70_160.csv,P,90,160,70,160,0.853809,0.947670,0.858700
116,P_d90_70_160_160.csv,P,90,70,160,160,0.943794,0.852519,0.853760
117,P_d9_70_79_79.csv,P,9,70,79,79,0.954918,0.942726,0.946886
118,P_d9_79_70_79.csv,P,9,79,70,79,0.937500,0.945773,0.936563


In [5]:
# Persist the summary table; index=False leaves the row index out of the file.
resultDf.to_csv('result.csv', index=False)

## Preparing the parts

In [6]:
# List everything in the data directory; sorted because os.listdir() gives
# no ordering guarantee.
dir_list = sorted(os.listdir(path))
# Format the path into the message itself: passing it as a separate print()
# argument put spaces inside the quotes (' data/ ').
print("Files and directories in '{}' :".format(path))
# prints all entries
print(dir_list)

Files and directories in ' data/ ' :
['N_d100_170_170_70.csv', 'N_d100_170_70_170.csv', 'N_d100_70_170_170.csv', 'N_d10_70_80_80.csv', 'N_d10_80_70_80.csv', 'N_d10_80_80_70.csv', 'N_d150_220_220_70.csv', 'N_d150_220_70_220.csv', 'N_d150_70_220_220.csv', 'N_d200_270_270_70.csv', 'N_d200_270_70_270.csv', 'N_d200_70_270_270.csv', 'N_d20_70_90_90.csv', 'N_d20_90_70_90.csv', 'N_d20_90_90_70.csv', 'N_d2_70_72_72.csv', 'N_d2_72_70_72.csv', 'N_d2_72_72_70.csv', 'N_d30_100_100_70.csv', 'N_d30_100_70_100.csv', 'N_d30_70_100_100.csv', 'N_d3_70_73_73.csv', 'N_d3_73_70_73.csv', 'N_d3_73_73_70.csv', 'N_d40_110_110_70.csv', 'N_d40_110_70_110.csv', 'N_d40_70_110_110.csv', 'N_d4_70_74_74.csv', 'N_d4_74_70_74.csv', 'N_d4_74_74_70.csv', 'N_d50_120_120_70.csv', 'N_d50_120_70_120.csv', 'N_d50_70_120_120.csv', 'N_d5_70_75_75.csv', 'N_d5_75_70_75.csv', 'N_d5_75_75_70.csv', 'N_d60_130_130_70.csv', 'N_d60_130_70_130.csv', 'N_d60_70_130_130.csv', 'N_d6_70_76_76.csv', 'N_d6_76_70_76.csv', 'N_d6_76_76_70.csv', 'N

In [7]:
# Print only the CSV files from the directory listing; every other entry is skipped.
for filename in dir_list:
    if not filename.endswith(".csv"):
        continue
    print(filename)

N_d100_170_170_70.csv
N_d100_170_70_170.csv
N_d100_70_170_170.csv
N_d10_70_80_80.csv
N_d10_80_70_80.csv
N_d10_80_80_70.csv
N_d150_220_220_70.csv
N_d150_220_70_220.csv
N_d150_70_220_220.csv
N_d200_270_270_70.csv
N_d200_270_70_270.csv
N_d200_70_270_270.csv
N_d20_70_90_90.csv
N_d20_90_70_90.csv
N_d20_90_90_70.csv
N_d2_70_72_72.csv
N_d2_72_70_72.csv
N_d2_72_72_70.csv
N_d30_100_100_70.csv
N_d30_100_70_100.csv
N_d30_70_100_100.csv
N_d3_70_73_73.csv
N_d3_73_70_73.csv
N_d3_73_73_70.csv
N_d40_110_110_70.csv
N_d40_110_70_110.csv
N_d40_70_110_110.csv
N_d4_70_74_74.csv
N_d4_74_70_74.csv
N_d4_74_74_70.csv
N_d50_120_120_70.csv
N_d50_120_70_120.csv
N_d50_70_120_120.csv
N_d5_70_75_75.csv
N_d5_75_70_75.csv
N_d5_75_75_70.csv
N_d60_130_130_70.csv
N_d60_130_70_130.csv
N_d60_70_130_130.csv
N_d6_70_76_76.csv
N_d6_76_70_76.csv
N_d6_76_76_70.csv
N_d70_140_140_70.csv
N_d70_140_70_140.csv
N_d70_70_140_140.csv
N_d7_70_77_77.csv
N_d7_77_70_77.csv
N_d7_77_77_70.csv
N_d80_150_150_70.csv
N_d80_150_70_150.csv
N_d80_7

In [8]:
# Example file used to walk through the per-file steps one at a time.
filename = "P_d50_70_120_120.csv"

In [9]:
# Strip the ".csv" suffix and unpack the five underscore-separated metadata fields.
posneg, deltasigma, axB, axC, axD = filename[:-4].split("_")
(posneg, deltasigma, axB, axC, axD)

('P', 'd50', '70', '120', '120')

In [10]:
# Load the example file; os.path.join does not depend on `path` ending in a separator.
df = pd.read_csv(os.path.join(path, filename))

In [11]:
# Show the loaded frame to check its columns and values.
df 

Unnamed: 0,A,B,C,D
0,0.000000,0.049809,0.167421,0.120730
1,0.001451,0.000478,0.028712,0.004447
2,0.011098,0.138569,0.277630,0.195778
3,0.013080,0.000000,0.091005,0.124645
4,0.016068,0.141106,0.180116,0.000000
...,...,...,...,...
295,0.975749,0.893072,0.998393,0.997588
296,0.993232,0.965085,0.885781,0.863621
297,0.996169,0.905919,0.826083,0.837361
298,0.999476,0.895382,0.867007,0.889126


Reference for the correlation calls below (pandas computes the Pearson correlation by default): <https://realpython.com/numpy-scipy-pandas-correlation-python/#pearson-correlation-pandas-implementation>

In [12]:
# Pearson correlation (the pandas default) between columns A and B.
df["A"].corr(df["B"])

0.9511329693742219

In [13]:
# Pearson correlation (the pandas default) between columns A and C.
df["A"].corr(df["C"])

0.9042624945501799

In [14]:
# Pearson correlation (the pandas default) between columns A and D.
df["A"].corr(df["D"])

0.9109789980609141

In [15]:
# Full pairwise correlation matrix of all columns, with the default method spelled out.
df.corr(method="pearson")

Unnamed: 0,A,B,C,D
A,1.0,0.951133,0.904262,0.910979
B,0.951133,1.0,0.90395,0.907989
C,0.904262,0.90395,1.0,0.872423
D,0.910979,0.907989,0.872423,1.0
