In [1]:
import itertools
import os

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D

In [2]:
def plot_mat_corrs(figsize, annot, matrix):
    """Show ``matrix`` as a heatmap with a diverging palette centred on zero.

    Parameters
    ----------
    figsize : tuple
        Figure size, forwarded to ``plt.subplots``.
    annot
        Forwarded unchanged to ``sns.heatmap`` as its ``annot`` argument.
    matrix
        Rectangular data forwarded unchanged to ``sns.heatmap``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Only the axes are needed; the figure handle is discarded.
    _, ax = plt.subplots(figsize=figsize)
    cmap = sns.diverging_palette(150, 275, s=80, l=55, as_cmap=True)
    # Draw on the axes created above instead of relying on pyplot's
    # implicit "current axes" state.
    sns.heatmap(matrix, cmap=cmap, center=0, annot=annot, ax=ax)
    plt.show()

In [3]:
def plot_stem_corrs(x, y, figsize, labels, ang_rot_lbls):
    """Draw a stem plot of ``y`` with one labelled x tick per value.

    Parameters
    ----------
    x
        Ignored. Kept only so existing calls keep working: the stems are
        always placed at the integer positions ``0 .. len(y) - 1``.
    y : sequence
        Values to plot.
    figsize : tuple
        Figure size, forwarded to ``plt.figure``.
    labels : sequence
        Tick labels, forwarded to ``plt.xticks`` for the stem positions.
    ang_rot_lbls : number or None
        Rotation of the tick labels. ``None`` falls back to 60, the value
        that used to be hard-coded.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    positions = list(range(len(y)))
    # The rotation used to be the hard-coded *string* '60', which ignored
    # ang_rot_lbls and is not a valid rotation for current matplotlib
    # (it expects a number, 'horizontal' or 'vertical').
    rotation = 60 if ang_rot_lbls is None else ang_rot_lbls
    plt.figure(figsize=figsize)
    plt.stem(positions, y, linefmt='b-', markerfmt='bo', basefmt='r-')
    plt.xticks(positions, labels, rotation=rotation)
    plt.show()

In [4]:
def sig_corr(x1, x2, wlag, t_amostragem,x,y):
    """Find the lag at which two signals are most strongly correlated.

    For every lag ``i`` in ``0 .. wlag - 1`` the mean of the products
    ``x1[k + i] * x2[k]`` over the overlapping samples is computed, and the
    lag with the largest absolute value is selected.

    Parameters
    ----------
    x1, x2 : sequence of numbers
        The two signals. They are expected to have the same length;
        otherwise ``np.dot`` raises ``ValueError``.
    wlag : int
        Number of lags to evaluate (lags ``0 .. wlag - 1``).
    t_amostragem : float
        Multiplier turning the winning lag index into the returned lag value.
    x, y
        Unused; kept so existing callers keep working.

    Returns
    -------
    list
        ``[best_lag_index * t_amostragem, correlation_at_best_lag]`` with the
        correlation rounded to two decimals.
    """
    n_samples = len(x1)
    correlations = np.zeros(wlag)
    # A lag >= len(x1) has no overlapping samples: the original loop divided
    # by zero there (NaN) and then hit mismatched slice lengths. Those lags
    # are skipped and keep their initial value of 0.
    for lag in range(min(wlag, n_samples)):
        overlap = n_samples - lag
        correlations[lag] = (1.0/overlap)*np.dot(x1[lag:], x2[:(len(x2) - lag)])
    max_index = np.argmax(np.abs(correlations))
    return ([max_index*t_amostragem, np.round(correlations[max_index],2)])

In [19]:
def pcent_diff_abs(corrs1, corrs2):
    """Absolute relative change of slot 3 between two lists of pair tuples.

    The two lists are walked in parallel (stopping at the shorter one).
    ``corrs1`` is the reference: for each position the result is
    ``|(b[3] - a[3]) / a[3]|`` where ``a`` comes from ``corrs1`` and ``b``
    from ``corrs2``. When the reference value is exactly zero, ``|b[3]|`` is
    used instead.

    Parameters
    ----------
    corrs1 : sequence of indexable records
        Reference entries; only index 3 is read.
    corrs2 : sequence of indexable records
        Entries to compare; indices 0, 1 and 3 are read.

    Returns
    -------
    list
        ``[percent_diff, corr_vars]`` where ``corr_vars[k]`` is the
        ``(b[0], b[1])`` pair that labels ``percent_diff[k]``.
    """
    def _abs_change(reference, candidate):
        # A zero reference cannot be divided by; fall back to the magnitude.
        if reference != 0:
            return np.abs((candidate - reference)/reference)
        return np.abs(candidate)

    paired = list(zip(corrs2, corrs1))
    percent_diff = [_abs_change(ref[3], cand[3]) for cand, ref in paired]
    corr_vars = [(cand[0], cand[1]) for cand, _ in paired]
    return [percent_diff, corr_vars]

In [12]:
def get_corrs_with_pairs( matrix, threshold = None):
    """Flatten a pairwise matrix into a list of ``(i, j, ...)`` tuples.

    Parameters
    ----------
    matrix : indexable as ``matrix[i][j][k]``
        Each cell must have at least 4 entries. For matrices produced by
        ``mean_correlation`` in this notebook, entry 0 is the mean lag,
        entry 1 the mean correlation and entry 3 the std of the correlation.
    threshold : number or None
        When given, only cells with ``abs(matrix[i][j][1]) > threshold`` are
        kept. ``None`` keeps every cell.

    Returns
    -------
    list of tuple
        ``(i, j, matrix[i][j][0], matrix[i][j][1], matrix[i][j][3])`` in
        row-major order. Entry 2 of the cell is not included.
    """
    # The original sized both loops with len(matrix[:][:][1]), which is just
    # len(matrix[1]): it failed on a 1-row matrix and used the column count
    # for the row loop. Take each dimension from where it actually lives.
    n_rows = len(matrix)
    n_cols = len(matrix[0]) if n_rows else 0

    most_correlated_var = []
    for i in range(n_rows):
        for j in range(n_cols):
            cell = matrix[i][j]
            if threshold is None or np.abs(cell[1]) > threshold:
                most_correlated_var.append((i, j, cell[0], cell[1], cell[3]))
    return most_correlated_var

In [13]:
def get_corr_var(matrix, vars):
    """Look up entry 1 of ``matrix[i][j]`` for each ``(i, j, ...)`` in ``vars``.

    Parameters
    ----------
    matrix : indexable as ``matrix[i][j][k]``
        Source of the values.
    vars : iterable of indexable records
        Only the first two items of each record are used; both are
        converted with ``int`` before indexing.

    Returns
    -------
    list
        One value per record, in the order of ``vars``.
    """
    return [matrix[int(pair[0])][int(pair[1])][1] for pair in vars]

In [14]:
def normalize_dfs(dfs):
    """Z-score a list of data frames using statistics pooled over all of them.

    The frames are concatenated, every column is centred on the pooled mean
    and divided by the pooled standard deviation, and the ``'tout'`` column
    is then restored to its original (un-normalised) values. The result is
    split back into one frame per input frame.

    Parameters
    ----------
    dfs : list of pandas.DataFrame
        Frames to normalise. Each must contain a ``'tout'`` column.

    Returns
    -------
    list of pandas.DataFrame
        One normalised frame per input, in the same order and with the same
        number of rows as the corresponding input.
    """
    all_dfs = pd.concat(dfs)

    all_dfs_norm = (all_dfs - all_dfs.mean())/all_dfs.std()
    all_dfs_norm['tout'] = all_dfs['tout']

    # Split on each frame's real length. The original derived a single block
    # size from averages, which returned the wrong number and size of blocks
    # as soon as the inputs were not all equally long.
    norm_dfs = []
    start = 0
    for df in dfs:
        stop = start + len(df)
        norm_dfs.append(all_dfs_norm.iloc[start:stop, :])
        start = stop
    return norm_dfs
        

In [15]:
def get_dfs(init, end, data_dir='/home/rute/Pesquisa/data_dezembro'):
    """Read the CSV files ``simout_<init>.csv`` .. ``simout_<end>.csv``.

    Parameters
    ----------
    init : int
        Number of the first file to read.
    end : int
        Number of the last file to read (inclusive).
    data_dir : str
        Directory holding the ``simout_<n>.csv`` files. Defaults to the
        location that used to be hard-coded, so existing calls are unchanged.

    Returns
    -------
    list of pandas.DataFrame
        One frame per file, ordered by file number.
    """
    return [
        pd.read_csv(os.path.join(data_dir, 'simout_' + str(i) + '.csv'))
        for i in np.arange(init, end + 1)
    ]

In [16]:
def mean_correlation(wlag, dfs = None):
    """Average the pairwise peak correlations over several data frames.

    The frames are normalised together with ``normalize_dfs``. For every
    frame, ``sig_corr`` is evaluated for each ordered pair of the columns
    ``df.columns[1:23]`` with a sampling period of 0.01, giving a lag and a
    correlation per pair. Each frame's correlations are then divided
    column-wise by the diagonal (entry ``[i, j]`` is divided by entry
    ``[j, j]``) before the frames are combined.

    Parameters
    ----------
    wlag : int
        Number of lags passed to ``sig_corr``.
    dfs : list of pandas.DataFrame, optional
        Frames to analyse. An empty or missing list is passed straight to
        ``normalize_dfs``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, n, 4)`` where ``n`` is the number of selected
        columns. The last axis holds: mean lag, mean normalised correlation
        (both rounded to two decimals), standard deviation of the lag, and
        standard deviation of the normalised correlation (not rounded).
    """
    # None instead of a shared mutable [] default; behaviour is the same.
    dfs_norm = normalize_dfs([] if dfs is None else dfs)

    matrix_corr_list = []
    for df in dfs_norm:
        signal_cols = df.columns[1:23]
        tam_m = len(signal_cols)
        # Convert every column once instead of once per pair.
        signals = [np.array(df[col]) for col in signal_cols]
        m_corr = np.zeros((tam_m, tam_m, 4))
        for i, x in enumerate(signal_cols):
            for j, y in enumerate(signal_cols):
                lag, corr = sig_corr(signals[i], signals[j], wlag, 0.01, x, y)
                # Slots 2 and 3 are placeholders, filled with the stds below.
                m_corr[i][j] = [lag, corr, 0, 0]
        matrix_corr_list.append(m_corr)

    for m in matrix_corr_list:
        # Broadcasting divides column j by the diagonal entry [j, j].
        m[:,:,1] = m[:,:,1]/np.diagonal(m[:,:,1])

    m_mean = np.round(np.mean(matrix_corr_list, axis =0),2)
    m_std = np.std(matrix_corr_list, axis=0)

    # Store the spread of lag and correlation in the two placeholder slots.
    m_mean[:, :, 2] = m_std[:, :, 0]
    m_mean[:, :, 3] = m_std[:, :, 1]
    return m_mean

In [17]:
# Files simout_1 .. simout_10 and simout_11 .. simout_14 respectively;
# going by the names, the first set is the undisturbed reference and the
# second one a disturbance scenario.
regular_sims = get_dfs(init=1, end=10)
disturb_1_sims = get_dfs(init=11, end=14)

In [26]:
# Number of lags to scan: a window of 2 time units at a sampling period of
# 0.01 (the same period mean_correlation hands to sig_corr).
MAX_LAG_TIME = 2
SAMPLE_PERIOD = 0.01
# round() before int(): a float quotient can land just below the intended
# integer, and int() alone would then truncate it to one lag too few.
wlag = int(round(MAX_LAG_TIME/SAMPLE_PERIOD))

# Reference matrix: averaged over all regular runs.
corrs_reg_sims = mean_correlation(wlag, regular_sims)
# Each disturbed run is analysed on its own (single-element list), so its
# std slots are all zero. The 25/50/75/100 suffixes presumably denote the
# disturbance level of each run.
corrs_dist_1_25 = mean_correlation(wlag, [disturb_1_sims[0]])
corrs_dist_1_50 = mean_correlation(wlag, [disturb_1_sims[1]])
corrs_dist_1_75 = mean_correlation(wlag, [disturb_1_sims[2]])
corrs_dist_1_100 = mean_correlation(wlag, [disturb_1_sims[3]])

# Flatten every matrix into (i, j, ...) tuples; no threshold, so all pairs
# are kept and the lists line up position by position.
corrs_no_dist_with_pairs = get_corrs_with_pairs(corrs_reg_sims)
corrs_25_with_pairs_d1 = get_corrs_with_pairs(corrs_dist_1_25)
corrs_50_with_pairs_d1 = get_corrs_with_pairs(corrs_dist_1_50)
corrs_75_with_pairs_d1 = get_corrs_with_pairs(corrs_dist_1_75)
corrs_100_with_pairs_d1 = get_corrs_with_pairs(corrs_dist_1_100)

In [135]:
# Relative change of every pair between the regular reference and the first
# disturbed run, indexed by the (i, j) pair and ordered largest first.
diff_percent = pcent_diff_abs(corrs_no_dist_with_pairs, corrs_25_with_pairs_d1)
perc_serie = pd.Series(diff_percent[0], index=diff_percent[1]).sort_values(ascending=False)

24

In [160]:
# Optional diagnostic (disabled): uncomment the next line to plot every
# column after the first of disturb_1_sims[3], one subplot per column.
#disturb_1_sims[3][disturb_1_sims[3].columns[1:]].plot(figsize=(16,30), subplots=True)
# NOTE(review): with the plot call commented out this cell creates no figure
# of its own, so plt.show() presumably displays nothing — confirm or remove.
plt.show()