In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import itertools 
from graphviz import Digraph

In [2]:
def plot_mat_corrs(figsize, annot, matrix):
    f, ax = plt.subplots(figsize=figsize)
    cmap = sns.diverging_palette(150, 275, s=80, l=55, as_cmap=True)
    sns.heatmap(matrix, cmap=cmap, center=0, annot=annot, vmax = max(matrix.max()), vmin= min(matrix.min()))
    plt.yticks(rotation=45)
    plt.xticks(rotation=45)
    plt.show()

In [4]:
def sig_corr(x1, x2, wlag, t_amostragem):
    correlations = np.zeros(wlag)
    for i in np.arange(0, wlag):
        correlations[i] = (1/(len(x1) - i))*np.dot(x1[i:],x2[:(len(x2) - i)])
    max_index = np.argmax(np.abs(correlations))
    return ([max_index*t_amostragem, np.round(correlations[max_index],2)])

In [5]:
def normalize_dfs(df):  
    df = (df - df.mean())/df.std()
    return df
        

In [6]:
def corrs(dist_df, wlag, t_amostragem):
    corr = np.zeros([dist_df.columns.size,dist_df.columns.size,2])
    for i, col_a in enumerate(dist_df.columns):
        for j, col_b in enumerate(dist_df.columns):
            corr[i][j] = sig_corr(dist_df[col_a], dist_df[col_b], wlag, t_amostragem)         
    return corr


In [7]:
def graph(limit, df_te, df_lag ,eng ='dot'):
    g = Digraph(engine=eng)
    for k, row in enumerate(df_te.index):
        if any(abs(df_te.iloc[k]) > limit) or any(abs(df_te[row]) > limit):
            g.node(str(k),row, shape='oval', fontsize='10', width='0', rankdir="LR") 
    for j, col in enumerate(df_te.columns):
        for i, row in enumerate(df_te[col]):
            if abs(row) > limit and i !=j:
                g.edge(str(i), str(j), label=str(np.round(row,6)) + str(' - lag: ') + str(df_lag[col][i]),\
                       color="#000000{:02x}".format(int(255)))
                #row//max(df_te.max())
    return g

In [8]:
def graph_simple(limit, df_te,eng ='dot'):
    g = Digraph(engine=eng)
    for k, row in enumerate(df_te.index):
        if any(abs(df_te.iloc[k]) > limit) or any(abs(df_te[row]) > limit):
            g.node(str(k),row, shape='oval', fontsize='10', width='0', rankdir="LR") 
    for j, col in enumerate(df_te.columns):
        for i, row in enumerate(df_te[col]):
            if abs(row) > limit and i !=j:
                g.edge(str(i), str(j), label=str(np.round(row,6)),\
                       color="#000000{:02x}".format(int(255)))
                #row//max(df_te.max())
    return g

In [10]:
dist6_process = pd.read_csv('dist6_process.csv', names=['xmeas%02d' % x for x in range(1, 74)])
dist6_process_svars = normalize_dfs(dist6_process[['xmeas01', 'xmeas02', 'xmeas03','xmeas08', 'xmeas09', 'xmeas21']].copy())



In [11]:
dist6_process_svars.head()

Unnamed: 0,xmeas01,xmeas02,xmeas03,xmeas08,xmeas09,xmeas21
0,0.015474,-0.239434,0.15648,-0.004949,0.001211,0.024789
1,-1.364283,-1.529994,0.474163,-0.217677,0.554758,-0.083594
2,-1.372042,-0.22962,0.405475,0.636214,-1.105884,0.133172
3,-1.36319,-0.700699,0.660193,-0.451279,-1.659432,0.40413
4,-1.36967,0.001012,-0.049584,0.161057,-2.212979,0.40413


In [12]:
matrix= corrs(dist6_process_svars, 100, 0.01)
corr_matrix = np.zeros([6,6])
lags_matrix = np.zeros([6,6])
for i in range(matrix.shape[0]):
    for j in range(matrix.shape[1]):
        corr_matrix[i][j] = matrix[i][j][1]
        lags_matrix[i][j] = matrix[i][j][0]

np.fill_diagonal(corr_matrix, 0)
df_corr =  pd.DataFrame(corr_matrix, columns = dist6_process_svars.columns, index= dist6_process_svars.columns)
df_lag =  pd.DataFrame(lags_matrix, columns = dist6_process_svars.columns, index= dist6_process_svars.columns)
#plot_mat_corrs(figsize=(10,10), annot=True, matrix = df_corr)

In [13]:
#graph(0.6, df_corr, df_lag)

In [14]:
#df_lag


In [15]:
df_count =  pd.DataFrame([], columns = dist6_process_svars.columns, index= ['normal', 'abnormal'])
df_count['xmeas01'][0] = 1
df_count

Unnamed: 0,xmeas01,xmeas02,xmeas03,xmeas08,xmeas09,xmeas21
normal,1.0,,,,,
abnormal,,,,,,


In [51]:
#Contagem

dist6 = pd.read_csv('dist6.csv', names=['xmeas%02d_low' % x for x in range(1, 74)] + ['xmeas%02d_high' % x for x in range(1, 74)])
dist6_sel_vars = dist6[ ['xmeas01_low', 'xmeas02_low', 'xmeas03_low', 
        'xmeas08_low', 'xmeas09_low', 'xmeas21_low',
        'xmeas01_high', 'xmeas02_high', 'xmeas03_high', 'xmeas08_high', 'xmeas09_high', 'xmeas21_high']]

df_count =  pd.DataFrame([], columns = dist6_sel_vars.columns, index= ['normal', 'abnormal', 'total'])

dist6_first_diff = dist6_sel_vars.diff(1)
dist6_first_diff.iloc[0] = dist6_sel_vars.iloc[0]
dist6_first_diff = dist6_first_diff.clip(0,1).copy()


normal_w = 800
abnormal_w = 200
 
df = dist6_first_diff
for col in dist6_sel_vars.columns:
    n = 0
    normal = 0
    abnormal = 0
    total = 0
    
    while( n < df.shape[0]):
        abnormal = abnormal  + np.sum(df[col][n:n+abnormal_w])
        normal = normal + np.sum(df[col][n+abnormal_w: n+ normal_w])
        n= n + normal_w + abnormal_w
        

    total = np.sum(df[col])
    df_count[col][0] = normal
    df_count[col][1] = abnormal
    df_count[col][2] = total
df_count    

Unnamed: 0,xmeas01_low,xmeas02_low,xmeas03_low,xmeas08_low,xmeas09_low,xmeas21_low,xmeas01_high,xmeas02_high,xmeas03_high,xmeas08_high,xmeas09_high,xmeas21_high
normal,75,52,359,2254,64,807,72,149,1104,640,325,149
abnormal,1,12,11,0,259,3,0,17,115,1616,2,58
total,76,118,462,2258,337,834,72,170,1223,2356,338,327


In [257]:
np.clip?

In [47]:


teste = {'a': [1,1,1,0,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1], 'b':[1,1,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0], 'c':[1,1,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0]}
df =  pd.DataFrame(teste)

df_count =  pd.DataFrame([], columns = df.columns, index= ['normal', 'abnormal'])

normal_w = 5
abnormal_w = 3
 
for col in df.columns:
    n = 0
    normal = 0
    abnormal = 0
    
    while( n < df.shape[0]):
        abnormal = abnormal  + np.sum(df[col][n:n+abnormal_w])

        normal = normal + np.sum(df[col][n+abnormal_w: n+ normal_w])

        n= n + normal_w + abnormal_w
    df_count[col][0] = normal
    df_count[col][1] = abnormal
        
df_count




Unnamed: 0,a,b,c
normal,0,0,0
abnormal,9,6,6


In [52]:
df

Unnamed: 0,xmeas01_low,xmeas02_low,xmeas03_low,xmeas08_low,xmeas09_low,xmeas21_low,xmeas01_high,xmeas02_high,xmeas03_high,xmeas08_high,xmeas09_high,xmeas21_high
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
