# Generación de tablas de redes de contacto residuo a residuo y redes de correlación dinámica

In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import community

In [2]:
# Load the four contact networks from their GEXF exports. Going by the file
# names: dimer / tetramer, each with ("con") and without ("sin") cofactor.
(
    red_dimer_con_cofact,
    red_tetra_con_cofact,
    red_dimer_sin_cofact,
    red_tetra_sin_cofact,
) = (
    nx.read_gexf(gexf_path)
    for gexf_path in (
        'gexf/red_dimer_con_cofact.gexf',
        'gexf/red_tetra_con_cofact.gexf',
        'gexf/red_dimer_sin_cofact.gexf',
        'gexf/red_tetra_sin_cofact.gexf',
    )
)

In [13]:
def measuring(red):
    """Compute per-node centrality measures of a networkx graph.

    Only degree and betweenness centrality are computed, both with the
    default arguments of the corresponding networkx functions. Earlier
    revisions of this function also computed closeness, harmonic closeness,
    eigenvector centrality, PageRank and HITS hubs/authorities; those
    measures are currently disabled.

    Parameters
    ----------
    red : graph
        Graph passed unchanged to ``nx.degree_centrality`` and
        ``nx.betweenness_centrality``.

    Returns
    -------
    pandas.DataFrame
        One row per node (the index holds the node identifiers) with the
        columns ``"Degree"`` and ``"Betweenness"``, in that order.
    """
    degree = pd.Series(nx.degree_centrality(red), name="Degree")
    betweenness = pd.Series(nx.betweenness_centrality(red), name="Betweenness")

    # Column labels come from the Series names; rows are aligned on node id.
    return pd.concat([degree, betweenness], axis=1)

In [22]:
# Centrality table (degree, betweenness) of each contact network.
df_rdc, df_rds, df_rtc, df_rts = (
    measuring(contact_net)
    for contact_net in (
        red_dimer_con_cofact,
        red_dimer_sin_cofact,
        red_tetra_con_cofact,
        red_tetra_sin_cofact,
    )
)

In [26]:
# Community partition of each contact network. The same random_state is
# passed to every call so repeated runs use the same seed.
part_rdc, part_rds, part_rtc, part_rts = (
    community.best_partition(contact_net, random_state=8)
    for contact_net in (
        red_dimer_con_cofact,
        red_dimer_sin_cofact,
        red_tetra_con_cofact,
        red_tetra_sin_cofact,
    )
)

In [32]:
def get_table_df_res_contact(df, dic=None):
    """Turn a centrality table into a residue-numbered, sorted table.

    This notebook defines ``get_table_df_res_contact`` twice (with and
    without a residue-number mapping). Accepting an optional ``dic`` here
    keeps one-argument and two-argument calls working regardless of which
    definition cell was executed last.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'Degree'`` and ``'Betweenness'``; its
        index holds the node identifiers.
    dic : mapping, optional
        Translation from node identifier to residue number. When omitted,
        the node identifiers themselves are used and must be castable to
        ``int``.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Num_Residuo'``, ``'Grado'``, ``'Intermediacion'``, sorted
        by ``'Num_Residuo'`` with a fresh ``0..n-1`` index. ``df`` itself is
        not modified.

    Raises
    ------
    KeyError
        If ``dic`` is given and has no entry for some node identifier.
    """
    table = df[['Degree', 'Betweenness']].reset_index()
    table.columns = ['Num_Residuo', 'Grado', 'Intermediacion']

    if dic is not None:
        # Fail early with the offending identifiers instead of letting a
        # silent None break the integer cast below.
        missing = [node for node in table['Num_Residuo'] if node not in dic]
        if missing:
            raise KeyError(
                'no residue number for node(s): %r' % (missing,))
        table['Num_Residuo'] = [dic[node] for node in table['Num_Residuo']]

    table['Num_Residuo'] = table['Num_Residuo'].astype(int)
    return table.sort_values('Num_Residuo').reset_index(drop=True)

In [34]:
# Residue-numbered, sorted centrality table for each contact network.
table_rdc, table_rds, table_rtc, table_rts = (
    get_table_df_res_contact(centralities)
    for centralities in (df_rdc, df_rds, df_rtc, df_rts)
)

In [36]:
# Write the contact-network tables to CSV, one file per network, without
# the positional index column.
for tabla, destino in (
    (table_rdc, 'tabla_red_dimer_con_contact_resres.csv'),
    (table_rds, 'tabla_red_dimer_sin_contact_resres.csv'),
    (table_rtc, 'tabla_red_tetra_con_contact_resres.csv'),
    (table_rts, 'tabla_red_tetra_sin_contact_resres.csv'),
):
    tabla.to_csv(destino, index=False)

# Tablas RCD (redes de correlación dinámica)

In [4]:
# Dense adjacency matrix of each contact network.
# nx.to_numpy_matrix was removed in networkx 3.0; nx.to_numpy_array holds the
# same values in a plain 2-D ndarray and is available since networkx 2.0.
matrix_adyacente_dimer_con_cofat = nx.to_numpy_array(red_dimer_con_cofact)
matrix_adyacente_dimer_sin_cofat = nx.to_numpy_array(red_dimer_sin_cofact)
matrix_adyacente_tetra_con_cofat = nx.to_numpy_array(red_tetra_con_cofact)
matrix_adyacente_tetra_sin_cofat = nx.to_numpy_array(red_tetra_sin_cofact)

In [5]:
def get_matrix_correlation(path, matriz_adyacente, threshold=0.60):
    """Build a correlation-weighted adjacency matrix from cross-correlation data.

    The file at ``path`` is read as whitespace-separated rows
    ``res_1 res_2 correlation``. The rows are pivoted into a
    residue-by-residue table whose upper triangle is mirrored to obtain a
    symmetric correlation matrix. That matrix is then combined with the
    contact adjacency matrix:

    * every entry starts as ``correlation * adjacency``;
    * where that product is zero but ``abs(correlation) > threshold``, the
      raw correlation is used instead;
    * the diagonal (self-loops) is set to zero.

    Parameters
    ----------
    path : str
        Path of the text file with the pairwise correlations.
    matriz_adyacente : array-like, shape (n, n)
        Contact adjacency matrix, ordered like the sorted residue labels
        found in the file.
    threshold : float, optional
        Absolute-correlation cut-off above which a non-contact pair is still
        kept. Defaults to 0.60.

    Returns
    -------
    tuple
        ``(df, df_complete, pre_mat)`` where ``df`` is the pivoted table with
        missing pairs filled with 0, ``df_complete`` is the symmetric
        correlation matrix as a DataFrame with a default integer index, and
        ``pre_mat`` is the combined matrix described above (``numpy.matrix``
        if ``matriz_adyacente`` was one, otherwise ``numpy.ndarray``).

    Raises
    ------
    ValueError
        If the correlation matrix and ``matriz_adyacente`` differ in shape.
    """
    # Read the data; the context manager closes the file and blank lines
    # (e.g. a trailing newline) are skipped instead of producing empty rows.
    with open(path) as handle:
        rows = [line.split() for line in handle if line.strip()]
    data = pd.DataFrame(rows, columns=['res_1', 'res_2', 'correlation'])
    data = data.astype({'res_1': int, 'res_2': int, 'correlation': float})

    # Build the matrix. Both axes are reindexed onto the same residue labels
    # so the table is square and position (i, i) is a true diagonal even if
    # some residue only appears in one of the two columns of the file.
    df = data.pivot(index='res_1', columns='res_2', values='correlation')
    labels = df.index.union(df.columns)
    df = df.reindex(index=labels, columns=labels).fillna(0)

    # From triangular to full matrix: mirror the upper triangle and restore
    # the diagonal, which the sum would otherwise count twice.
    upper = np.triu(df.values)
    mat_corr = upper + upper.T
    np.fill_diagonal(mat_corr, np.diag(upper))

    df_complete = pd.DataFrame(mat_corr)

    contact = np.asarray(matriz_adyacente)

    print('matriz de correlacion',mat_corr.shape)
    print('matriz de contacto',contact.shape)
    print('=='*40)

    if contact.shape != mat_corr.shape:
        raise ValueError(
            'correlation matrix has shape %s but adjacency matrix has shape %s'
            % (mat_corr.shape, contact.shape))

    pre_mat = np.multiply(mat_corr, contact)

    # Conditional replacement: keep strongly correlated pairs that the
    # contact mask zeroed out.
    strong_non_contact = (pre_mat == 0) & (np.abs(mat_corr) > threshold)
    pre_mat = np.where(strong_non_contact, mat_corr, pre_mat)

    # Remove self-loops.
    np.fill_diagonal(pre_mat, 0)

    # Hand back the container type the caller supplied.
    if isinstance(matriz_adyacente, np.matrix):
        pre_mat = np.asmatrix(pre_mat)

    return df, df_complete, pre_mat

In [8]:
# Correlation-weighted matrices for the four systems.
# NOTE(review): the first file name is spelled 'ddc_...' while the other
# three are 'dcc_...' -- confirm this matches the file on disk.
tri_dimer_con, complete_dimer_con, matrix_corr_dimer_con = get_matrix_correlation(
    path='ddc_dimer_con.txt',
    matriz_adyacente=matrix_adyacente_dimer_con_cofat,
)
tri_dimer_sin, complete_dimer_sin, matrix_corr_dimer_sin = get_matrix_correlation(
    path='dcc_dimer_sin.txt',
    matriz_adyacente=matrix_adyacente_dimer_sin_cofat,
)
tri_tetra_con, complete_tetra_con, matrix_corr_tetra_con = get_matrix_correlation(
    path='dcc_tetra_con.txt',
    matriz_adyacente=matrix_adyacente_tetra_con_cofat,
)
tri_tetra_sin, complete_tetra_sin, matrix_corr_tetra_sin = get_matrix_correlation(
    path='dcc_tetra_sin.txt',
    matriz_adyacente=matrix_adyacente_tetra_sin_cofat,
)

matriz de correlacion (352, 352)
matriz de contacto (352, 352)
matriz de correlacion (352, 352)
matriz de contacto (352, 352)
matriz de correlacion (352, 352)
matriz de contacto (352, 352)
matriz de correlacion (352, 352)
matriz de contacto (352, 352)


# Corrección del número de residuo

In [18]:
import sys
sys.path.append("../math_tricks/")
sys.path.append("../../Serch/")
import read_pdb_tools as rpt

In [19]:
# Build the resx -> resi lookup for the dimer structure.
# NOTE(review): presumably resx is the sequential position and resi the
# residue number stored in the PDB file -- confirm in read_pdb_tools.
file1 = 'dimer_clean.pdb'
# The structure object is created once; the original cell built it twice
# and discarded the first instance.
pdb1 = rpt.PdbStruct(file1)
pdb1.AddPdbData(file1)
pdb1.PrintPdbInfo()
parejas_resi_resx = [[i.resx, i.resi] for i in pdb1.GetResChain()]
dimer_parejas_resi_resx = dict(parejas_resi_resx)

Number of residues and frame: 352    None
Number of chains:             1  dict_keys(['A'])


In [20]:
# Build the resx -> resi lookup for the tetramer structure.
# NOTE(review): presumably resx is the sequential position and resi the
# residue number stored in the PDB file -- confirm in read_pdb_tools.
file1 = 'tetra_clean.pdb'
# The structure object is created once; the original cell built it twice
# and discarded the first instance.
pdb1 = rpt.PdbStruct(file1)
pdb1.AddPdbData(file1)
pdb1.PrintPdbInfo()
parejas_resi_resx = [[i.resx, i.resi] for i in pdb1.GetResChain()]
tetra_parejas_resi_resx = dict(parejas_resi_resx)

Number of residues and frame: 352    None
Number of chains:             1  dict_keys(['A'])


In [23]:
def get_table_df_res_contact(df, dic=None):
    """Turn a centrality table into a residue-numbered, sorted table.

    This notebook defines ``get_table_df_res_contact`` twice (with and
    without a residue-number mapping). Making ``dic`` optional here keeps
    one-argument and two-argument calls working regardless of which
    definition cell was executed last.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'Degree'`` and ``'Betweenness'``; its
        index holds the node identifiers.
    dic : mapping, optional
        Translation from node identifier to residue number. When omitted,
        the node identifiers themselves are used and must be castable to
        ``int``.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Num_Residuo'``, ``'Grado'``, ``'Intermediacion'``, sorted
        by ``'Num_Residuo'`` with a fresh ``0..n-1`` index. ``df`` itself is
        not modified.

    Raises
    ------
    KeyError
        If ``dic`` is given and has no entry for some node identifier.
    """
    table = df[['Degree', 'Betweenness']].reset_index()
    table.columns = ['Num_Residuo', 'Grado', 'Intermediacion']

    if dic is not None:
        # A plain dic.get() would insert None for unknown nodes and only
        # fail later, inside the integer cast, with an unrelated message.
        missing = [node for node in table['Num_Residuo'] if node not in dic]
        if missing:
            raise KeyError(
                'no residue number for node(s): %r' % (missing,))
        table['Num_Residuo'] = [dic[node] for node in table['Num_Residuo']]

    table['Num_Residuo'] = table['Num_Residuo'].astype(int)
    return table.sort_values('Num_Residuo').reset_index(drop=True)

In [22]:
# Centrality tables of the correlation networks. These rebind df_rdc ..
# df_rts, which an earlier cell assigned to the contact-network tables.
# nx.from_numpy_matrix was removed in networkx 3.0; nx.from_numpy_array is
# its replacement. np.asarray makes the input a plain 2-D ndarray even when
# the matrix was produced as a numpy.matrix.
df_rdc = measuring(nx.from_numpy_array(np.asarray(matrix_corr_dimer_con)))
df_rds = measuring(nx.from_numpy_array(np.asarray(matrix_corr_dimer_sin)))
df_rtc = measuring(nx.from_numpy_array(np.asarray(matrix_corr_tetra_con)))
df_rts = measuring(nx.from_numpy_array(np.asarray(matrix_corr_tetra_sin)))


In [25]:
# Residue-numbered tables of the correlation networks; dimer and tetramer
# each use the lookup built from their own PDB file.
table_rdc, table_rds = (
    get_table_df_res_contact(centralities, dimer_parejas_resi_resx)
    for centralities in (df_rdc, df_rds)
)
table_rtc, table_rts = (
    get_table_df_res_contact(centralities, tetra_parejas_resi_resx)
    for centralities in (df_rtc, df_rts)
)

In [26]:
# Write the correlation-network tables to CSV, one file per network,
# without the positional index column.
for tabla, destino in (
    (table_rdc, 'tabla_red_dimer_con_correlation.csv'),
    (table_rds, 'tabla_red_dimer_sin_correlation.csv'),
    (table_rtc, 'tabla_red_tetra_con_correlation.csv'),
    (table_rts, 'tabla_red_tetra_sin_correlation.csv'),
):
    tabla.to_csv(destino, index=False)