In [6]:
import pandas as pd
import re

# Lookup table: organism ID -> organism name.
# Keys are kept as strings because they are compared against the text prefix
# extracted from the "<taxid>.<protein>" identifiers further down in this cell.
organism_names = {
    '9606': 'Homo sapiens',
    '10116': 'Rattus norvegicus',
    '10090': 'Mus musculus',
    '9913': 'Bos taurus',
    '11676': 'Human Immunodeficiency Virus 1',
    '9986': 'Oryctolagus cuniculus',
    '37296': 'Human Herpesvirus 8',
    '559292': 'Saccharomyces cerevisiae (S288c)',
    '333760': 'Human papillomavirus (16)',
    '2697049': 'Severe acute respiratory syndrome coronavirus 2',
    '1335626': 'Middle-East Respiratory Syndrome-related Coronavirus',
    '6239': 'Caenorhabditis elegans',
    '10600': 'Human papillomavirus (6b)',
    '7227': 'Drosophila melanogaster',
    '9823': 'Sus scrofa',
    '9031': 'Gallus gallus',
}

# Load the tab-separated interaction export. The untouched frame keeps its own
# name so the transformation below can be re-run without re-reading the file.
file_path_initial = 'string_interactions_Propionibacterium freudenreichii.tsv'
interactions_raw = pd.read_csv(file_path_initial, sep='\t')

# Exclusive upper bound applied to the combined score.
# NOTE(review): rows with a score strictly BELOW this value are kept. If the
# intent was to keep only the highest-confidence interactions, the comparison
# should be `>=` instead -- confirm with the author.
MAX_COMBINED_SCORE = 0.9

# Columns kept in the final table, in display order.
columnas_interes = ['Interactor A Genes', 'Interactor B Genes', 'Combined Score', 'Taxid interactor A', 'Taxid interactor B']


def _taxon_label(string_ids):
    """Translate "<taxid>.<protein>" identifiers into organism names.

    The text before the first dot is looked up in ``organism_names``. IDs that
    are not in the table fall back to the raw taxon ID instead of silently
    becoming NaN, so rows from unlisted organisms stay identifiable.
    """
    taxids = string_ids.str.split('.', n=1).str[0]
    return taxids.map(organism_names).fillna(taxids)


data_initial = (
    interactions_raw
    # Derive the organism of each interactor from its identifier prefix.
    .assign(**{
        'Taxid interactor A': lambda df: _taxon_label(df['node1_string_id']),
        'Taxid interactor B': lambda df: _taxon_label(df['node2_string_id']),
    })
    .rename(columns={
        '#node1': 'Interactor A Genes',
        'node2': 'Interactor B Genes',
        'combined_score': 'Combined Score',
    })
    # Keep only the columns of interest.
    .loc[:, columnas_interes]
    # Keep interactions whose combined score is below the threshold.
    .loc[lambda df: df['Combined Score'].astype(float) < MAX_COMBINED_SCORE]
)

# Preview the first rows to verify the result.
data_initial.head()

  Interactor A Genes Interactor B Genes  Combined Score Taxid interactor A  \
0               AAMP              PA2G4           0.441       Homo sapiens   
1               AAMP              PTPRU           0.428       Homo sapiens   
2               AAMP             TBXA2R           0.890       Homo sapiens   
3               AAMP             INPP5K           0.523       Homo sapiens   
4              ABCA2              ABCB9           0.413       Homo sapiens   

  Taxid interactor B  
0       Homo sapiens  
1       Homo sapiens  
2       Homo sapiens  
3       Homo sapiens  
4       Homo sapiens  


In [2]:
# NOTE(review): the saved output under this cell lists the raw export columns
# (#node1, node2, ..., combined_score) and this cell's execution count is lower
# than the preceding cell's, so it appears to have been run before the
# transformation. Re-run with Restart Kernel -> Run All to refresh it.
data_initial

Unnamed: 0,#node1,node2,node1_string_id,node2_string_id,neighborhood_on_chromosome,gene_fusion,phylogenetic_cooccurrence,homology,coexpression,experimentally_determined_interaction,database_annotated,automated_textmining,combined_score
0,AAMP,PA2G4,9606.ENSP00000403343,9606.ENSP00000302886,0.0,0.0,0.000,0.00,0.213,0.288,0.0,0.082,0.441
1,AAMP,PTPRU,9606.ENSP00000403343,9606.ENSP00000334941,0.0,0.0,0.000,0.00,0.054,0.421,0.0,0.000,0.428
2,AAMP,TBXA2R,9606.ENSP00000403343,9606.ENSP00000393333,0.0,0.0,0.000,0.00,0.000,0.292,0.7,0.525,0.890
3,AAMP,INPP5K,9606.ENSP00000403343,9606.ENSP00000413937,0.0,0.0,0.000,0.00,0.060,0.514,0.0,0.000,0.523
4,ABCA2,ABCB9,9606.ENSP00000344155,9606.ENSP00000440288,0.0,0.0,0.000,0.55,0.074,0.186,0.0,0.282,0.413
...,...,...,...,...,...,...,...,...,...,...,...,...,...
21223,ZZZ3,ZNF697,9606.ENSP00000359837,9606.ENSP00000396857,0.0,0.0,0.000,0.00,0.056,0.000,0.0,0.410,0.419
21224,ZZZ3,MYC,9606.ENSP00000359837,9606.ENSP00000478887,0.0,0.0,0.000,0.00,0.061,0.072,0.0,0.425,0.456
21225,ZZZ3,CAND1,9606.ENSP00000359837,9606.ENSP00000442318,0.0,0.0,0.000,0.00,0.467,0.000,0.0,0.000,0.467
21226,ZZZ3,BMI1,9606.ENSP00000359837,9606.ENSP00000365851,0.0,0.0,0.000,0.00,0.257,0.000,0.0,0.380,0.520
