In [1]:
import pandas as pd
import re

# Crear un diccionario que asocie cada Organism ID con su nombre correspondiente
organism_names = {
    "9606": "Homo sapiens",
    "10116": "Rattus norvegicus",
    "10090": "Mus musculus",
    "9913": "Bos taurus",
    "11676": "Human Immunodeficiency Virus 1",
    "9986": "Oryctolagus cuniculus",
    "37296": "Human Herpesvirus 8",
    "559292": "Saccharomyces cerevisiae (S288c)",
    "333760": "Human papillomavirus (16)",
    "2697049": "Severe acute respiratory syndrome coronavirus 2",
    "1335626": "Middle-East Respiratory Syndrome-related Coronavirus",
    "6239": "Caenorhabditis elegans",
    "10600": "Human papillomavirus (6b)",
    "7227": "Drosophila melanogaster",
    "9823": "Sus scrofa",
    "9031": "Gallus gallus"
}

# Cargar el archivo TSV inicial
file_path_initial = 'string_interactions_Fusobacterium Nucleatum.tsv'
data_initial = pd.read_csv(file_path_initial, delimiter='\t')

# Procesar las columnas node1 y node2 para extraer los IDs de los taxones
data_initial['Taxid interactor A'] = data_initial['node1_string_id'].apply(lambda x: x.split('.')[0])
data_initial['Taxid interactor B'] = data_initial['node2_string_id'].apply(lambda x: x.split('.')[0])

# Mapear los IDs de los taxones a sus nombres correspondientes
data_initial['Taxid interactor A'] = data_initial['Taxid interactor A'].map(organism_names)
data_initial['Taxid interactor B'] = data_initial['Taxid interactor B'].map(organism_names)

# Renombrar las columnas
data_initial = data_initial.rename(columns={
    '#node1': 'Interactor A Genes',
    'node2': 'Interactor B Genes',
    'combined_score': 'Combined Score'
})

# Seleccionar solo las columnas de interés
columnas_interes = ['Interactor A Genes', 'Interactor B Genes', 'Combined Score', 'Taxid interactor A', 'Taxid interactor B']
data_initial = data_initial[columnas_interes]

# Filtrar los valores de Combined Score menores a 0.9
data_initial = data_initial[data_initial['Combined Score'].astype(float) >= 0.9]

# Cargar el archivo CSV con los datos filtrados y con Fold Change
file_path_fc = 'deSeqRes_filtrado_Fusobacterium_Nucleatum_with_FC.csv'
data_with_fc = pd.read_csv(file_path_fc)

# Renombrar las columnas
data_with_fc = data_with_fc.rename(columns={
    'GeneSymbol': 'Genes',
    'DirectionalFoldChange': 'FoldChange'
})

# Crear una nueva columna 'Regulation' basada en el signo de 'FoldChange'
data_with_fc['Regulation'] = data_with_fc['FoldChange'].apply(lambda x: 'UpRegulated' if x > 0 else 'DownRegulated')

# Tomar el valor absoluto de 'FoldChange'
data_with_fc['FoldChange'] = data_with_fc['FoldChange'].abs()

# Realizar el join en ambas columnas 'Interactor A Genes' y 'Interactor B Genes'
merged_data_A = data_initial.merge(data_with_fc, left_on='Interactor A Genes', right_on='Genes', how='left')
merged_data_B = data_initial.merge(data_with_fc, left_on='Interactor B Genes', right_on='Genes', how='left')

# Concatenar los resultados y eliminar duplicados
merged_data = pd.concat([merged_data_A, merged_data_B]).drop_duplicates()

# Eliminar los registros donde 'Genes' sea NaN
merged_data = merged_data.dropna(subset=['Genes'])


# Ordenar el DataFrame por 'FoldChange' de mayor a menor
merged_data = merged_data.sort_values(by='FoldChange', ascending=False)

merged_data = merged_data.drop_duplicates()

# Guardar el resultado en un nuevo archivo CSV
merged_data.to_csv('String_data_deSeqRes_filtrado_Fusobacterium_nucleatum.csv', index=False)

# Guardar el resultado en un nuevo archivo Excel
merged_data.to_excel('String_data_deSeqRes_filtrado_Fusobacterium_nucleatum.xlsx', index=False)


In [2]:
merged_data

Unnamed: 0,Interactor A Genes,Interactor B Genes,Combined Score,Taxid interactor A,Taxid interactor B,Genes,Log2FoldChange,FoldChange,Regulation
7,SMAD6,SMAD7,0.984,Homo sapiens,Homo sapiens,SMAD7,-2.336285,5.050006,DownRegulated
8,SMAD7,SMAD6,0.984,Homo sapiens,Homo sapiens,SMAD7,-2.336285,5.050006,DownRegulated
11,WNT16,FZD1,0.913,Homo sapiens,Homo sapiens,WNT16,1.58495,2.999974,UpRegulated
2,FZD1,WNT16,0.913,Homo sapiens,Homo sapiens,WNT16,1.58495,2.999974,UpRegulated
9,SORT1,NGFR,0.999,Homo sapiens,Homo sapiens,NGFR,-1.578553,2.9867,DownRegulated
6,NGFR,SORT1,0.999,Homo sapiens,Homo sapiens,NGFR,-1.578553,2.9867,DownRegulated
0,CCND1,KAT2B,0.959,Homo sapiens,Homo sapiens,KAT2B,-1.321914,2.499975,DownRegulated
4,KAT2B,CCND1,0.959,Homo sapiens,Homo sapiens,KAT2B,-1.321914,2.499975,DownRegulated
4,KAT2B,CCND1,0.959,Homo sapiens,Homo sapiens,CCND1,-1.312102,2.483031,DownRegulated
0,CCND1,KAT2B,0.959,Homo sapiens,Homo sapiens,CCND1,-1.312102,2.483031,DownRegulated
