## List Extraction

#### Libraries

In [1]:
import pandas as pd
import os
import pickle

#### Variables

In [2]:
# --- Paths -----------------------------------------------------------------
# Tab-separated interactome export that the preprocessing cell reads.
Interactome_file_name_corrected_updated = "data/BIOGRID-ORGANISM-Homo_sapiens-4.4.240.tab3.txt"
# Pickle cache: the filtered PPI table is written here once and reloaded
# from here on every later run.
PPI_list_variable_corrected_updated = "data/PPI_of_interest_corrected updated"

# --- Row filters -----------------------------------------------------------
# Organism ID that both interactors of a row must carry to be kept.
human_ID = 9606
# Value of "Experimental System Type" that a row must have to be kept.
interaction_of_interest = "physical"

# --- Column names ----------------------------------------------------------
# Gene-symbol columns used for duplicate removal and self-loop removal.
First_gene_symbol_indicator, Second_gene_symbol_indicator = (
    "Official Symbol Interactor A",
    "Official Symbol Interactor B",
)

#### Preprocessing

In [3]:
# Build (or reload) `PPI`: the human, physical, de-duplicated, self-loop-free
# interaction table. The result is cached as a pickle so that the raw
# interactome file only has to be parsed once.
if os.path.exists(PPI_list_variable_corrected_updated):
    print("The file already exists! Importing information from the file ... \n")
    # SECURITY: unpickling can execute arbitrary code. Only load a cache file
    # that was written by this notebook; never a file from an untrusted source.
    # NOTE(review): the cache is reused whenever the file exists, even if the
    # input file or the filter settings have changed -- delete it to rebuild.
    with open(PPI_list_variable_corrected_updated, 'rb') as file:
        PPI = pickle.load(file)
    print("Done")

else:
    print("The file does not exist yet. Processing... \n")
    # low_memory=False makes pandas infer every column's dtype from the whole
    # file rather than chunk by chunk, so a column cannot end up holding a
    # mix of ints and strings (and no DtypeWarning is raised).
    complete_interactome = pd.read_csv(Interactome_file_name_corrected_updated,
                                       delimiter='\t',
                                       low_memory=False)

    # Keep rows where both interactors belong to the organism of interest ...
    both_human = (
        (complete_interactome["Organism ID Interactor A"] == human_ID)
        & (complete_interactome["Organism ID Interactor B"] == human_ID))
    # ... and whose experimental system type is the requested one.
    of_interest = (
        complete_interactome["Experimental System Type"] == interaction_of_interest)
    human_physical_interactome = complete_interactome[both_human & of_interest]
    # Optional debugging export of the filtered table:
    ## human_physical_interactome.to_csv("human_physical_interactome.csv", index = False)

    # Duplicate removal: keep the first row for every (symbol A, symbol B) pair.
    # NOTE(review): the pair is compared in order, so A-B and B-A are treated
    # as different interactions and both survive -- confirm this is intended.
    PPI_NoDuplicates = human_physical_interactome.drop_duplicates(
        subset=[First_gene_symbol_indicator, Second_gene_symbol_indicator])

    # Self-loop removal: drop rows where both interactors are the same symbol.
    is_self_loop = (PPI_NoDuplicates[First_gene_symbol_indicator]
                    == PPI_NoDuplicates[Second_gene_symbol_indicator])
    PPI = PPI_NoDuplicates[~is_self_loop]

    with open(PPI_list_variable_corrected_updated, 'wb') as file:
        pickle.dump(PPI, file)

    # Release the large intermediates; only `PPI` is needed from here on.
    del complete_interactome, both_human, of_interest, is_self_loop
    del human_physical_interactome, PPI_NoDuplicates
    # Mirror the cached branch so the reader sees that processing finished.
    print("Done")

The file already exists! Importing information from the file ... 

Done


In [4]:
# Preview the first rows. A bare last expression uses the notebook's rich
# table display instead of the line-wrapped plain-text dump from print().
PPI.head()

   #BioGRID Interaction ID Entrez Gene Interactor A Entrez Gene Interactor B  \
0                      103                     6416                     2318   
1                      117                    84665                       88   
2                      183                       90                     2339   
3                      278                     2624                     5371   
4                      418                     6118                     6774   

   BioGRID ID Interactor A  BioGRID ID Interactor B  \
0                   112315                   108607   
1                   124185                   106603   
2                   106605                   108625   
3                   108894                   111384   
4                   112038                   112651   

  Systematic Name Interactor A Systematic Name Interactor B  \
0                            -                            -   
1                            -                            -   