# Exploratory Data Analysis: Networks Dataframe (from Omnipath database)

[//]: # (------------------------------------------    DO NOT MODIFY THIS    ------------------------------------------)
<style type="text/css">
.tg  {border-collapse:collapse;
      border-spacing:0;
     }
.tg td{border-color:black;
       border-style:solid;
       border-width:1px;
       font-family:Arial, sans-serif;
       font-size:14px;
       overflow:hidden;
       padding:10px 5px;
       word-break:normal;
      }
.tg th{border-color:black;
       border-style:solid;
       border-width:1px;
       font-family:Arial, sans-serif;
       font-size:14px;
       font-weight:normal;
       overflow:hidden;
       padding:10px 5px;
       word-break:normal;
      }
.tg .tg-fymr{border-color:inherit;
             font-weight:bold;
             text-align:left;
             vertical-align:top
            }
.tg .tg-0pky{border-color:inherit;
             text-align:left;
             vertical-align:top
            }
[//]: # (--------------------------------------------------------------------------------------------------------------)

[//]: # (-------------------------------------    FILL THIS OUT WITH YOUR DATA    -------------------------------------)
</style>
<table class="tg">
    <tbody>
      <tr>
        <td class="tg-fymr" style="font-weight: bold">Title:</td>
        <td class="tg-0pky">Exploratory Data Analysis: Networks Dataframe (from Omnipath database)</td>
      </tr>
      <tr>
        <td class="tg-fymr" style="font-weight: bold">Authors:</td>
        <td class="tg-0pky">
            <a href="https://github.com/ecarrenolozano" target="_blank" rel="noopener noreferrer">Edwin Carreño</a>
        </td>
      </tr>
      <tr>
        <td class="tg-fymr" style="font-weight: bold">Affiliations:</td>
        <td class="tg-0pky">
            <a href="https://www.ssc.uni-heidelberg.de/en" target="_blank" rel="noopener noreferrer">Scientific Software Center</a>,
            <a href="https://saezlab.org/" target="_blank" rel="noopener noreferrer">Saez-Rodriguez Group</a>
        </td>
      </tr>
      <tr>
        <td class="tg-fymr" style="font-weight: bold">Date Created:</td>
        <td class="tg-0pky">19.03.2025</td>
      </tr>
      <tr>
        <td class="tg-fymr" style="font-weight: bold">Description:</td>
        <td class="tg-0pky">Extraction of metadata for building database tables </td>
      </tr>
    </tbody>
</table>

[//]: # (--------------------------------------------------------------------------------------------------------------)

## Overview


In this section you should introduce the purpose of this Notebook and a list of expected outcomes achieved by the user (especially if this a tutorial) at the of running all the cells.

## Setup (if required)

If your code require to install dependencies before your main code, please add the commands to install the dependencies.

### Pandas installation

In [1]:
!pip install pandas -q

## Importing Libraries

In [2]:
"""
Recommendations:
    - Respect the order of the imports, they are indicated by the numbers 1, 2, 3.
    - One import per line is recommended, with this we can track easily any modified line when we use git.
    - Absolute imports are recommended (see 3. Local application/library specific imports below), they improve readability and give better error messages.
    - You should put a blank line between each group of imports.
"""

# future-imports (for instance: from __future__ import barry_as_FLUFL)
# from __future__ import barry_as_FLUFL  

# 1. Standard library imports
import os

# 2. Related third party imports
import numpy as np
import pandas as pd
from pydantic import BaseModel
from pydantic import Field

# 3. Local application/library specific imports
# import <mypackage>.<MyClass>         # this is an example
# from <mypackage> import <MyClass>    # this is another example 

## Introduction

## Section 1: Point to "Interactions Dataframe"

This sections serves to explain a topic or give a background. Do not hesitate and include images and latex equations if you need them.

### Subsection 1.1: Setting dataset path

In [3]:
interactions_path = os.path.join("../data/omnipath_archive/omnipath_webservice_interactions__latest.tsv.gz")

In [4]:
print("This file exist? {}".format(os.path.exists(interactions_path)))

This file exist? True


### Subsection 1.2: Exploratory Data Analysis

### Configuring Pandas view

In [5]:
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)  

### Load data into Pandas Dataframe (without predefined data types)

In [6]:
interactions_df = pd.read_table(interactions_path)

  interactions_df = pd.read_table(interactions_path)


In [7]:
interactions_df.head()

Unnamed: 0,source,target,source_genesymbol,target_genesymbol,is_directed,is_stimulation,is_inhibition,consensus_direction,consensus_stimulation,consensus_inhibition,sources,references,omnipath,kinaseextra,ligrecextra,pathwayextra,mirnatarget,dorothea,collectri,tf_target,lncrna_mrna,tf_mirna,small_molecule,dorothea_curated,dorothea_chipseq,dorothea_tfbs,dorothea_coexp,dorothea_level,type,curation_effort,extra_attrs,evidences,ncbi_tax_id_source,entity_type_source,ncbi_tax_id_target,entity_type_target
0,P0DP23,P48995,CALM1,TRPC1,1,0,1,1,0,1,TRIP,TRIP:11290752;TRIP:11983166;TRIP:12601176,True,False,False,False,False,False,False,False,False,False,False,,,,,,post_translational,3,"{""TRIP_method"":[""Calcium measurement"",""Fluorescence probe labeling"",""Fusion protein-pull down assay"",""Patch clamp""]}","{""id_a"":""P0DP23"",""id_b"":""P48995"",""positive"":[],""negative"":[{""resource"":""TRIP"",""references"":[""11983166"",""12601176"",""11290752""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""Calcium measurement"",""Fluorescence probe labeling"",""Fusion protein-pull down assay"",""Patch clamp""]}}],""directed"":[],""undirected"":[]}",9606,protein,9606,protein
1,P0DP25,P48995,CALM3,TRPC1,1,0,1,1,0,1,TRIP,TRIP:11290752;TRIP:11983166;TRIP:12601176,True,False,False,False,False,False,False,False,False,False,False,,,,,,post_translational,3,"{""TRIP_method"":[""Calcium measurement"",""Fluorescence probe labeling"",""Fusion protein-pull down assay"",""Patch clamp""]}","{""id_a"":""P0DP25"",""id_b"":""P48995"",""positive"":[],""negative"":[{""resource"":""TRIP"",""references"":[""11983166"",""12601176"",""11290752""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""Calcium measurement"",""Fluorescence probe labeling"",""Fusion protein-pull down assay"",""Patch clamp""]}}],""directed"":[],""undirected"":[]}",9606,protein,9606,protein
2,P0DP24,P48995,CALM2,TRPC1,1,0,1,1,0,1,TRIP,TRIP:11290752;TRIP:11983166;TRIP:12601176,True,False,False,False,False,False,False,False,False,False,False,,,,,,post_translational,3,"{""TRIP_method"":[""Calcium measurement"",""Fluorescence probe labeling"",""Fusion protein-pull down assay"",""Patch clamp""]}","{""id_a"":""P0DP24"",""id_b"":""P48995"",""positive"":[],""negative"":[{""resource"":""TRIP"",""references"":[""11983166"",""12601176"",""11290752""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""Calcium measurement"",""Fluorescence probe labeling"",""Fusion protein-pull down assay"",""Patch clamp""]}}],""directed"":[],""undirected"":[]}",9606,protein,9606,protein
3,Q03135,P48995,CAV1,TRPC1,1,1,0,1,1,0,DIP;HPRD;IntAct;Lit-BM-17;TRIP,DIP:19897728;HPRD:12732636;IntAct:19897728;Lit-BM-17:10980191;Lit-BM-17:19052258;Lit-BM-17:19897728;TRIP:12732636;TRIP:14551243;TRIP:16822931;TRIP:18430726;TRIP:19052258;TRIP:19351713;TRIP:19897728,True,False,False,False,False,False,False,False,False,False,False,,,,,,post_translational,13,"{""TRIP_method"":[""Cell surface biotinylation"",""Co-immunoprecipitation"",""Co-immunofluorescence staining"",""Fluorescence resonance energy transfer"",""Yeast two-hybrid"",""Fusion protein-pull down assay""]}","{""id_a"":""Q03135"",""id_b"":""P48995"",""positive"":[{""resource"":""TRIP"",""references"":[""19351713"",""19052258"",""12732636"",""14551243"",""19897728"",""18430726"",""16822931""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""Cell surface biotinylation"",""Co-immunoprecipitation"",""Co-immunofluorescence staining"",""Fluorescence resonance energy transfer"",""Yeast two-hybrid"",""Fusion protein-pull down assay""]}}],""negative"":[],""directed"":[],""undirected"":[{""resource"":""DIP"",""references"":[""19897728""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""fluorescent resonance energy transfer"",""anti bait coimmunoprecipitation"",""two hybrid""],""type"":[""direct interaction"",""physical association""],""id"":""DIP-104198E""}},{""resource"":""IntAct"",""references"":[""19897728""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""fluorescent resonance energy transfer"",""two hybrid"",""anti bait coimmunoprecipitation""]}},{""resource"":""Lit-BM-17"",""references"":[""10980191"",""19897728"",""19052258""],""dataset"":""omnipath"",""via"":null,""attrs"":{""mentha_score"":0.765}},{""resource"":""HPRD"",""references"":[""12732636""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""in vitro"",""in vivo"",""yeast 2-hybrid""]}}]}",9606,protein,9606,protein
4,P14416,P48995,DRD2,TRPC1,1,1,0,1,1,0,TRIP,TRIP:18261457,True,False,False,False,False,False,False,False,False,False,False,,,,,,post_translational,1,"{""TRIP_method"":[""Cell surface biotinylation"",""Co-immunoprecipitation"",""Co-immunofluorescence staining"",""Yeast two-hybrid"",""Fusion protein-pull down assay""]}","{""id_a"":""P14416"",""id_b"":""P48995"",""positive"":[{""resource"":""TRIP"",""references"":[""18261457""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""Cell surface biotinylation"",""Co-immunoprecipitation"",""Co-immunofluorescence staining"",""Yeast two-hybrid"",""Fusion protein-pull down assay""]}}],""negative"":[],""directed"":[],""undirected"":[]}",9606,protein,9606,protein


In [8]:
interactions_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1217900 entries, 0 to 1217899
Data columns (total 36 columns):
 #   Column                 Non-Null Count    Dtype 
---  ------                 --------------    ----- 
 0   source                 1217900 non-null  object
 1   target                 1217900 non-null  object
 2   source_genesymbol      1217900 non-null  object
 3   target_genesymbol      1217900 non-null  object
 4   is_directed            1217900 non-null  int64 
 5   is_stimulation         1217900 non-null  int64 
 6   is_inhibition          1217900 non-null  int64 
 7   consensus_direction    1217900 non-null  int64 
 8   consensus_stimulation  1217900 non-null  int64 
 9   consensus_inhibition   1217900 non-null  int64 
 10  sources                1217900 non-null  object
 11  references             413020 non-null   object
 12  omnipath               1217900 non-null  bool  
 13  kinaseextra            1217900 non-null  bool  
 14  ligrecextra            1217900 non

### Extract Metadata

In [9]:
metadata = pd.DataFrame({
    'Column Name': interactions_df.columns,
    'Data Type': interactions_df.dtypes.values,
    'Nullable': interactions_df.isnull().any().values,
    'Unique Values': [interactions_df[col].nunique() for col in interactions_df.columns]
})

In [10]:
metadata

Unnamed: 0,Column Name,Data Type,Nullable,Unique Values
0,source,object,False,27944
1,target,object,False,47976
2,source_genesymbol,object,False,22117
3,target_genesymbol,object,False,36930
4,is_directed,int64,False,2
5,is_stimulation,int64,False,2
6,is_inhibition,int64,False,2
7,consensus_direction,int64,False,2
8,consensus_stimulation,int64,False,2
9,consensus_inhibition,int64,False,2


In [11]:
field = "entity_type_source"

print("List of unique values in field: {}\n\t{}".format(field, str(interactions_df[field].unique()).replace(" ", ",")))

List of unique values in field: entity_type_source
	['protein','complex','mirna','lncrna','small_molecule']


In [39]:
filtered = interactions_df[(interactions_df["source"]=="Q16254")
                         & (interactions_df["target"]=="O43683")]
#filtered[["source", "target", "is_stimulation", "omnipath"]]

filtered

Unnamed: 0,source,target,source_genesymbol,target_genesymbol,is_directed,is_stimulation,is_inhibition,consensus_direction,consensus_stimulation,consensus_inhibition,sources,references,omnipath,kinaseextra,ligrecextra,pathwayextra,mirnatarget,dorothea,collectri,tf_target,lncrna_mrna,tf_mirna,small_molecule,dorothea_curated,dorothea_chipseq,dorothea_tfbs,dorothea_coexp,dorothea_level,type,curation_effort,extra_attrs,evidences,ncbi_tax_id_source,entity_type_source,ncbi_tax_id_target,entity_type_target
155665,Q16254,O43683,E2F4,BUB1,1,1,0,1,1,0,ACSN;SPIKE_LC,ACSN:15574595;SPIKE_LC:18319725,False,False,False,True,False,False,False,False,False,False,False,,,,,,post_translational,2,"{""SPIKE_LC_effect"":""1"",""SPIKE_LC_mechanism"":""Transcription Regulation"",""ACSN_effect"":[""UNKNOWN_TRANSITION"",""1""]}","{""id_a"":""Q16254"",""id_b"":""O43683"",""positive"":[{""resource"":""SPIKE_LC"",""references"":[""18319725""],""dataset"":""pathwayextra"",""via"":null,""attrs"":{""effect"":""1"",""mechanism"":""Transcription Regulation""}}],""negative"":[],""directed"":[{""resource"":""ACSN"",""references"":[""15574595""],""dataset"":""directionextra"",""via"":null,""attrs"":{""effect"":[""UNKNOWN_TRANSITION"",""1""]}}],""undirected"":[]}",9606,protein,9606,protein
443993,Q16254,O43683,E2F4,BUB1,1,0,0,0,0,0,DoRothEA;ENCODE-proximal;HTRIdb;PAZAR_DoRothEA,HTRIdb:17531812,False,False,False,False,False,True,False,True,False,False,False,True,False,False,False,D,transcriptional,1,"{""DoRothEA_curated"":true,""DoRothEA_chipseq"":false,""DoRothEA_tfbs"":false,""DoRothEA_coexp"":false,""DoRothEA_level"":""D""}","{""id_a"":""Q16254"",""id_b"":""O43683"",""positive"":[],""negative"":[],""directed"":[{""resource"":""PAZAR"",""references"":[],""dataset"":""dorothea"",""via"":""DoRothEA"",""attrs"":{""curated"":true,""chipseq"":false,""tfbs"":false,""coexp"":false,""level"":""D""}},{""resource"":""DoRothEA"",""references"":[],""dataset"":""dorothea"",""via"":null,""attrs"":{""curated"":true,""chipseq"":false,""tfbs"":false,""coexp"":false,""level"":""D""}},{""resource"":""ENCODE-proximal"",""references"":[],""dataset"":""tf_target"",""via"":null,""attrs"":{}},{""resource"":""HTRIdb"",""references"":[""17531812""],""dataset"":""tf_target"",""via"":null,""attrs"":{}}],""undirected"":[]}",9606,protein,9606,protein


In [44]:
omnipath_df = interactions_df[(interactions_df["omnipath"])==True]
omnipath_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 191740 entries, 0 to 304714
Data columns (total 36 columns):
 #   Column                 Non-Null Count   Dtype 
---  ------                 --------------   ----- 
 0   source                 191740 non-null  object
 1   target                 191740 non-null  object
 2   source_genesymbol      191740 non-null  object
 3   target_genesymbol      191740 non-null  object
 4   is_directed            191740 non-null  int64 
 5   is_stimulation         191740 non-null  int64 
 6   is_inhibition          191740 non-null  int64 
 7   consensus_direction    191740 non-null  int64 
 8   consensus_stimulation  191740 non-null  int64 
 9   consensus_inhibition   191740 non-null  int64 
 10  sources                191740 non-null  object
 11  references             191721 non-null  object
 12  omnipath               191740 non-null  bool  
 13  kinaseextra            191740 non-null  bool  
 14  ligrecextra            191740 non-null  bool  
 15  pathw

In [45]:
omnipath_df[(omnipath_df["source"])==(omnipath_df["target"])]

Unnamed: 0,source,target,source_genesymbol,target_genesymbol,is_directed,is_stimulation,is_inhibition,consensus_direction,consensus_stimulation,consensus_inhibition,sources,references,omnipath,kinaseextra,ligrecextra,pathwayextra,mirnatarget,dorothea,collectri,tf_target,lncrna_mrna,tf_mirna,small_molecule,dorothea_curated,dorothea_chipseq,dorothea_tfbs,dorothea_coexp,dorothea_level,type,curation_effort,extra_attrs,evidences,ncbi_tax_id_source,entity_type_source,ncbi_tax_id_target,entity_type_target
410,Q14457,Q14457,BECN1,BECN1,1,1,0,1,1,0,InnateDB;Lit-BM-17;SPIKE,InnateDB:18641390;InnateDB:19050071;InnateDB:19270696;Lit-BM-17:18641390;Lit-BM-17:19270696;Lit-BM-17:20501938;Lit-BM-17:20562859;Lit-BM-17:24034250;SPIKE:18641390,True,False,False,False,False,False,False,False,False,False,False,,,,,,post_translational,9,"{""SPIKE_effect"":""1"",""SPIKE_mechanism"":""Physical Interaction""}","{""id_a"":""Q14457"",""id_b"":""Q14457"",""positive"":[{""resource"":""SPIKE"",""references"":[""18641390""],""dataset"":""omnipath"",""via"":null,""attrs"":{""effect"":""1"",""mechanism"":""Physical Interaction""}}],""negative"":[],""directed"":[],""undirected"":[{""resource"":""InnateDB"",""references"":[""19270696"",""19050071"",""18641390""],""dataset"":""omnipath"",""via"":null,""attrs"":{}},{""resource"":""Lit-BM-17"",""references"":[""19270696"",""18641390"",""20562859"",""20501938"",""24034250""],""dataset"":""omnipath"",""via"":null,""attrs"":{""mentha_score"":0.917}}]}",9606,protein,9606,protein
522,P46531,P46531,NOTCH1,NOTCH1,1,1,0,1,1,0,BioGRID;HPRD;NetPath;SPIKE,BioGRID:10669757;BioGRID:15561108;HPRD:10669757;HPRD:11425854;HPRD:11585921;NetPath:10082551;NetPath:10713164;NetPath:10747963;NetPath:11006133;NetPath:11418662;NetPath:11425854;NetPath:11564735;NetPath:11604490;NetPath:11604511;NetPath:11827460;NetPath:12370315;NetPath:12644465;NetPath:12913000;NetPath:14500836;NetPath:14583609;NetPath:14638857;NetPath:8749394;NetPath:8755477;NetPath:9169836;SPIKE:18495817,True,False,False,False,False,False,False,False,False,False,False,,,,,,post_translational,25,"{""SPIKE_effect"":""1"",""SPIKE_mechanism"":""Other""}","{""id_a"":""P46531"",""id_b"":""P46531"",""positive"":[{""resource"":""SPIKE"",""references"":[""18495817""],""dataset"":""omnipath"",""via"":null,""attrs"":{""effect"":""1"",""mechanism"":""Other""}}],""negative"":[],""directed"":[],""undirected"":[{""resource"":""BioGRID"",""references"":[""10669757"",""15561108""],""dataset"":""omnipath"",""via"":null,""attrs"":{}},{""resource"":""NetPath"",""references"":[""8755477"",""11827460"",""10082551"",""9169836"",""14500836"",""12370315"",""11425854"",""11418662"",""11564735"",""12913000"",""14583609"",""10713164"",""12644465"",""11006133"",""14638857"",""11604490"",""11604511"",""8749394"",""10747963""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""two hybrid"",""in vivo"",""in vitro""],""type"":[""physical interaction""],""pathway"":[""Notch""]}},{""resource"":""HPRD"",""references"":[""10669757"",""11585921"",""11425854""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""in vitro"",""in vivo""]}}]}",9606,protein,9606,protein
565,P26367,P26367,PAX6,PAX6,1,1,0,1,1,0,SPIKE,SPIKE:11069887;SPIKE:12710953,True,False,False,False,False,False,False,False,False,False,False,,,,,,post_translational,2,"{""SPIKE_effect"":""1"",""SPIKE_mechanism"":""Transcription Regulation""}","{""id_a"":""P26367"",""id_b"":""P26367"",""positive"":[{""resource"":""SPIKE"",""references"":[""12710953"",""11069887""],""dataset"":""omnipath"",""via"":null,""attrs"":{""effect"":""1"",""mechanism"":""Transcription Regulation""}}],""negative"":[],""directed"":[],""undirected"":[]}",9606,protein,9606,protein
574,Q07812,Q07812,BAX,BAX,1,1,0,1,1,0,BioGRID;DIP;IntAct;Lit-BM-17;SIGNOR;SPIKE,BioGRID:19767770;DIP:18547146;IntAct:10620504;IntAct:14963330;IntAct:1558161;IntAct:17668322;IntAct:18547146;IntAct:18835031;IntAct:19062087;IntAct:19074440;IntAct:19767770;IntAct:20360684;IntAct:20850011;IntAct:22198199;IntAct:23374347;Lit-BM-17:10620504;Lit-BM-17:11161816;Lit-BM-17:14963330;Lit-BM-17:1558161;Lit-BM-17:17289999;Lit-BM-17:17668322;Lit-BM-17:18547146;Lit-BM-17:18835031;Lit-BM-17:19062087;Lit-BM-17:19074440;Lit-BM-17:19767770;Lit-BM-17:20360684;Lit-BM-17:20850011;Lit-BM-17:21712378;Lit-BM-17:22198199;Lit-BM-17:23374347;SIGNOR:10629050;SPIKE:9670005,True,False,False,False,False,False,False,False,False,False,False,,,,,,post_translational,33,"{""SPIKE_effect"":""1"",""SPIKE_mechanism"":""Physical Interaction"",""SIGNOR_mechanism"":[""Physical Interaction"",""binding""]}","{""id_a"":""Q07812"",""id_b"":""Q07812"",""positive"":[{""resource"":""SPIKE"",""references"":[""9670005""],""dataset"":""omnipath"",""via"":null,""attrs"":{""effect"":""1"",""mechanism"":""Physical Interaction""}},{""resource"":""SIGNOR"",""references"":[""10629050""],""dataset"":""omnipath"",""via"":null,""attrs"":{""mechanism"":[""Physical Interaction"",""binding""]}}],""negative"":[],""directed"":[],""undirected"":[{""resource"":""BioGRID"",""references"":[""19767770""],""dataset"":""omnipath"",""via"":null,""attrs"":{}},{""resource"":""DIP"",""references"":[""18547146""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""cross-linking study""],""type"":[""direct interaction""],""id"":""DIP-62302E""}},{""resource"":""IntAct"",""references"":[""22198199"",""19062087"",""1558161"",""18835031"",""17668322"",""18547146"",""10620504"",""19767770"",""20850011"",""20360684"",""19074440"",""14963330"",""23374347""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""cross-linking study"",""x-ray crystallography"",""blue native page"",""molecular sieving"",""polymerization"",""experimental interaction detection"",""comigration in sds page"",""fluorescent resonance energy transfer"",""anti bait coimmunoprecipitation""]}},{""resource"":""Lit-BM-17"",""references"":[""22198199"",""19062087"",""1558161"",""18835031"",""17668322"",""18547146"",""17289999"",""10620504"",""19767770"",""11161816"",""21712378"",""20850011"",""20360684"",""19074440"",""14963330"",""23374347""],""dataset"":""omnipath"",""via"":null,""attrs"":{""mentha_score"":1.0}}]}",9606,protein,9606,protein
589,Q16611,Q16611,BAK1,BAK1,1,1,0,1,1,0,HPRD;IntAct;Lit-BM-17;SIGNOR;SPIKE,HPRD:10579309;HPRD:15077116;IntAct:15901672;IntAct:16439990;IntAct:23782464;Lit-BM-17:15077116;Lit-BM-17:15901672;Lit-BM-17:16439990;Lit-BM-17:17446862;SIGNOR:11175253;SPIKE:17157251,True,False,False,False,False,False,False,False,False,False,False,,,,,,post_translational,11,"{""SPIKE_effect"":""1"",""SPIKE_mechanism"":""Physical Interaction"",""SIGNOR_mechanism"":[""Physical Interaction"",""binding""]}","{""id_a"":""Q16611"",""id_b"":""Q16611"",""positive"":[{""resource"":""SPIKE"",""references"":[""17157251""],""dataset"":""omnipath"",""via"":null,""attrs"":{""effect"":""1"",""mechanism"":""Physical Interaction""}},{""resource"":""SIGNOR"",""references"":[""11175253""],""dataset"":""omnipath"",""via"":null,""attrs"":{""mechanism"":[""Physical Interaction"",""binding""]}}],""negative"":[],""directed"":[],""undirected"":[{""resource"":""IntAct"",""references"":[""16439990"",""15901672"",""23782464""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""chromatography technology"",""cross-linking study"",""anti tag coimmunoprecipitation""]}},{""resource"":""Lit-BM-17"",""references"":[""16439990"",""15901672"",""17446862"",""15077116""],""dataset"":""omnipath"",""via"":null,""attrs"":{""mentha_score"":0.702}},{""resource"":""HPRD"",""references"":[""10579309"",""15077116""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""in vivo"",""yeast 2-hybrid""]}}]}",9606,protein,9606,protein
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
301652,D4ACN6,D4ACN6,Cert1,Cert1,0,0,0,0,0,0,HPRD;HuRI,HPRD:16189514,True,False,False,False,False,False,False,False,False,False,False,,,,,,post_translational,1,"{""HPRD_method"":[""yeast 2-hybrid""],""HuRI_score"":0.89216319831}","{""id_a"":""D4ACN6"",""id_b"":""D4ACN6"",""positive"":[],""negative"":[],""directed"":[],""undirected"":[{""resource"":""HPRD"",""references"":[""16189514""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""yeast 2-hybrid""]}},{""resource"":""HuRI"",""references"":[],""dataset"":""omnipath"",""via"":null,""attrs"":{""score"":0.89216319831}}]}",10116,protein,10116,protein
301956,Q642A2,Q642A2,Agtrap,Agtrap,0,0,0,0,0,0,HPRD;HuRI,HPRD:16189514,True,False,False,False,False,False,False,False,False,False,False,,,,,,post_translational,1,"{""HPRD_method"":[""yeast 2-hybrid""],""HuRI_score"":0.945182175017}","{""id_a"":""Q642A2"",""id_b"":""Q642A2"",""positive"":[],""negative"":[],""directed"":[],""undirected"":[{""resource"":""HPRD"",""references"":[""16189514""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""yeast 2-hybrid""]}},{""resource"":""HuRI"",""references"":[],""dataset"":""omnipath"",""via"":null,""attrs"":{""score"":0.945182175017}}]}",10116,protein,10116,protein
302384,Q9EP80,Q9EP80,Pick1,Pick1,0,0,0,0,0,0,HPRD;HuRI,HPRD:11802773;HPRD:16055064;HPRD:9405395,True,False,False,False,False,False,False,False,False,False,False,,,,,,post_translational,3,"{""HPRD_method"":[""yeast 2-hybrid""],""HuRI_score"":0.932272775134}","{""id_a"":""Q9EP80"",""id_b"":""Q9EP80"",""positive"":[],""negative"":[],""directed"":[],""undirected"":[{""resource"":""HPRD"",""references"":[""9405395"",""11802773"",""16055064""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""yeast 2-hybrid""]}},{""resource"":""HuRI"",""references"":[],""dataset"":""omnipath"",""via"":null,""attrs"":{""score"":0.932272775134}}]}",10116,protein,10116,protein
302397,Q9R0Z3,Q9R0Z3,Cdkn2a,Cdkn2a,0,0,0,0,0,0,ACSN;HPRD,ACSN:12446760;ACSN:16135794;HPRD:8755727;HPRD:8910511,True,False,False,False,False,False,False,False,False,False,False,,,,,,post_translational,4,"{""HPRD_method"":[""in vitro"",""in vivo"",""yeast 2-hybrid""],""ACSN_effect"":[""HETERODIMER_ASSOCIATION""]}","{""id_a"":""Q9R0Z3"",""id_b"":""Q9R0Z3"",""positive"":[],""negative"":[],""directed"":[],""undirected"":[{""resource"":""HPRD"",""references"":[""8755727"",""8910511""],""dataset"":""omnipath"",""via"":null,""attrs"":{""method"":[""in vitro"",""in vivo"",""yeast 2-hybrid""]}},{""resource"":""ACSN"",""references"":[""12446760"",""16135794""],""dataset"":""directionextra"",""via"":null,""attrs"":{""effect"":[""HETERODIMER_ASSOCIATION""]}}]}",10116,protein,10116,protein


### Dataset with predefined data types

In [None]:
# Data types for interactions
dtype = {'source': 'string',
         'target': 'string',
         'source_genesymbol': 'string',
         'target_genesymbol': 'string',
         'is_directed': 'Int8',
         'is_stimulation': 'Int8',
         'is_inhibition': 'Int8',
         'consensus_direction': 'Int8',
         'consensus_stimulation': 'Int8',
         'consensus_inhibition': 'Int8',
         'sources': 'string',
         'references': 'string',
         'omnipath': 'bool',
         'kinaseextra': 'bool',
         'ligrecextra': 'bool',
         'pathwayextra': 'bool',
         'mirnatarget': 'bool',
         'dorothea': 'bool',
         'collectri': 'bool',
         'tf_target': 'bool',
         'lncrna_mrna': 'bool',
         'tf_mirna': 'bool',
         'small_molecule': 'bool',
         'dorothea_curated': 'string',
         'dorothea_chipseq': 'string',
         'dorothea_tfbs': 'string',
         'dorothea_coexp': 'string',
         'dorothea_level': 'string',
         'type': 'string',
         'curation_effort': 'Int8',
         'extra_attrs': 'string',
         'evidences': 'string',
         'ncbi_tax_id_source': 'Int64',
         'entity_type_source': 'string',
         'ncbi_tax_id_target': 'Int64',
         'entity_type_target': 'string'
}

In [None]:
interactions_df = pd.read_table(interactions_path, dtype=dtype)

In [None]:
interactions_df.info()

In [None]:
metadata

In [None]:
a = interactions_df["dorothea_curated"].replace({True: True, False: False, 'True': True, 'False': False, '1': True}).astype(bool)

In [None]:
a.unique()

In [None]:
print(np.sort(interactions_df["dorothea_curated"].unique()))

In [None]:
2**16

In [None]:
length_df = pd.DataFrame(columns=interactions_df.columns)

for column in interactions_df.columns:
    if not (isinstance(column, bool) and isinstance(column, int)):
        length_df[column] = interactions_df[column].apply(str).apply(len)
    else:
        print("Length cannot be calculated in column {}".format(column))

In [None]:
length_df.describe()

In [None]:
9.058000e+03

In [None]:
interactions_df.omnipath

In [None]:
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)  

In [None]:
interactions_df[interactions_df['source'].apply(str).apply(len) >= 400].source

## Section 2: Add Equations to the Notebook

## Section 3: Add plot from Matplotlib

## Section 4: Add interactive plots (plotly)

In [None]:
!pip install sqlalchemy