# Hit Predict Pipeline

This notebook creates a network using the latest Hit Predict data downloaded. 

## Imports Python Packages Needed in the Pipeline

In [1]:
import ndex2.client as nc
import json
import pandas as pd
import sys
import jsonschema
import ndexutil.tsv.tsv2nicecx as t2n

### Set variables
Note: The load_tutorial_config() function uses a file in the user's home directory (**~/ndex_tutorial_config.json**).  A sample json file is included in the root of this project (also named "ndex_tutorial_config.json").  Edit this file and copy it to your user home directory.  Alternatively you can just use the hardcoded variables below.

In [2]:
my_server = "public.ndexbio.org"
my_username = 'username'
my_password = 'password'

if 'dev.ndexbio.org' in my_server:
    cytoscape_visual_properties_template_id = 'c7075eb1-231e-11e8-894b-525400c25d22' # DEV
else:
    cytoscape_visual_properties_template_id = 'ece36fa0-1e5d-11e8-b939-0ac135e8bacf' # PUBLIC

my_ndex=nc.Ndex2(my_server, my_username, my_password)
print(my_server)
print(my_username)
print(my_password)

public.ndexbio.org
username
password


### Set load plan
Note: To see the field mapping (load plan) open **hitpredict_load_plan.json**. This json file resides in the same directory as this notebook

In [3]:
path_to_load_plan = 'hitpredict_load_plan.json'
load_plan = None
with open(path_to_load_plan, 'r') as lp:
    load_plan = json.load(lp)

### Process Hit Predict network

In [4]:
def process_hitpredict_network(load_plan):
    # TODO - add context (normalize?)
    # @CONTEXT is set from the load plan

    with open('HitPredit_in_KEGG.txt', 'r') as tsvfile:
        header = [h.strip() for h in tsvfile.readline().split('\t')]

        df = pd.read_csv(tsvfile, delimiter='\t', na_filter=False, engine='python', names=header)

    # upcase column names
    rename = {}
    for column_name in df.columns:
        rename[column_name] = column_name.upper()

    df = df.rename(columns=rename)

    # IF THE NAME FIELD IS EMPTY - SUBSTITUTE UNIPROT FIELD
    df['NAME1'].replace('', df['UNIPROT1'], inplace=True)
    df['NAME2'].replace('', df['UNIPROT2'], inplace=True)

    # ADD COLUMN TO DATAFRAME TO BE USED AS DEFAULT INTERACTION
    df.loc[:, 'DEFAULT INTERACTION'] = pd.Series('interacts with', index=df.index)

    network = t2n.convert_pandas_to_nice_cx_with_load_plan(df, load_plan)

    for node_id, node in network.nodes.items():
        # IF NODE NAME HAS DELIMITER (;) THEN SPLIT AND TAKE FIRST ELEMENT
        if ';' in node.get_name():
            node_name_temp = node.get_name().split(';')
            node.set_node_name(node_name_temp[0])

        # STRIP OFF EXTRA DATA IN THE ALIAS ATTRIBUTE
        values = network.get_node_attribute(node, 'alias2')
        if not isinstance(values, list):
            values = [values]

        replacement_values = []
        for val in values:
            if val is None:
                break
            sub_values = val.split('[')
            replacement_values.append(sub_values[0])

        if len(replacement_values) < 1:
            network.remove_node_attribute(node, 'alias2')
        else:
            network_att = network.get_node_attribute_objects(node_id, 'alias2')
            network_att.set_values(replacement_values)

    network.set_network_attribute("organism", "Human, 9606, Homo sapiens")
    network.union_node_attributes('alias', 'alias2', 'alias')
    network.set_name('HitPredict - Human')
        
    description = '<a href="http://hintdb.hgc.jp/htp/" target="_blank">HitPredict</a> is a resource of experimentally determined protein-protein interactions with reliability scores. Protein-protein interactions from IntAct, BioGRID, HPRD, MINT and DIP are combined, annotated and scored. The reliability score is calculated based on the experimental details of each interaction and the sequence, structure and functional annotations of the interacting proteins. This network contains all human interactions that map to known Kegg pathways; edge colors from light blue to dark blue are mapped to the "Total score" value.'
    network.set_network_attribute('description', description)

    network.set_network_attribute('version', 'v.4 (JUL-2017)')

    references = 'Yosvany Lopez, Kenta Nakai and Ashwini Patil. <b>HitPredict version 4 - comprehensive reliability scoring of ' \
                 'physical protein-protein interactions from more than 100 species.</b><br />' \
                 'Database (Oxford) 2015; 2015: bav117.<br />' \
                 '<a href="https://dx.doi.org/10.1093%2Fdatabase%2Fbav117" target="_blank">doi:10.1093/database/bav117</a>'


    network.apply_template(username=my_username, password=my_password, server=my_server,
                           uuid=cytoscape_visual_properties_template_id)
    message = network.upload_to(my_server, my_username, my_password)

print('starting...')
process_hitpredict_network(load_plan)
print('finished...')



starting...


KeyboardInterrupt: 