In [1]:
import pandas as pd
import pickle
from quickUmls.client import get_quickumls_client
import networkx as nx
from os.path import join

### Get Concepts from Query

In [2]:
def getConceptsFromQuery(query):
    matcher = get_quickumls_client()
    concepts = matcher.match(query, best_match=True, ignore_syntax=False)
    concept = concepts[0][0]['cui']
    return concept

### Read Query Data

In [3]:
query_df = pd.read_csv("../data/topics.csv")

In [4]:
query_df.head()

Unnamed: 0,disease,age,sex,gene
0,melanoma,64,male,BRAF (V600E)
1,melanoma,54,male,BRAF (V600K)
2,melanoma,80,male,BRAF (V600R)
3,melanoma,38,male,BRAF (K601E)
4,melanoma,57,male,"BRAF (V600E), PTEN loss of function"


## Read All Disease Csv File

In [5]:
all_trials_df = pd.read_csv("AllDataOfSnapshotInCsvFormat.csv")

In [6]:
all_trials_df.head()

Unnamed: 0,nct_id,brief_title,official_title,brief_summary,detailed_description,eligibility,overall_status,condition,gender,minimum_age,maximum_age,mesh_term_list,keyword_list
0,NCT01762241,Xbox Kinect Training in Men With Prostate Cancer,Xbox Kinect Training in Men With Metastatic Pr...,The purpose of this study is to investigate th...,Patients will be recruited from the outpatient...,Inclusion Criteria: - Prostate cancer requirin...,"Active, not recruiting",,Male,40.0,99.0,Prostatic Neoplasms,Prostate cancer;Androgen deprivation therapy;E...
1,NCT01762982,Patch Test of Benzalkonium Chloride Disinfecta...,"A Single Centre, Examiner-blind Human Patch Te...",The aim of the study is to evaluate the irrita...,Benzalkonium chloride is recognized as safe an...,Inclusion Criteria: 1. Consent: Demonstrates u...,Completed,,All,18.0,60.0,,
2,NCT01762280,A Phase I Study of Famitinib Malate in Patient...,A Phase I Study of Famitinib Malate in Patient...,Famitinib is a novel oral multitargeted tyrosi...,1. To evaluate the safety and tolerability of ...,Inclusion Criteria: - Histological confirmed a...,Unknown status,,All,18.0,65.0,,Famitinib;Advanced Solid tumor;Phase I
3,NCT01762072,Effect of Vitamin B12 and n-3 Polyunsaturated ...,,Vitamin B12and n-3 polyunsaturated fatty acids...,,"Inclusion Criteria: - reliability, regular eat...",Unknown status,,All,20.0,28.0,,homocysteine-vitamin B12-fish oil-ferritin
4,NCT01762696,"A Real-time, Contextual Intervention Using Mob...","A Real-time, Contextual Intervention Using PDA...",The purpose of this study is to further develo...,This study proposes a pilot randomized trial t...,Inclusion Criteria: - Male or female between t...,Completed,,All,15.0,24.0,Marijuana Abuse,"marijuana;intervention;adolescent, young adult..."


## Read Pickle File

In [7]:
concepts_dictionary_brief_title = pickle.load(open("../data/brief_title.p", "rb"))
concepts_dictionary_brief_summary = pickle.load(open("../data/brief_summary.p", "rb"))

## Get All Results for a given query

In [8]:
def getSexMinAgeMaxAgeFromTrial(rowTrial):
    try:
        trialSex = all_trials_df.iloc[rowTrial, 8].lower()
        # trialSex = dfTrials.values[rowTrial][8].lower()
    except:
        trialSex = "all"
    try:
        minAge = int(all_trials_df.iloc[rowTrial,9])
        # minAge = int(dfTrials.values[rowTrial][9])
    except:
        minAge = 0
    try:
        maxAge = int(all_trials_df.ilco[rowTrial, 10])
        # maxAge = int(dfTrials.values[rowTrial][10])
    except:
        maxAge = 100
    return trialSex, minAge, maxAge

## Apply Page Rank And Save Result to csv file

In [9]:
def applyPagerank(nct_ids, query):
    
    G1 = nx.Graph()
    G2 = nx.Graph()
    
    i = 0
    for nct1 in nct_ids:
        j = 0
        try:
            iConceptsBriefTitle = set(concepts_dictionary_brief_title[nct1])
#             print(iConceptsBriefTitle)
        except:
            iConceptsBriefTitle = set()
        try:
            iConceptsBriefSummary = set(concepts_dictionary_brief_summary[nct1])
        except:
            iConceptsBriefSummary = set()
        try:
            iUnionTitleSummary = iConceptsBriefTitle.union(iConceptsBriefSummary)
        except:
            iUnionTitleSummary = set()
        
        for nct2 in nct_ids:
            try:
                jConceptsBriefTitle = set(concepts_dictionary_brief_title[nct2])
            except:
                jConceptsBriefTitle = set()
            
            try:
                jConceptsBriefSummary = set(concepts_dictionary_brief_summary[nct2])
            except:
                jConceptsBriefSummary = set()
            
            try:
                jUnionTitleSummary = jConceptsBriefTitle.union(jConceptsBriefSummary)
            except:
                jUnionTitleSummary = ()
            
            score1 = len(iConceptsBriefTitle.intersection(jConceptsBriefTitle))/min(len(iConceptsBriefTitle), len(jConceptsBriefTitle))
            score2 = len(iUnionTitleSummary.intersection(jUnionTitleSummary))/min(len(iUnionTitleSummary), len(jUnionTitleSummary))
            
            #             print(score)
            G1.add_edge(nct1, nct2, weight=score1)
            G2.add_edge(nct1, nct2, weight=score2)
            j += 1
        i += 1
    
    print(G1, G2)
    pr1 = nx.pagerank_numpy(G1, alpha=0.9)
    pr2 = nx.pagerank_numpy(G2, alpha=0.9)
    
    pageRankList1 = []
    pageRankList2 = []
    
    listOflist = []
    for key in pr1.keys():
        listRow = []
        listRow.append(key)
        listRow.append(all_trials_df.loc[all_trials_df['nct_id'] == key].values[0][1])
        pageRankScore1 = pr1[key]
        pageRankScore2 = pr2[key]
        listRow.append(pageRankScore1)
        listRow.append(pageRankScore2)
        listOflist.append(listRow)
    
    columns = ['nct_id', 'brief_title', 'page_rank_score_brief_title', 'page_rank_score_brief_title_summary_combined']
    df_ = pd.DataFrame(listOflist, columns=columns)
    df_.to_csv(join("../data/pageRank2QueriesOurMethod", str(row) + query + "_pageRank" + ".csv"), index=False)
#     print(df.brief_title_concepts_list)

In [10]:
for row in range(query_df.shape[0]):
    query = query_df.iloc[row][0]
    print(row, query)
    age = int(query_df.iloc[row][1])
    sex = query_df.iloc[row][2].strip().lower()
    concept = getConceptsFromQuery(query)
    print(age, sex, concept)
#     print(concept)
    
    count = 0
    nct_ids = []
    for rowTrial in range(all_trials_df.shape[0]):
        nct_id = all_trials_df.iloc[rowTrial, 0]
        trialSex, minAge, maxAge = getSexMinAgeMaxAgeFromTrial(rowTrial)
#         print(concepts_dictionary[nct_id]) 
        if (concept in concepts_dictionary_brief_title[nct_id]) and (trialSex=='all' or sex==trialSex) and (age>=minAge and age<=maxAge):
            count += 1
            nct_ids.append(nct_id)
#             print("Yes")
    print(count)
    applyPagerank(nct_ids, query)

0 melanoma
64 male C0025202
909
 
1 melanoma
54 male C0025202
909
 
2 melanoma
80 male C0025202
909
 
3 melanoma
38 male C0025202
908
 
4 melanoma
57 male C0025202
909
 
5 melanoma
67 male C0025202
909
 
6 melanoma
61 male C0025202
909
 
7 melanoma
63 female C0025202
913
 
8 melanoma
34 female C0025202
912
 
9 melanoma
65 female C0025202
913
 
10 melanoma
56 female C0025202
913
 
11 melanoma
62 female C0025202
913
 
12 melanoma
39 female C0025202
912
 
13 melanoma
66 female C0025202
913
 
14 melanoma
70 male C0025202
909
 
15 melanoma
60 male C0025202
909
 
16 melanoma
72 male C0025202
909
 
17 melanoma
48 female C0025202
913
 
18 melanoma
73 male C0025202
909
 
19 melanoma
86 female C0025202
913
 
20 melanoma
49 male C0025202
909
 
21 melanoma
74 female C0025202
913
 
22 melanoma
68 male C0025202
909
 
23 melanoma
47 male C0025202
909
 
24 melanoma
69 female C0025202
913
 
25 colorectal cancer
49 male C0009402
1366
 
26 medullary thyroid carcinoma
45 female C0238462
36
 
27 neuroblast

In [11]:
all_trials_df.loc[all_trials_df['nct_id'] == 'NCT01752257'].values[0][1]

'EF5 in Melanoma Patients'