# Molecular Determinants
Instead of focusing on the crude raw data, use bootstrapping to emphasize the real differences between increasing, decreasing and emergent interactions. Drug perturbations span a broad range: for the feature chemical similarity, for example, almost the whole spectrum of similarities is covered. By using the bootstrap one can instead focus on the differences between the group means and on the variation of each mean.

1.) Load all features  
2.) perform bootstrap analysis  
3.) save results (+ plots)

In [1]:
import random
import numpy as np
from matplotlib import pylab as plt
import scipy.stats as stats
from scipy.stats import mannwhitneyu as mu
import seaborn as sns
import os
from math import pi
import math
from sympy import Symbol, solve, sqrt
import networkx as nx

### 1. Load features

In [2]:
# Load the drug-pair feature table and group every feature value by interaction type.
# Result: dic_feature_results[feature][interaction type] -> list of float values.

interactionTypes = ['NoInteraction','Interaction','Increasing','Decreasing','Emergent']
interaction_colors = {'Increasing':'#ACD900','Decreasing':'#F70020','Emergent':'#0096FF','Interaction':'#F8B301','NoInteraction':'grey'}

# Labels of column 3 that count as a "pure" interaction, mapped to the group they feed.
# Any other label (e.g. a mix of different types) is deliberately left out of all groups.
pure_interaction_groups = {
    'Increasing': 'Increasing',
    'Increasing;Increasing': 'Increasing',
    'Decreasing': 'Decreasing',
    'Decreasing;Decreasing': 'Decreasing',
    'Emergent': 'Emergent',
}

# The with-block guarantees the file handle is released even if parsing fails.
with open('../data/Molecular_Determinants/DrugPair_Feature_Overview.csv','r') as fp:
    # The first four header fields are skipped; every following column is one feature.
    features = fp.readline().strip().split(',')[4:]
    print('Number of features: %d' % len(features))

    dic_feature_results = {}
    for f in features:
        dic_feature_results[f] = {iT: [] for iT in interactionTypes}

    for line in fp:
        if not line.strip():
            # tolerate blank lines (e.g. a trailing newline at the end of the file)
            continue

        tmp = line.strip().split(',')
        interactionType = tmp[3]

        for f, val in zip(features, tmp[4:]):
            # missing measurements are encoded as the string 'nan' and are skipped
            if val == 'nan':
                continue
            val = float(val)

            if interactionType == 'None':
                dic_feature_results[f]['NoInteraction'].append(val)
                continue

            group = pure_interaction_groups.get(interactionType)
            if group is not None:
                # every pure interaction also counts towards the pooled 'Interaction' group
                dic_feature_results[f][group].append(val)
                dic_feature_results[f]['Interaction'].append(val)

print('Done loading data')

Number of features: 81
Done loading data


### 2. Perform bootstrap analysis

In [3]:
def bootstrapping(data, number_iterations=10000, bootstrap_sample_size = None):
    '''
    Draw bootstrap resamples (with replacement) from data and record the mean and the
    standard deviation of every resample.

    :param data: 1-D sequence of numeric observations to resample from
    :param number_iterations: number of bootstrap resamples to draw
    :param bootstrap_sample_size: number of observations per resample; None uses len(data)
    :return: two lists of length number_iterations, the resample means and the resample
             standard deviations (np.std with its default ddof=0)
    '''

    if bootstrap_sample_size is None:
        bootstrap_sample_size = len(data)

    # Convert once up front; np.random.choice would otherwise convert a list on every draw
    population = np.asarray(data)

    bootstrap_samples_means = []
    bootstrap_samples_stds = []
    for _ in range(number_iterations):
        bootstrap_sample = np.random.choice(population, bootstrap_sample_size, replace=True)

        bootstrap_samples_means.append(np.mean(bootstrap_sample))
        bootstrap_samples_stds.append(np.std(bootstrap_sample))

    return bootstrap_samples_means, bootstrap_samples_stds

In [4]:
def cohen_d(x, y):
    '''
    Cohen's d effect size: difference of the two means divided by the pooled sample
    standard deviation (sample variances with ddof=1).

    :param x: first group of numeric observations
    :param y: second group of numeric observations
    :return: (mean(x) - mean(y)) / pooled standard deviation
    '''
    # Named size_x / size_y so the file-level "nx" alias (networkx) is not shadowed
    size_x = len(x)
    size_y = len(y)
    dof = size_x + size_y - 2

    pooled_variance = ((size_x - 1) * np.var(x, ddof=1) + (size_y - 1) * np.var(y, ddof=1)) / dof
    return (np.mean(x) - np.mean(y)) / np.sqrt(pooled_variance)



In [5]:
# For every feature: draw a boxplot, bootstrap the mean of every interaction type, and
# compare all pairs of interaction types (bootstrap CI of the mean difference, Cohen's d,
# and either a Fisher exact test for binary features or a Mann-Whitney U test otherwise).
# One CSV row is written per feature and pair.

alpha = 0.05  # significance level; the bootstrap CI below uses the matching 2.5/97.5 percentiles

# Order, colours and y-axis labels of the boxes in the per-feature boxplot
interaction_types_2 = ['NoInteraction','Interaction','Increasing','Decreasing','Emergent']
interaction_colors_2 = ['grey','#F8B301','#ACD900','#F70020','#0096FF']
color_dict = dict(zip(interaction_types_2, interaction_colors_2))

# The with-block closes the results file even if one of the features fails
with open('../results/Molecular_Determinants/ResultsOverview.csv','w') as fp_out:
    fp_out.write('Feature,InteractionType1,InteractionType2,Mean1,Mean2,FisherTest,PVal,PercentChange/OddsRatio,CohenD,BootstrapSign\n')
    for f in features:
        print(f)

        # A feature is treated as binary (-> Fisher exact test) only if every value of every
        # interaction type is 0 or 1; looking at a single, arbitrarily ordered dict entry
        # could misclassify the feature.
        make_Fisher = all(v == 0 or v == 1
                          for iT in interactionTypes
                          for v in dic_feature_results[f][iT])

        directory = os.path.dirname('../results/Molecular_Determinants/Bootstrapping/' + f + '/')
        if not os.path.exists(directory):
            os.makedirs(directory)

        # --- Boxplot of the raw values ---
        bplot = sns.boxplot(data=[dic_feature_results[f][iT] for iT in interaction_types_2],
                            orient='h', showmeans = True, showfliers = False)

        # NOTE(review): relies on the boxes being exposed via ``bplot.artists``; confirm this
        # holds for the installed seaborn/matplotlib versions.
        for i in range(0,5):
            mybox = bplot.artists[i]
            mybox.set_facecolor(color_dict[interaction_types_2[i]])

        plt.title(f)
        # one label per box, in plotting order
        plt.yticks(range(0,5), interaction_types_2)
        plt.ylabel('Interaction Type')
        plt.tick_params(axis = 'y', which = 'major', labelsize = 5)
        plt.xlabel('Amount')
        #plt.show()
        plt.savefig(directory+'/Boxplot.pdf')
        plt.close()

        # --- Bootstrap the mean (and std) of every interaction type ---
        bootstrap_results = {}
        for iT in interactionTypes:
            data = dic_feature_results[f][iT]
            b_means, b_stds = bootstrapping(data,number_iterations=10000, bootstrap_sample_size=None)

            bootstrap_results[iT] = {'b_mean': b_means, 'b_std': b_stds}

            plt.hist(b_means,bins='auto', color = interaction_colors[iT], alpha=0.4)

        # save once, after all five histograms have been drawn into the figure
        plt.savefig(directory+'/BootstrapOVerview.pdf')
        #plt.show()
        plt.close()

        # --- Bootstrapped NoInteraction means with the raw means of the other groups as lines ---
        plt.hist(bootstrap_results['NoInteraction']['b_mean'],bins='auto')
        for iT in ['Interaction','Increasing','Decreasing','Emergent']:
            plt.axvline(np.mean(dic_feature_results[f][iT]),color=interaction_colors[iT])
        #plt.show()
        plt.savefig(directory+'/OldBootstrapPlot.pdf')
        plt.close()

        # --- Pairwise comparisons ---
        for iT1 in interactionTypes:
            for iT2 in interactionTypes:
                # the string comparison keeps exactly one ordering of every unordered pair
                if not iT1 > iT2:
                    continue

                raw1 = dic_feature_results[f][iT1]
                raw2 = dic_feature_results[f][iT2]
                data1 = np.array(bootstrap_results[iT1]['b_mean'])
                data2 = np.array(bootstrap_results[iT2]['b_mean'])

                # 95% percentile interval of the difference of the bootstrapped means
                bootstrap_mean_diff = list(data1 - data2)
                CI = (np.percentile(bootstrap_mean_diff,2.5), np.percentile(bootstrap_mean_diff,97.5))

                # significant if the interval lies completely on one side of zero
                bootstrapSign = (0 > CI[0] and 0 > CI[1]) or (0 < CI[0] and 0 < CI[1])

                # effect size computed on the bootstrapped means
                c_d = cohen_d(data1,data2)

                if make_Fisher:
                    # 2x2 table: number of ones / zeros in each group
                    group1_Overlap = sum(raw1)
                    group1_NonOverlap = len(raw1) - group1_Overlap

                    group2_Overlap = sum(raw2)
                    group2_NonOverlap = len(raw2) - group2_Overlap

                    effect, pval = stats.fisher_exact([[group1_Overlap, group1_NonOverlap], [group2_Overlap, group2_NonOverlap]])
                else:
                    pval = mu(raw1, raw2)[1]
                    # percent change of the bootstrapped mean of group 1 relative to group 2
                    effect = (np.mean(data1) - np.mean(data2))/np.mean(data2) * 100

                plt.hist(bootstrap_mean_diff,bins='auto', color='grey')
                plt.title('%s_%s: %.2f' % (iT1, iT2, pval))
                plt.axvline(CI[0])
                plt.axvline(CI[1])
                plt.axvline(0,c='red',ls='--')
                #plt.show()
                plt.savefig(directory+'/Bootstrap_'+iT1 +'_' +iT2+'.pdf')
                plt.close()

                fp_out.write(','.join([f, iT1, iT2,
                                       str(np.mean(raw1)), str(np.mean(raw2)),
                                       str(make_Fisher), str(pval), str(effect),
                                       str(c_d), str(bootstrapSign)]) + '\n')

ChemicalSimilarity
PPI_Min_AB_All_Filtered
PPI_Min_AB_All
PPI_Min_AB_DrugBank
PPI_Min_AB_PubChem
PPI_Min_AB_Chembl_Filtered
PPI_Min_AB_DrugBank_Filtered
PPI_Min_AB_Chembl
PPI_Min_AB_PubChem_Filtered
PPI_Mean_AB_All_Filtered
PPI_Mean_AB_All
PPI_Mean_AB_DrugBank
PPI_Mean_AB_PubChem
PPI_Mean_AB_Chembl_Filtered
PPI_Mean_AB_DrugBank_Filtered
PPI_Mean_AB_Chembl
PPI_Mean_AB_PubChem_Filtered
PPI_D_AB_All_Filtered
PPI_D_AB_All
PPI_D_AB_DrugBank
PPI_D_AB_PubChem
PPI_D_AB_Chembl_Filtered
PPI_D_AB_DrugBank_Filtered
PPI_D_AB_Chembl
PPI_D_AB_PubChem_Filtered
PPI_S_AB_All_Filtered
PPI_S_AB_All
PPI_S_AB_DrugBank
PPI_S_AB_PubChem
PPI_S_AB_Chembl_Filtered
PPI_S_AB_DrugBank_Filtered
PPI_S_AB_Chembl
PPI_S_AB_PubChem_Filtered
MsigD_BP
Msig_ChemGen_Perturbation_Overlap
Msig_ChemGen_Perturbation_num
MsigD_CC_num
MsigD_MF_Overlap
Msig_ChemGen_Perturbation
MsigD_BP_Overlap
MsigD_BP_num
MsigD_CC_Overlap
MsigD_MF
MsigD_KeGG_Overlap
MsigD_MF_num
MsigD_CC
MsigD_KeGG_num
MsigD_KeGG
KeGG_Direct_Overlap
KeGG_Indirect

## Radar plots

In [6]:
def radiusAngle_ToCoordinates(r, phi):
    '''
    Convert a point given by radius and angle into x and y coordinates. The angle is reduced
    to its offset inside the quadrant it falls into, so that sin/cos are always evaluated on
    a value between zero and pi/2; which of the two yields x (and which y) and the signs
    depend on the quadrant.

    :param r: radius of the point
    :param phi: angle between 0 and 2pi
    :return: x coordinate, y coordinate and the quadrant label ('UR', 'BR', 'BL' or 'UL')
    '''

    quarter_turn = pi / 2

    # Upper right
    if phi <= quarter_turn:
        return math.sin(phi) * r, math.cos(phi) * r, 'UR'

    # Below right
    if phi <= pi:
        offset = phi - quarter_turn
        return math.cos(offset) * r, -(math.sin(offset) * r), 'BR'

    # Below left
    if phi <= (3 * pi) / 2:
        offset = phi - pi
        return -(math.sin(offset) * r), -(math.cos(offset) * r), 'BL'

    # Upper left
    offset = phi - (3 * pi / 2)
    return -(math.cos(offset) * r), math.sin(offset) * r, 'UL'

def Find_Intersection(rc, phi1, r1, phi2, r2):
    '''
    Intersect the straight line through two points (each given by radius and angle) with a
    circle of radius rc centered at the origin, and report the angle of the intersection
    that lies strictly between the two given angles.

    :param rc: radius of the circle
    :param phi1: first angle
    :param r1:   first radius
    :param phi2: second angle
    :param r2:   second radius
    :return: angle of the intersection (its radius is rc), or the string 'Nothing Found'
             if no solution lies between phi1 and phi2
    '''

    # Both points as x/y coordinates (the quadrant labels are not needed here)
    x1, y1, _ = radiusAngle_ToCoordinates(r1, phi1)
    x2, y2, _ = radiusAngle_ToCoordinates(r2, phi2)

    def line_y(x_value):
        # y value of the line through (x1, y1) and (x2, y2) at x_value
        return ((y2 - y1) * (x_value - x1)) / (x2 - x1) + y1

    def angle_of(x_value, y_value):
        # Angle of the point (x_value, y_value) on the circle, adjusted to its quadrant
        base = math.acos(abs(x_value) / rc)
        if x_value >= 0 and y_value >= 0:
            # Upper right
            return pi / 2 - base
        if x_value >= 0 and y_value <= 0:
            # Lower right
            return pi / 2 + base
        if x_value <= 0 and y_value <= 0:
            # Lower left
            return ((3 * pi) / 2) - base
        # Upper left
        return ((3 * pi) / 2) + base

    # Symbol to solve for
    x = Symbol("x")

    # The circle is split into its upper half (y = +sqrt(rc^2 - x^2)) and its lower half
    # (y = -sqrt(rc^2 - x^2)); the upper half is tried first, then the lower half.
    upper_half = line_y(x) - sqrt(rc * rc - x * x)
    lower_half = line_y(x) + sqrt(rc * rc - x * x)

    for equation in (upper_half, lower_half):
        for candidate in solve(equation):
            result = angle_of(candidate, line_y(candidate))

            # the intersection has to lie between the two original angles
            if result > phi1 and result < phi2:
                return result

    return 'Nothing Found'

def my_SpiderPlot(categories, values, color, title,num='None', toNormalizeSmallest='None', toNormalizeBiggest="None"):
    '''
    Draw a spider (radar) plot of the values on a polar axis and fill the areas enclosed
    between the value line and the zero circle; a new fill section is started whenever the
    line crosses that circle.

    :param categories: categories of the spider plot (one axis per category)
    :param values: actual values, one per category (the caller's list is not modified)
    :param color:  key into the internal colour scheme: 'green', 'red', 'blue' or 'grey'
    :param title:  name of the spider plot
    :param num:    zero-based column (of three) to draw into; the string 'None' draws into a
                   single subplot
    :param toNormalizeSmallest: lower bound used to shift the values; the string 'None'
                   uses min(values) instead
    :param toNormalizeBiggest: upper bound used for the largest tick; only read when
                   toNormalizeSmallest is given
    :return: None, the plot is drawn with matplotlib
    '''

    if toNormalizeSmallest !="None":
        #Normalize all values to a pre given value
        nullValue = int(toNormalizeSmallest) - 3

        newValues = [x + abs(toNormalizeSmallest) + 3 for x in values]

        max_yticks = int(toNormalizeBiggest) + 1
    else:
        #Get the lowest value (e.g. -10), for plotting this will be zero; add three so the lowest value is NOT in the middle but a bit away
        nullValue =  int(min(values)) - 3

        #Normalize all values, e.g. the -10 to zero, whereas the zero will be 10 in the plot
        newValues = [x+abs(min(values))+3 for x in values]

        #Define the max tick as max value plus three (for aesthetics)
        max_yticks = int(max(values))+3

    #get the negative ticks and positive ticks
    negative_ticks = [str(x) for x in range(nullValue,0,1)]
    positive_ticks = [str(x) for x in range(0,max_yticks+1,1)]
    negative_ticks.extend(positive_ticks)

    #Take only about 8 tick marks; floor division keeps the step an integer and the lower
    #bound of one avoids a zero step when there are fewer than 8 ticks
    to_take = max(1, len(negative_ticks) // 8)
    chosen_ticks = [negative_ticks[x] for x in range(0,len(negative_ticks),to_take)]

    #take the normalized values to plot (i.e. the values where the -10 became the zero)
    values = newValues

    #Find number of categories
    N = len(categories)

    # What will be the angle of each axis in the plot? (we divide the plot / number of variable)
    # The total of 2pi (around 6.28) is divided by the number of categories
    angles = [n / float(N) * 2 * pi for n in range(N)]
    angles += angles[:1]


    # Initialise the spider plot
    if num != 'None':
        ax = plt.subplot(1, 3, num+ 1, polar=True, )
    else:
        ax = plt.subplot(1, 1, 1, polar=True, )

    # Put the first axis on top and go clockwise
    ax.set_theta_offset(pi / 2)
    ax.set_theta_direction(-1)

    # Draw one axis per variable + add labels
    #categories = [x.split('AllRandom')[0] for x in categories]
    plt.xticks(angles[:-1], categories, color='grey', size=8)

    # Draw ylabels
    ax.set_rlabel_position(0)

    #add last value, to close the circle (values and newValues are the same list object,
    #so the loop over newValues below also sees the closing point)
    values.append(values[0])

    #plot the line
    ax.plot(angles, values, color=color, linewidth=2, linestyle='solid')

    #ax.fill(angles, values, color=color, alpha=0.4)


    #Go through all the points, whenever there is a switch between a positive and a negative value, the line
    #intersects with the zero line, hence new color; use the Find_Intersection function to find the proper intersection
    i_was = 'Nowhere'
    tmp = []
    tmp_angles = []
    to_save = []
    prev_val = 0
    prev_ang = 0

    angles_to_save_cut = []
    normal_angles = []

    #Go through all values and angles
    for val,ang in zip(newValues,angles):

        #Check if value is positive or negative
        if val > abs(nullValue):
            i_am = 'Positive'
        else:
            i_am = 'Negative'

        #Check if there is a switch between positive and negative
        if i_was != i_am and i_was != 'Nowhere':

            #Define the radius of the circle (=y)
            y = abs(nullValue)

            #if the last line is between the last point and the origin (change 0.0 to 6.2831 = 2pi = full circle)
            if prev_ang > 3.15 and ang == 0.0:
                ang = 6.2831

            #Find the actual intersection
            result = Find_Intersection(y,prev_ang,prev_val,ang,val)
            angles_to_save_cut.append(result)

            #if more than one angle belongs to one section, before creating new tmp, add current to save
            if len(tmp) >0:
                to_save.append(tmp)
                normal_angles.append(tmp_angles)

            #start new tmp (= section of color)
            tmp = [val]
            tmp_angles = [ang]
        #if still in same section just add angle and value
        else:
            tmp.append(val)
            tmp_angles.append(ang)

        #Remember previous location
        i_was = i_am
        prev_val = val
        prev_ang = ang

    #Final results of intersection parts (angles and values)
    to_save.append(tmp)
    normal_angles.append(tmp_angles)

    #make a fine grained list of angles (361 entries, one per degree), and close circle again
    angles2 = [n / float(360) * 2 * pi for n in range(360)]
    angles2 += angles2[:1]

    #Define color scheme: consecutive fill sections alternate between two shades
    colorscheme = {'green':{0:'#acd900',1:'#a6c143',2:'#acd900',3:'#a6c143',4:'#acd900',5:'#a6c143',6:'#acd900',7:'#a6c143'},
                   'red': {0: '#f70020', 1: '#e66a22', 2: '#f70020', 3: '#e66a22', 4: '#f70020',5:'#e66a22',6:'#f70020',7:'#e66a22'},
                   'blue':{0: '#0096ff', 1: '#2bbfb8', 2: '#0096ff', 3: '#2bbfb8', 4: '#0096ff', 5: '#2bbfb8',6:'#0096ff',7:'#2bbfb8'},
                   'grey':{0:'#252525',1:'#636363',2:'#252525',3:'#636363',4:'#252525'}

    }
    palette = colorscheme[color]


    #If the first section already contains points, it starts at angle zero
    nofirstcut = False
    if len(to_save[0]) > 0:
        angles_to_save_cut.insert(0,0)
        nofirstcut = True
    angles_to_save_cut += angles_to_save_cut[:1]


    #fill the individual parts
    for i in range(0,len(to_save)):

        #save_cut[i] to save_cut[i+1] define the whole area, + add all the angles between these two
        to_fill_angles = [angles_to_save_cut[i]]
        to_fill_Values = [abs(nullValue)]

        to_fill_Values.extend(to_save[i])
        to_fill_angles.extend(normal_angles[i])

        to_fill_angles.append(angles_to_save_cut[i+1])
        to_fill_Values.append(abs(nullValue))


        #This part follows the zero line back to define where things should be filled
        if angles_to_save_cut[i+1] > angles_to_save_cut[i]:
            go_back = [x for x in angles2 if x < angles_to_save_cut[i+1] and x > angles_to_save_cut[i]]
            go_back = go_back[::-1]
            go_back.pop(0)

        else:
            go_back = [x for x in angles2 if  x < angles_to_save_cut[i+1]]
            go_back2 = [x for x in angles2 if x > angles_to_save_cut[i]]

            go_back = go_back[::-1]
            if 0 in go_back:
                go_back.pop(0)

            go_back2 = go_back2[::-1]

            go_back.extend(go_back2)

        #add here the previously go back angles and values (values is always the radius of the zero line)
        to_fill_angles.extend(go_back)
        to_fill_Values.extend([abs(nullValue)] * len(go_back))

        #in case there is not directly a first cut, the last section gets the color of the first one
        if nofirstcut == True and i == len(to_save)-1:

            ax.fill(to_fill_angles, to_fill_Values, color=palette[0])

        else:
            #wrap around so more sections than palette entries do not raise a KeyError
            ax.fill(to_fill_angles, to_fill_Values, color=palette[i % len(palette)])


    #for a,v in zip(angles,values):
    #    if  abs(v - abs(nullValue)) > 0:
    #        ax.scatter(a, v, color ='black',zorder=10)




    #Plot the zero line
    plt.plot(angles2,[abs(nullValue)]*361, color = 'black')
    #plt.plot(angles2, [abs(nullValue) + 2] * 361, color='grey', ls = '--')
    #plt.plot(angles2, [abs(nullValue) - 2] * 361, color='grey', ls ='--')
    plt.yticks(range(0,len(negative_ticks),to_take),chosen_ticks)
    # Add a title
    plt.title(title, size=11, color=color, y=1.1)
    # tick labels are set above but hidden in the final figure
    plt.setp( ax.get_yticklabels(), visible=False)
    plt.setp( ax.get_xticklabels(), visible=False)



In [7]:
# Collect, for the selected features, the effect size of every interaction subtype
# relative to the pooled 'Interaction' group from the results overview.
selected_results = {'Increasing':{},'Decreasing':{},'Emergent':{},'Interaction':{}}
compare_to = 'Interaction'

#selected_features = ['ChemicalSimilarity','PPI_Mean_AB_All_Filtered','MsigD_BP_Overlap','MsigD_MF_Overlap','MsigD_CC_Overlap','MsigD_KeGG_Overlap','SideEffects_CLOUD_to_Offsides_Overlap','SideEffects_TwoSide_CLOUDs','Transporters_Overlap','Enzymes_Overlap']
selected_features = ['ChemicalSimilarity','Enzymes_Overlap','Transporters_Overlap','PPI_Mean_AB_All_Filtered',
                     'KeGG_Indirect_Overlap','GO_Component','GO_Function','GO_Process','Msig_ChemGen_Perturbation_Overlap',
                     'SideEffects_CLOUD_to_Offsides_Overlap','SideEffects_TwoSide_CLOUDs','Disease']


# The with-block closes the file when done; next(fp) works on Python 2 and 3
with open('../results/Molecular_Determinants/ResultsOverview.csv','r') as fp:
    next(fp)  # skip the header line
    for line in fp:
        # columns: 0 = Feature, 1 = InteractionType1, 2 = InteractionType2, 8 = CohenD
        tmp = line.strip().split(',')

        if tmp[0] in selected_features and tmp[1] == compare_to:
            # Cohen's d is stored as InteractionType1 vs InteractionType2; the sign is
            # flipped so the value describes the subtype relative to compare_to
            selected_results[tmp[2]][tmp[0]] = float(tmp[8]) * -1

In [9]:
# Draw one spider plot per interaction subtype and one figure with all three subtypes.
interaction_colors = {'Increasing':'green','Decreasing':'red','Emergent':'blue','Interaction':'grey'}
#interaction_colors = {'Increasing':'#ACD900','Decreasing':'#F70020','Emergent':'#0096FF','All':'grey'}

# Create the output directory first (as the other cells do); savefig does not create it
spider_directory = '../results/Molecular_Determinants/SpiderPlots/'
if not os.path.exists(spider_directory):
    os.makedirs(spider_directory)

# Individual plots, one file per subtype
for key in ['Increasing','Decreasing','Emergent']:
    part =  key
    categories = list(selected_features)
    values = [selected_results[key][f] for f in selected_features]

    my_SpiderPlot(categories, values, interaction_colors[part], part,'None',-1,2) #-1 and 2 for compare to Interaction, or -11 and 6
    plt.savefig(spider_directory+part+'.pdf',format='pdf')
    plt.close()


# Combined figure: all three subtypes are drawn before the figure is saved once
for key in ['Increasing','Decreasing','Emergent']:
    part =  key
    categories = list(selected_features)
    values = [selected_results[key][f] for f in selected_features]

    my_SpiderPlot(categories, values, interaction_colors[part], part,'None',-1,2) #-1 and 2 for compare to Interaction, or -11 and 6
#plt.show()
plt.savefig(spider_directory+'Combined.pdf',format='pdf')
plt.close()



### Show overview significance results

In [10]:
# For every selected feature, collect the feature values of the drug pairs per network part
# and interaction type, plus all considered pairs ('AllCLOUDS') and the pairs that have no
# edge in any network part ('NonInteracting').

#selected_features = ['ChemicalSimilarity','PPI_Mean_AB_All_Filtered','MsigD_BP_Overlap','MsigD_MF_Overlap','MsigD_CC_Overlap','MsigD_KeGG_Overlap','SideEffects_CLOUD_to_Offsides_Overlap','SideEffects_TwoSide_CLOUDs','Transporters_Overlap','Enzymes_Overlap']
selected_features = ['ChemicalSimilarity','Enzymes_Overlap','Transporters_Overlap','PPI_Mean_AB_All_Filtered',
                     'KeGG_Indirect_Overlap','GO_Component','GO_Function','GO_Process','Msig_ChemGen_Perturbation_Overlap',
                     'SideEffects_CLOUD_to_Offsides_Overlap','SideEffects_TwoSide_CLOUDs','Disease']


interaction_colors = {'Increasing':'#ACD900','Decreasing':'#F70020','Emergent':'#0096FF','All':'black'}
interaction_sizes = {'Increasing':200,'Decreasing':200,'Emergent':200,'All':2000}
network_parts = ['Complete','Core','CoreToPeriphery','Periphery']

# One interaction network per network part
network_part_interactions = {}
for part in network_parts:
    network_part_interactions[part] = nx.read_gml('../data/Molecular_Determinants/Networks/DPI_Network_'+part+'.gml')

# Read the table once and keep the parsed rows, instead of rewinding and re-reading the
# file for every feature; the with-block also closes the file if parsing fails.
with open('../data/Molecular_Determinants/DrugPair_Feature_Overview.csv','r') as fp:
    features = fp.readline().strip().split(',')[4:]
    rows = [line.strip().split(',') for line in fp if line.strip()]

# Only rows whose interaction label (column 3) is one of these are used
single_edge_types = ('Increasing', 'Decreasing', 'Emergent', 'None')

network_part_values = {}

for f, feature in enumerate(features):

    if feature not in selected_features:
        continue

    print(feature)

    feature_values = {}
    for part in network_parts:
        feature_values[part] = {'Increasing':[],'Decreasing':[],'Emergent':[]}
    feature_values['AllCLOUDS'] = []
    feature_values['NonInteracting'] = []
    network_part_values[feature] = feature_values

    for tmp in rows:

        # feature columns start at index 4; missing values are encoded as 'nan'
        if tmp[f+4] == 'nan':
            continue

        #only include pure single edges
        if tmp[3] not in single_edge_types:
            continue

        val = float(tmp[f+4])
        feature_values['AllCLOUDS'].append(val)

        interaction_found = False
        for part in network_parts:
            graph = network_part_interactions[part]

            # Look the pair up in both directions.
            # NOTE(review): if the graphs are undirected, both lookups hit the same edge
            # and the value is appended twice - confirm the GML files are directed.
            for source, target in ((tmp[0], tmp[1]), (tmp[1], tmp[0])):
                if graph.has_edge(source, target):
                    interaction_found = True
                    # one entry per parallel edge; its 'Type' attribute selects the list
                    for key in graph[source][target]:
                        feature_values[part][graph[source][target][key]['Type']].append(val)

        if interaction_found == False:
            feature_values['NonInteracting'].append(val)

ChemicalSimilarity
PPI_Mean_AB_All_Filtered
Msig_ChemGen_Perturbation_Overlap
KeGG_Indirect_Overlap
SideEffects_TwoSide_CLOUDs
SideEffects_CLOUD_to_Offsides_Overlap
Transporters_Overlap
Enzymes_Overlap
GO_Function
GO_Process
GO_Component
Disease


In [11]:
# Overview figure: one column per feature, one row per network part. Every marker compares
# one subset of interactions against the non-interacting pairs: triangle up/down gives the
# direction, the subset colour (instead of grey) marks p < 0.05.
all_Features = selected_features
#all_Features = network_part_values.keys()
#all_Features.sort()
number_features = len(all_Features)

plt.figure(figsize=(number_features,4))
#plt.tight_layout()
feature_names = []
current_x = 0.8
for f in  all_Features:
    print(f)

    feature_names.append(f)

    no_interaction_values = network_part_values[f]['NonInteracting']

    # A feature whose non-interacting values are all 0/1 is treated as binary (Fisher exact
    # test), everything else as continuous (Mann-Whitney U). This does not depend on the
    # network part, so it is decided once per feature.
    continues_features = not all(v == 0 or v ==1 for v in no_interaction_values)

    y = 4.2

    for part in network_parts:

        part_values = network_part_values[f][part]
        AllInteractions = part_values['Increasing']  + part_values['Decreasing'] + part_values['Emergent']

        things_to_test = {'All':AllInteractions,'Increasing':part_values['Increasing'],'Decreasing':part_values['Decreasing'],'Emergent':part_values['Emergent']}

        x = current_x
        for subset in ['All','Increasing','Decreasing','Emergent']:

            if  continues_features:
                direction = np.mean(things_to_test[subset]) > np.mean(no_interaction_values)
                sign =  mu(things_to_test[subset],no_interaction_values)[1] < 0.05
            else:
                # 2x2 table: number of ones / zeros among the subset and the non-interacting pairs
                real_Overlap = sum(things_to_test[subset])
                real_NonOverlap = len(things_to_test[subset]) - real_Overlap

                non_Interactions_Overlap = sum(no_interaction_values)
                non_Interactions_NonOverlap = len(no_interaction_values) - sum(no_interaction_values)

                oddsratio, pvalue = stats.fisher_exact([[real_Overlap, real_NonOverlap], [non_Interactions_Overlap, non_Interactions_NonOverlap]])

                sign = pvalue < 0.05
                direction = oddsratio > 1

            symbol = '^' if direction else 'v'
            color = interaction_colors[subset] if sign else 'grey'

            x = x + 0.2
            plt.scatter([x],[y],marker=symbol, s=interaction_sizes[subset], color=color)
        y = y - 1
    current_x = current_x + 1



#print network_part_values['Core']

directory = os.path.dirname('../results/Molecular_Determinants/')
if not os.path.exists(directory):
    os.makedirs(directory)

plt.ylim([0.6,4.8])
plt.xlim([0.3,number_features+1])
plt.yticks([1.2,2.2,3.2,4.2],['Periphery','CoreToPeriphery','Core','Complete'])
# the markers of the i-th feature start at x = i (1-based), so one tick per feature is needed
plt.xticks(range(1,number_features+1),feature_names, rotation='vertical')
#plt.show()
plt.savefig('../results/Molecular_Determinants/Overviewplot.pdf', bbox_inches = "tight")
plt.close()



ChemicalSimilarity
Enzymes_Overlap
Transporters_Overlap
PPI_Mean_AB_All_Filtered
KeGG_Indirect_Overlap
GO_Component
GO_Function
GO_Process
Msig_ChemGen_Perturbation_Overlap
SideEffects_CLOUD_to_Offsides_Overlap
SideEffects_TwoSide_CLOUDs
Disease
