# Check which lipids are MARCO-dependent and which are PIK3-dependent
- Compare lipids found in WT, PTEN, DKO
- Compare WT_ACM to WT_Control, PTEN_ACM to WT_ACM and DKO to WT_ACM
- Check for each lipid if significantly enriched
- PTEN negatively regulates PIK3, which in turn positively influences MARCO (hence PTEN KO = MARCO up) --> increase in lipid concentrations
- If the lipid concentration is still high after the MARCO / PTEN double KO, this means the lipid is controlled by PIK3 but not by MARCO
- Do this in Cell Experiment

### Included libraries

In [1]:
import pandas as pd
from scipy.stats import ttest_ind
from matplotlib import cm
from collections import OrderedDict
import matplotlib
from matplotlib import pylab as plt
import numpy as np

### Functions and definitions

In [2]:
# Lipid classes, identified by the prefix of each lipid column name
# (e.g. PC = Phosphatidylcholines)
types_of_Lipids = ['CE','Cer','DAG','LPC','LPE','PC','PE','PI','PS','SM','TAG']

# Colormap with 20 unique colors.
# matplotlib.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
# the pyplot-level get_cmap exists in both old and new releases.
cmap = plt.get_cmap('tab20')

# Assign each lipid class a unique color, stored in two forms:
#   lipid_color     -> [R, G, B] integers on a 0-255 scale (used for the circos files)
#   lipid_color_rgb -> the color tuple exactly as returned by cmap(i)
lipid_color = {}
lipid_color_rgb = {}
for i, l in enumerate(types_of_Lipids):
    lipid_color_rgb[l] = cmap(i)
    # to_rgb drops the alpha channel; int() truncates towards zero
    lipid_color[l] = [int(x*255) for x in matplotlib.colors.to_rgb(lipid_color_rgb[l])]

### Main Code

In [3]:
# Read the lipid measurement report.
# header=2: the column names are taken from the third row of the sheet.
# na_values='<LOD': cells containing '<LOD' are read as missing values
# (presumably "below limit of detection" -- confirm with the data provider).
lipid_report_path = '../data/Report_MEC025_LIPID01_jb.xlsx'
LipidData = pd.read_excel(lipid_report_path, na_values='<LOD', header=2)

In [4]:
# Split the column index: the first seven columns are skipped, every
# column after them is treated as one lipid species.
columns = LipidData.columns
n_leading_columns = 7
Lipids = columns[n_leading_columns:]
print(Lipids)

Index(['CE 16:0', 'CE 16:1', 'CE 18:0', 'CE 18:1', 'CE 18:2', 'CE 18:3',
       'CE 20:0', 'CE 20:3', 'CE 20:4', 'CE 20:5',
       ...
       'TAG 54:5', 'TAG 54:6', 'TAG 55:1', 'TAG 55:3', 'TAG 56:1', 'TAG 56:4',
       'TAG 56:7', 'TAG 57:1', 'TAG 58:8', 'TAG 58:9'],
      dtype='object', length=283)


In [5]:
# Keep only the rows measured in cells (NOT supernatant) that belong to one of
# the four conditions: WT control, and WT, PTEN and DKO all treated with ACM
experiments_of_interest = ['WT_C', 'WT_ACM', 'PTEN_ACM', 'DKO_ACM']
is_cell_sample = LipidData['Specimen'] == 'cells'
is_selected_experiment = LipidData['Experiment'].isin(experiments_of_interest)
data = LipidData.loc[is_cell_sample & is_selected_experiment]

# Remove columns that have no value in any of the selected rows
data = data.dropna(how='all', axis=1)
data.head(12)

Unnamed: 0,RunID,Sample Code,Sample Identification,Sample Description,Experiment,Specimen,Unit,Cer d34:1,Cer d38:1,Cer d42:2,...,TAG 50:1,TAG 50:2,TAG 50:3,TAG 51:1,TAG 52:2,TAG 52:3,TAG 52:4,TAG 54:4,TAG 54:5,TAG 54:6
0,R100037,102518,1,MA89b___PTEN WT_C _24 h_MA89b_102518,WT_C,cells,pmol,34.359189,27.865418,24.743461,...,96.053349,100.618743,144.652482,117.540473,306.134225,131.963976,198.413325,494.659795,883.936084,508.016828
1,R100037,102519,2,MA89b__PTEN WT_C _24 h_MA89b_102519,WT_C,cells,pmol,46.005993,23.921894,29.101574,...,112.900627,93.192104,109.451561,115.427035,303.072267,87.211812,115.143794,418.043113,607.098093,350.420934
2,R100037,102520,3,MA89b__PTEN WT_C _24 h_MA89b_102520,WT_C,cells,pmol,34.847252,24.506252,28.585149,...,110.574551,86.781271,131.687569,125.213688,267.934925,98.429495,129.611079,434.983663,591.016082,399.367263
3,R100037,102521,4,MA89b__PTEN WT_C _24 h_MA89b_102521,WT_C,cells,pmol,47.093962,22.949029,27.172687,...,87.894841,80.416139,126.533224,104.87604,233.332032,73.163268,104.930808,372.416241,528.076638,300.454063
4,R100037,102522,5,MA89b__PTEN WT_ACM_24 h_MA89b_102522,WT_ACM,cells,pmol,35.670004,21.455848,15.601692,...,154.841233,169.665043,139.292626,74.747775,395.049202,352.490152,205.440234,451.603508,417.751224,263.68781
5,R100037,102524,7,MA89b__PTEN WT_ACM_24 h_MA89b_102524,WT_ACM,cells,pmol,36.751153,21.499523,12.329954,...,291.619431,355.161053,238.559385,95.727273,833.837135,803.38929,367.929087,850.714919,572.608076,314.684843
6,R100037,102525,8,MA89b__PTEN WT_ACM_24 h_MA89b_102525,WT_ACM,cells,pmol,51.373741,27.273843,20.728082,...,452.117546,577.512286,342.204451,93.856461,1306.822086,1415.902045,665.978596,1348.764708,935.795651,373.383526
11,R100037,102530,13,MA89b__PTEN KO_ACM_24 h_MA89b_102530,PTEN_ACM,cells,pmol,32.467781,24.55162,14.704213,...,126.928514,147.118776,126.702837,84.099266,341.343909,277.236658,162.182047,396.10606,499.31754,273.103352
12,R100037,102531,14,MA89b__PTEN KO_ACM_24 h_MA89b_102531,PTEN_ACM,cells,pmol,58.956172,25.167299,39.992327,...,241.262585,313.237458,275.959449,138.233515,670.469614,557.26309,383.828775,701.684056,701.769494,382.231435
13,R100037,102532,15,MA89b__PTEN KO_ACM_24 h_MA89b_102532,PTEN_ACM,cells,pmol,40.608011,22.908303,18.612062,...,214.624525,229.754884,223.010941,107.294867,567.606068,511.651264,279.794737,714.817743,586.447397,377.277043


## Circos plot

The following files:
- ideogram
- conditions
- links
- names

are all needed by the circos plot tool to create circos plots:
http://circos.ca/software/

In [8]:
# remaining_Lipids contains all valid columns (= lipids) left after filtering;
# the first seven columns of `data` are skipped
remaining_Lipids = data.columns.values[7:]

# Dictionary that maps each lipid class to the list of its remaining lipids.
# The class is the part of the column name before the first space.
# This is done BEFORE any output file is opened, so a lipid with an unknown
# class prefix (KeyError) cannot leave half-written files behind.
lipid_count = {}
for l in types_of_Lipids:
    lipid_count[l] = []

for lipid in remaining_Lipids:
    l_type = lipid.split(' ')[0]
    lipid_count[l_type].append(lipid)

# Sort lipid classes by size: the lipid class with the most lipids first
ordered_lipids = OrderedDict(sorted(lipid_count.items(), key=lambda x: len(x[1]), reverse=True))

# Create the circos plot files (ideogram, names and conditions).
# The context manager closes all three files even if a write fails.
circos_dir = '../results/WT_PTEN_DKO_Circos/'
condition_ribbons = [('C1', 'WILD'), ('C2', 'PTEN'), ('C3', 'DKO')]

with open(circos_dir + 'ideogram.txt', 'w') as fp_out, \
        open(circos_dir + 'names.txt', 'w') as fp_out2, \
        open(circos_dir + 'conditions.txt', 'w') as fp_out3:

    # Entries for WT/PTEN/DKO (three black ribbons, each spanning 0..10)
    # together with their name/color assignment
    for ribbon_id, ribbon_label in condition_ribbons:
        fp_out.write('chr - ' + ribbon_id + ' ' + ribbon_label + ' ' + str(0) + ' ' + str(10) + ' ' + 'black' + '\n')
        fp_out3.write(ribbon_id + ' ' + str(0) + ' ' + str(10) + ' ' + ribbon_label + ' ' + 'color=black' + '\n')

    # Lipid ribbons: one per non-empty lipid class, its length equal to the
    # number of lipids in that class, colored with the class color as "R,G,B"
    for lipid in ordered_lipids:
        class_size = len(lipid_count[lipid])
        if class_size > 0:
            class_color = ','.join(str(channel) for channel in lipid_color[lipid])
            fp_out.write('chr - ' + lipid + ' ' + lipid + ' ' + str(0) + ' ' + str(class_size) + ' ' + class_color + '\n')
            fp_out2.write(lipid + ' ' + str(0) + ' ' + str(class_size) + ' ' + lipid + ' ' + 'color=' + class_color + '\n')

In [9]:
# define a significance threshold
significance_level = 0.05

# definitions for the individual results
# NOTE(review): Lipids_associated_Pvals is never filled in this cell
Lipids_associated_Pvals = {'WT':[],'PIK3':[],'MARCO':[]}
Lipids_associated = {'WT':[],'PIK3':[],'MARCO':[]}
Lipids_associated_Link = {'WT':'C1','MARCO':'C2','PIK3':'C3'}
Lipids_associated_color = {'WT':'254,227,145,0.4','PIK3':'153,0,13,0.4','MARCO':'43,140,190,0.4'}

# result lists
associated_category = []
associated_color = []
associated_index = []

# Select the rows of each condition once instead of once per lipid
# (WT_Control, WT_ACM, PTEN_ACM and DKO_ACM)
WT_control_rows = data.loc[data['Experiment'] == 'WT_C']
WT_rows = data.loc[data['Experiment'] == 'WT_ACM']
PTEN_rows = data.loc[data['Experiment'] == 'PTEN_ACM']
DKO_rows = data.loc[data['Experiment'] == 'DKO_ACM']

# Result file containing the individual results; the context manager closes
# it even if one of the statistics below raises.
# NOTE(review): the header column 'FC_W' looks like a typo for 'FC_WT'; it is
# left unchanged because readers of this file may rely on the name.
with open('../results/WT_PTEN_DKO_Circos/ResultOverview.csv', 'w') as fp_out_resultsFile:
    fp_out_resultsFile.write('Lipid,PVal_WT,FC_W,PVal_KO,FC_KO,PVal_DKO,FC_DKO,Category\n')

    # go through each lipid
    for count, Lipid in enumerate(remaining_Lipids):

        # values of this lipid per condition, without NA (empty) entries
        WT_values_control = WT_control_rows[Lipid].dropna()
        WT_values = WT_rows[Lipid].dropna()
        PTEN_values = PTEN_rows[Lipid].dropna()
        DKO_Values = DKO_rows[Lipid].dropna()

        # only calculate statistics if all groups have at least one valid entry
        if min(len(WT_values_control), len(WT_values), len(PTEN_values), len(DKO_Values)) == 0:
            continue

        # compare WT_ACM to WT_Control (t-test p-value and ratio of means)
        WT_PVal = ttest_ind(WT_values, WT_values_control)[1]
        WT_FoldChange = WT_values.mean()/WT_values_control.mean()

        # compare PTEN_ACM to WT_ACM
        KO_pval = ttest_ind(WT_values, PTEN_values)[1]
        KO_FolChange = PTEN_values.mean()/WT_values.mean()

        # compare DKO_ACM to WT_ACM
        DKO_pval = ttest_ind(WT_values, DKO_Values)[1]
        DKO_FolChange = DKO_Values.mean()/WT_values.mean()

        # l_count = list of the lipids in this lipid class (not used further in this cell)
        lipidType = Lipid.split(' ')[0]
        l_count = lipid_count[lipidType]

        # add the corresponding color
        associated_color.append(lipid_color_rgb[lipidType])
        # NOTE(review): the reason for the +2 offset is not visible here -- confirm
        associated_index.append(count+2)

        # Assign the lipid to every category in which it is significantly
        # enriched (p below threshold AND fold change above 1).
        # NOTE(review): associated_category receives 0 for every category and
        # one entry per match, so it is not aligned with associated_color /
        # associated_index -- confirm whether distinct codes were intended.
        categories = []
        if WT_PVal < significance_level and WT_FoldChange > 1:
            categories.append('WT')
            Lipids_associated['WT'].append(Lipid)
            associated_category.append(0)

        if KO_pval < significance_level and KO_FolChange > 1:
            categories.append('MARCO')
            Lipids_associated['MARCO'].append(Lipid)
            associated_category.append(0)

        if DKO_pval < significance_level and DKO_FolChange > 1:
            categories.append('PIK3')
            Lipids_associated['PIK3'].append(Lipid)
            associated_category.append(0)

        # write entry to result file (categories are joined with ';')
        result_row = [Lipid, WT_PVal, WT_FoldChange, KO_pval, KO_FolChange, DKO_pval, DKO_FolChange, ';'.join(categories)]
        fp_out_resultsFile.write(','.join(str(field) for field in result_row) + '\n')

In [10]:
# maps the internal category name to the condition name used in the percentages file
category_To_JuliaName = {'WT':'WT','MARCO':'PTEN','PIK3':'DKO'}

# Create the links file (important to create the circos plot) and the
# percentages file; the context manager closes both even if a write fails.
with open('../results/WT_PTEN_DKO_Circos/links.txt', 'w') as fp_out4, \
        open('../results/WT_PTEN_DKO_Circos/Percentages.csv', 'w') as fp_out_resultsFile2:
    fp_out_resultsFile2.write('Category,LipidGroup,NumberLipidsInGroup,Associated,Percent\n')

    # go through the three categories (PIK3, WT and MARCO)
    for key in ['PIK3','WT','MARCO']:

        # lipid class of every lipid associated with this category
        # (computed once per category instead of once per lipid class)
        associated_classes = [x.split(' ')[0] for x in Lipids_associated[key]]

        # go through all lipid classes (largest class first)
        for index, l in enumerate(ordered_lipids):

            # count the number of lipids that belong to the lipid class and e.g. MARCO
            number_Associated = associated_classes.count(l)

            # Make a connection if at least one lipid was found: it runs from
            # 0..number_Associated on the lipid ribbon to index..index+1 on
            # the category ribbon.
            # NOTE(review): the category ribbons are written with length 10,
            # so index+1 exceeds them if a class at position 10 or later has
            # associated lipids -- confirm this cannot happen.
            if number_Associated > 0:
                fp_out4.write(l + ' 0 ' + str(number_Associated) + ' ' + Lipids_associated_Link[key] + ' ' + str(index) + ' ' + str(index+1) + ' color=' + Lipids_associated_color[key] + '\n')

            # Only report classes that contain at least one lipid (this also
            # avoids a division by zero). 'Percent' is written as a fraction
            # between 0 and 1.
            if len(lipid_count[l]) > 0:
                fraction_associated = float(number_Associated)/float(len(lipid_count[l]))
                fp_out_resultsFile2.write(category_To_JuliaName[key] + ',' + l + ',' + str(len(lipid_count[l])) + ',' + str(number_Associated) + ',' + str(fraction_associated) + '\n')