# Check Lipid differences in WT, KO and DKO
- Show whether some lipids are particularly high in one of the three groups: WT, KO (PTEN knockout) or DKO (PTEN and MARCO double knockout)

### Included libraries

In [1]:
from matplotlib import cm
from matplotlib.lines import Line2D
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from matplotlib import pylab as plt
import numpy as np
import seaborn as sns

### Functions and definitions

In [2]:
#define classes of lipids e.g. PC = Phosphatidylcholines
types_of_Lipids = ['CE','Cer','DAG','LPC','LPE','PC','PE','PI','PS','SM','TAG']

#colormap (20 unique colors)
#NOTE: cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
#      the pyplot-level get_cmap is available in old and new releases alike
cmap = plt.get_cmap('tab20')

#assign each lipid class its own color (position in the list = index into the colormap)
lipid_color = {lipid_class: cmap(index) for index, lipid_class in enumerate(types_of_Lipids)}

### Main Code

In [3]:
#Read the lipid report; the column names are taken from the third row of the sheet (header=2)
#and cells containing '<LOD' are read as missing values
LipidData = pd.read_excel(
    '../data/Report_MEC025_LIPID01_jb.xlsx',
    header=2,
    na_values='<LOD',
)

In [4]:
#all columns of the report
columns = LipidData.columns

#everything after the first seven columns is treated as a lipid
Lipids = LipidData.columns[7:]
print(Lipids)

Index(['CE 16:0', 'CE 16:1', 'CE 18:0', 'CE 18:1', 'CE 18:2', 'CE 18:3',
       'CE 20:0', 'CE 20:3', 'CE 20:4', 'CE 20:5',
       ...
       'TAG 54:5', 'TAG 54:6', 'TAG 55:1', 'TAG 55:3', 'TAG 56:1', 'TAG 56:4',
       'TAG 56:7', 'TAG 57:1', 'TAG 58:8', 'TAG 58:9'],
      dtype='object', length=283)


In [5]:
#keep only the serum rows (NOT cells) that belong to WT, PTEN or DKO (all HFD),
#then drop every column that has no value at all in the selected rows
data = (
    LipidData
    .loc[
        (LipidData['Specimen'] == 'serum')
        & LipidData['Experiment'].isin(['WT_HFD', 'PTEN_HFD', 'DKO_HFD'])
    ]
    .dropna(axis=1, how='all')
)
data.head(12)

Unnamed: 0,RunID,Sample Code,Sample Identification,Sample Description,Experiment,Specimen,Unit,CE 18:1,CE 18:2,CE 18:3,...,TAG 54:2,TAG 54:3,TAG 54:4,TAG 54:5,TAG 54:6,TAG 55:3,TAG 56:4,TAG 56:7,TAG 58:8,TAG 58:9
48,R100037,102567,50,MA86__1132_DKO_20w HFD_MA86_102567,DKO_HFD,serum,µM,607.956022,72864.992295,3002.12372,...,24.896831,89.480398,98.588407,43.480353,33.390152,0.633238,10.206274,29.782913,9.974612,10.368189
49,R100037,102568,51,MA86__1133_DKO_20w HFD_MA86_102568,DKO_HFD,serum,µM,402.976122,53147.603236,2396.179833,...,10.15025,48.40202,62.283069,34.487262,28.71127,0.278982,5.631952,28.976964,8.037158,9.424567
50,R100037,102569,52,MA86__1145_WT_20w HFD_MA86_102569,WT_HFD,serum,µM,430.90582,81580.339563,3166.628627,...,16.086543,80.528987,86.122444,43.376575,32.473481,0.513254,9.325417,29.056677,9.307639,11.281162
51,R100037,102570,53,MA86__1146_WT_20w HFD_MA86_102570,WT_HFD,serum,µM,376.611546,53702.218677,2032.826665,...,12.964261,48.933192,57.961093,25.910846,23.3176,0.281729,4.964431,20.953996,6.162456,8.889755
52,R100037,102571,54,MA86__1147_PTEN KO_20w HFD_MA86_102571,PTEN_HFD,serum,µM,374.749841,38775.692923,1556.497369,...,16.517914,57.851267,68.688443,30.503781,26.798037,0.386975,6.008675,24.037804,7.25038,8.898583
53,R100037,102572,55,MA86__1155_DKO_20w HFD_MA86_102572,DKO_HFD,serum,µM,964.432179,138602.137424,5845.431951,...,16.501383,76.600014,86.434272,44.644121,39.185787,0.484677,9.542706,34.688408,10.810573,13.744266
54,R100037,102573,56,MA86__1160_DKO_20w HFD_MA86_102573,DKO_HFD,serum,µM,1287.396636,148343.845896,5754.735475,...,21.960342,89.372925,101.119447,41.073122,37.50362,0.600271,10.403612,37.301976,12.663602,14.912134
55,R100037,102574,57,MA86__1165_PTEN KO_20w HFD_MA86_102574,PTEN_HFD,serum,µM,939.374447,148385.145597,5880.899701,...,17.11968,76.849588,87.627162,46.566287,37.602639,0.462369,8.189346,33.580649,9.732728,12.01867
56,R100037,102575,58,MA86__1166_WT_20w HFD_MA86_102575,WT_HFD,serum,µM,891.367598,154932.585514,5379.371793,...,12.451955,55.933581,56.365019,25.353963,29.668491,0.339827,6.201129,26.070387,8.125969,13.194814
57,R100037,102576,59,MA86__1167_WT_20w HFD_MA86_102576,WT_HFD,serum,µM,767.207553,147249.740331,6455.170972,...,25.254516,106.998655,136.183586,73.276264,50.221942,0.636314,10.99928,39.900871,10.807399,12.237579


In [6]:
#columns that survived the filtering; the first seven columns are skipped, the rest are lipids
remaining_Lipids = data.columns[7:].values
print ('Number of remaining Lipids: %d' % remaining_Lipids.size)

Number of remaining Lipids: 202


In [18]:
# Normalize the per-group mean of every lipid and collect the results per lipid class
#
# For every lipid the replicate mean is calculated for WT, KO (=PTEN) and DKO (=PTEN and MARCO KO).
# The three means are then min-max scaled against each other, so that the lowest group is 0,
# the highest group is 1 and the third one lies in between.
# All values are written to Normalization.csv; the normalized values are additionally
# stored per lipid class in Lipid_Group_Results for the plots.

#dictionary that contains the results for WT, KO (=PTEN) and DKO (=PTEN and MARCO KO)
Lipid_Group_Results = {'WT':{},'KO':{},'DKO':{}}

#possible groups contains the lipid classes
possible_groups = set()

#the rows of each group do not depend on the lipid, so select them only once
experiment_of_group = {'WT': 'WT_HFD', 'KO': 'PTEN_HFD', 'DKO': 'DKO_HFD'}
rows_of_group = {group: data.loc[data['Experiment'] == experiment]
                 for group, experiment in experiment_of_group.items()}

#the with-statement closes the file even if the loop raises
with open('../results/Difference_WT_KO_DKO_Serum/Normalization.csv','w') as fp_out:
    fp_out.write('Lipid,Mean_WT,Normalized_WT,Mean_KO,Normalized_KO,Mean_DKO,Normalized_DKO,Max_Val,Min_Val\n')

    #go through all lipids
    for Lipid in remaining_Lipids:

        #get the lipid replicates for the three groups (remove empty rows)
        replicates = {group: rows[Lipid].dropna() for group, rows in rows_of_group.items()}

        #only make the analysis if all three groups have at least one entry (=replicate)
        if any(len(values) == 0 for values in replicates.values()):
            continue

        #calculate the mean for the three groups
        group_mean = {group: values.mean() for group, values in replicates.items()}

        #extract Max/Min of the three means
        max_Val = max(group_mean.values())
        min_Val = min(group_mean.values())

        #if all three means are identical the scaling would be 0/0 -> no defined normalized value
        has_no_spread = max_Val == min_Val

        #normalize between this max/min (one value is always 0, one is always 1, the other in between)
        if has_no_spread:
            group_norm = {group: float('nan') for group in group_mean}
        else:
            group_norm = {group: (mean - min_Val) / (max_Val - min_Val)
                          for group, mean in group_mean.items()}

        #write results (same column order as the header)
        row = [Lipid,
               group_mean['WT'], group_norm['WT'],
               group_mean['KO'], group_norm['KO'],
               group_mean['DKO'], group_norm['DKO'],
               max_Val, min_Val]
        fp_out.write(','.join(str(field) for field in row) + '\n')

        #an undefined (NaN) value would turn the whole class mean into NaN and break the
        #clustering later on, so such a lipid is reported in the csv but not aggregated
        if has_no_spread:
            continue

        #lipid class = name up to the first blank, e.g. 'CE' for 'CE 18:1'
        lipid_class = Lipid.split(' ')[0]

        #add lipid class to set of all possible lipid classes
        possible_groups.add(lipid_class)

        #store the normalized values (creates the list on the first lipid of a class)
        for group in Lipid_Group_Results:
            Lipid_Group_Results[group].setdefault(lipid_class, []).append(group_norm[group])

In [21]:
# Make actual plot
# One line per lipid class showing its mean normalized value in WT, KO and DKO,
# plus a black dashed line for the mean over all lipid classes
##

#a set has no stable order between kernel sessions; sort so that legend and draw order are reproducible
group_order = sorted(possible_groups)

#create legend entries
legend_elements = [Line2D([0], [0], marker='o', color='w', label=key,
                          markerfacecolor=lipid_color[key], markersize=10)
                   for key in group_order]

#list of means (go through the results to calculate the actual mean (per lipid class))
WT_means = []
KO_means = []
DKO_means = []

#every condition takes two x positions so that its value is drawn as a flat step
x_positions = [0,1,2,3,4,5]

#explicit figure/axes so that nothing is drawn onto a figure left open by another cell
fig, ax = plt.subplots()

#go through all lipid groups
for key in group_order:
    #mean of the normalized values of this lipid class (calculated once per condition)
    WT_class_mean = np.mean(Lipid_Group_Results['WT'][key])
    KO_class_mean = np.mean(Lipid_Group_Results['KO'][key])
    DKO_class_mean = np.mean(Lipid_Group_Results['DKO'][key])

    #make plot showing the mean results for this lipid group (no errorbars)
    ax.errorbar(x_positions,
                [WT_class_mean, WT_class_mean,
                 KO_class_mean, KO_class_mean,
                 DKO_class_mean, DKO_class_mean],
                #assign associated color
                color=lipid_color[key], alpha=0.8, lw=1.5)

    #add result to result list
    WT_means.append(WT_class_mean)
    KO_means.append(KO_class_mean)
    DKO_means.append(DKO_class_mean)

#create the legend once, after all lines are drawn (nothing to show without lipid classes)
if legend_elements:
    ax.legend(handles=legend_elements, loc='right', prop={'size': 5})

#plot the overall mean (mean of the class means, black dashed line)
overall_WT = np.mean(WT_means)
overall_KO = np.mean(KO_means)
overall_DKO = np.mean(DKO_means)
ax.plot(x_positions,
        [overall_WT, overall_WT, overall_KO, overall_KO, overall_DKO, overall_DKO],
        color='black', alpha=1, ls='--', lw=2, zorder=100)

#label and save the figure
ax.set_ylabel('Mean Normalized Relative Abundance')
ax.set_xlabel('Condition')
ax.set_xticks([0.5,2.5,4.5])
ax.set_xticklabels(['WT','KO','DKO'])
fig.savefig('../results/Difference_WT_KO_DKO_Serum/LipidGroups.pdf')
plt.close(fig)


### Additional plot as heatmap showing lipids

In [22]:

#Heatmaps of the normalized values: rows = WT / KO / DKO, columns = lipid classes or single lipids
#
#NOTE: the matrix of class means is stored as `data_groups`. It must not be called `data`,
#      because `data` is the filtered DataFrame that the earlier cells read from; overwriting
#      it makes those cells fail when they are run again.

#a set has no stable order between kernel sessions; sort so that the column order is reproducible
group_order = sorted(possible_groups)
categories = ['WT','KO','DKO']

#one color per individual lipid (= color of its lipid class), in the same order as the columns below
col_colors = [lipid_color[key]
              for key in group_order
              for _ in Lipid_Group_Results['WT'][key]]

#one row per category: mean normalized value of every lipid class
data_groups = [[np.mean(Lipid_Group_Results[group][key]) for key in group_order]
               for group in categories]

#one row per category: normalized value of every single lipid, grouped by lipid class
data_allLipids = [[value
                   for key in group_order
                   for value in Lipid_Group_Results[group][key]]
                  for group in categories]

#Make heatmap for LIPIDGROUPS
fig, ax = plt.subplots()
sns.heatmap(data=data_groups, ax=ax)
ax.set_xlabel('Lipid Group')
ax.set_ylabel('Category')
#cell centers are at 0.5, 1.5, ...
ax.set_xticks([position + 0.5 for position in range(len(group_order))])
ax.set_xticklabels(group_order)
ax.set_yticks([0.5,1.5,2.5])
ax.set_yticklabels(['WT','PTEN','DKO'])
fig.savefig('../results/Difference_WT_KO_DKO_Serum/LipidGroups_Heatmap.pdf')
plt.close(fig)


#Make heatmap for LIPIDS INDIVIDUALLY
fig, ax = plt.subplots()
sns.heatmap(data=data_allLipids, ax=ax)
ax.set_xlabel('Lipid')
ax.set_ylabel('Category')
ax.set_yticks([0.5,1.5,2.5])
ax.set_yticklabels(['WT','PTEN','DKO'])
fig.savefig('../results/Difference_WT_KO_DKO_Serum/Lipid_Heatmap.pdf')
plt.close(fig)


#Make clustermap for LIPIDS INDIVIDUALLY
#clustermap creates its own figure, which is the current figure when it is saved and closed below
sns.clustermap(data=data_allLipids,row_cluster=True, col_colors=col_colors,yticklabels=['WT','PTEN','DKO'], method='weighted', )
plt.savefig('../results/Difference_WT_KO_DKO_Serum/Lipid_Clustermap.pdf')
plt.close()