## Load Libraries

In [161]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import decomposition
from sklearn.preprocessing import normalize
import matplotlib.cm as cm
import matplotlib.patches as patches
import matplotlib.colors as colors
from IPython.display import display, HTML

## Load data

In [162]:
data_id = '2017-05-12_MTP_R1'
# Microtiter Plate (2017-05-12, 1 plate, 1/3 repeats)
meta = pd.read_csv("data/{}.meta".format(data_id), sep='\t')
output = pd.read_csv("data/{}.csv".format(data_id), sep=',', index_col=0)


# load peptide and virus coordinates
gal_pep = pd.read_csv("data/{}_pep.gal".format(data_id), sep='\t', index_col="ID")
gal_vir = pd.read_csv("data/{}_vir.gal".format(data_id), sep='\t', index_col="ID")
vir_cor = gal_vir.pivot(index="Row", columns="Column", values="Name")
pep_cor = gal_pep.pivot(index="Row", columns="Column", values="Name")

#merge complete spotinformation
vir_cor_unstacked=vir_cor.unstack()
Spot=pep_cor.unstack()
Spot=Spot.reset_index()
Spot=Spot.rename( columns={0: "Peptide"})
Spot["Virus"]= vir_cor_unstacked.values
Spot["Intensity"]=output.unstack().values
Spot["Replica"]= 0

for virus_unique in Spot["Virus"].unique():
    for peptide_unique in Spot["Peptide"].unique():
        replica= 0
        for index in Spot.index:
            if Spot["Virus"][index]== virus_unique and Spot["Peptide"][index]== peptide_unique:
                Spot.set_value(index,"Replica",replica)
                replica+=1
    

print("-"*80)
print("meta data")
#display meta data 
display(meta.head())
print("-"*80)
print("Reader Output")
#display reader output
display(output.T.head())
print("-"*80)
print("peptide position")
#display peptide positions
display(pep_cor.T.head())
print("-"*80)
print("virus position")
#display virus positions
display(vir_cor.T.head())
print("-"*80)
print("Respaped as complete spot information")
#display complete spot information
display(Spot.head())
print("-"*80)
print("number of replica")
#Number of Replica
display(Spot.pivot_table(index="Virus",columns="Peptide",values="Intensity", aggfunc='count'))
print("-"*80)

--------------------------------------------------------------------------------
meta data


Unnamed: 0,field,info
0,type,Microtiter Plate
1,date,2017-05-12
2,repeat,1
3,total_repeats,3
4,virus,"[""X31A"",""X31"",""X31"",""X31"",""Pan"",""Pan"",""Pan"",""C..."


--------------------------------------------------------------------------------
Reader Output


21238,A,B,C,D,E,F,G,H
1,22508,21229,21266,21238,20484,14842,33306,17090
2,20337,22922,23449,23548,24198,17464,37703,20670
3,20394,21386,23071,26460,27889,17428,40646,22175
4,18970,19488,20933,23260,25058,17656,39031,22479
5,17500,16958,18548,18157,18304,15398,27502,17905


--------------------------------------------------------------------------------
peptide position


Row,1,2,3,4,5,6,7,8
Column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Peptid 6,Peptid 7,Peptid 8,Peptid 9,Peptid 11,Fetuin,Peptid Nenad,Puffer
2,Peptid 6,Peptid 7,Peptid 8,Peptid 9,Peptid 11,Fetuin,Peptid Nenad,Puffer
3,Peptid 6,Peptid 7,Peptid 8,Peptid 9,Peptid 11,Fetuin,Peptid Nenad,Puffer
4,Peptid 6,Peptid 7,Peptid 8,Peptid 9,Peptid 11,Fetuin,Peptid Nenad,Puffer
5,Peptid 6,Peptid 7,Peptid 8,Peptid 9,Peptid 11,Fetuin,Peptid Nenad,Puffer


--------------------------------------------------------------------------------
virus position


Row,1,2,3,4,5,6,7,8
Column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,X31C,X31C,X31C,X31C,X31C,X31C,X31C,X31C
2,X31A,X31A,X31A,X31A,X31A,X31A,X31A,X31A
3,X31A,X31A,X31A,X31A,X31A,X31A,X31A,X31A
4,X31A,X31A,X31A,X31A,X31A,X31A,X31A,X31A
5,PAN1,PAN1,PAN1,PAN1,PAN1,PAN1,PAN1,PAN1


--------------------------------------------------------------------------------
Respaped as complete spot information


Unnamed: 0,Column,Row,Peptide,Virus,Intensity,Replica
0,1,1,Peptid 6,X31C,22508,0
1,1,2,Peptid 7,X31C,21229,0
2,1,3,Peptid 8,X31C,21266,0
3,1,4,Peptid 9,X31C,21238,0
4,1,5,Peptid 11,X31C,20484,0


--------------------------------------------------------------------------------
number of replica


Peptide,Fetuin,Peptid 11,Peptid 6,Peptid 7,Peptid 8,Peptid 9,Peptid Nenad,Puffer
Virus,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CAL1,3,3,3,3,3,3,3,3
NO,2,2,2,2,2,2,2,2
PAN1,3,3,3,3,3,3,3,3
X31A,3,3,3,3,3,3,3,3
X31C,1,1,1,1,1,1,1,1


--------------------------------------------------------------------------------


## build complementary files from original data

In [163]:
## make 
#find new id-name for measurment
data_id_div = '2017-05-12_MTP_R1_sub_back'
#data_id_div = '2017-05-12_MTP_R1_sub_back'
# write meta,pep_gal and vir_gal file. 
meta.to_csv("data/{}.meta".format(data_id_div), sep='\t')
gal_pep.to_csv("data/{}_pep.gal".format(data_id_div), sep='\t')
gal_vir.to_csv("data/{}_vir.gal".format(data_id_div), sep='\t')

## perpocess the spotintensities as wanted (e.g underground subtraction ...)

In [170]:
underground = Spot.loc[(Spot['Peptide'] == "Puffer")]
# mean and std per virus
underground_mean = underground.pivot_table(values="Intensity", index="Virus", aggfunc=[np.mean, np.std]) #std is std of mean
Spot_no_underground = Spot.copy()
Spot_no_underground["std"]=0.0
for unique_virus_index in range(len(underground_mean)):
    for index in Spot.index.values:
        if Spot["Virus"][index] == underground_mean.index[unique_virus_index]:
            if data_id_div == '2017-05-12_MTP_R1_sub_back':
                #preproccecing the data
                Spot_no_underground.set_value(index,"Intensity",value=(Spot["Intensity"][index]-underground_mean["mean"]["Intensity"][unique_virus_index]))
                Spot_no_underground.set_value(index,"std",underground_mean["std"]["Intensity"][unique_virus_index])
            elif data_id_div == '2017-05-12_MTP_R1_sub_back':
                #preproccecing the data
                Spot_no_underground.set_value(index,"Intensity",value=Spot["Intensity"][index]/underground_mean["mean"]["Intensity"][unique_virus_index])
                #todo std
                
display(underground_mean)


Unnamed: 0_level_0,mean,std
Unnamed: 0_level_1,Intensity,Intensity
Virus,Unnamed: 1_level_2,Unnamed: 2_level_2
CAL1,15689.0,238.727879
NO,14440.0,39.59798
PAN1,17560.666667,316.313663
X31A,21774.666667,968.669362
X31C,17090.0,


In [165]:
# estimate error of viruses which were only measured once but the average error

estimaded_std=Spot_no_underground["std"].dropna().mean()
nan_pos = pd.isnull(Spot_no_underground["std"])
for i in Spot_no_underground.index:
    if nan_pos[i]:
        Spot_no_underground.set_value(i,"std",estimaded_std)
#change format
output_std = Spot_no_underground.pivot_table("std","Row","Column")
#make new file for output std
output_std.to_csv("data/{}_std.csv".format(data_id_div),sep =",")
display(output_std)

Column,1,2,3,4,5,6,7,8,9,10,11,12
Row,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,422.757152,968.669362,968.669362,968.669362,316.313663,316.313663,316.313663,238.727879,238.727879,238.727879,39.59798,39.59798
2,422.757152,968.669362,968.669362,968.669362,316.313663,316.313663,316.313663,238.727879,238.727879,238.727879,39.59798,39.59798
3,422.757152,968.669362,968.669362,968.669362,316.313663,316.313663,316.313663,238.727879,238.727879,238.727879,39.59798,39.59798
4,422.757152,968.669362,968.669362,968.669362,316.313663,316.313663,316.313663,238.727879,238.727879,238.727879,39.59798,39.59798
5,422.757152,968.669362,968.669362,968.669362,316.313663,316.313663,316.313663,238.727879,238.727879,238.727879,39.59798,39.59798
6,422.757152,968.669362,968.669362,968.669362,316.313663,316.313663,316.313663,238.727879,238.727879,238.727879,39.59798,39.59798
7,422.757152,968.669362,968.669362,968.669362,316.313663,316.313663,316.313663,238.727879,238.727879,238.727879,39.59798,39.59798
8,422.757152,968.669362,968.669362,968.669362,316.313663,316.313663,316.313663,238.727879,238.727879,238.727879,39.59798,39.59798


## change format and write preprocessed spotintensity file

In [166]:
output = Spot_no_underground.pivot_table("Intensity","Row","Column")
display(output)
output.to_csv("data/{}.csv".format(data_id_div),sep =",")

Column,1,2,3,4,5,6,7,8,9,10,11,12
Row,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,5418,-1437,-1380,-2804,-60,-58,293,351,-177,-353,471,633
2,4139,1147,-388,-2286,-602,-806,-701,-62,305,318,100,281
3,4176,1674,1296,-841,987,-280,625,423,888,212,-54,5
4,4148,1773,4685,1485,596,256,988,653,708,892,341,150
5,3394,2423,6114,3283,743,484,2922,1434,666,233,61,-243
6,-2248,-4310,-4346,-4118,-2162,-1468,-1691,-276,546,-38,-81,-25
7,16216,15928,18871,17256,9941,7907,7699,8842,8086,8296,295,-266
8,0,-1104,400,704,344,-277,-66,-275,154,121,28,-28


In [168]:
# perpocess the spotintensities as wanted (e.g underground suptraction ...)
Spot_no_underground = Spot.copy()
Spot_no_underground["Intensity"] = Spot_no_underground["Intensity"].astype(np.float64)
for unique_virus_index in range(len(underground_mean)):
    for index in Spot.index.values:
        if Spot["Virus"][index] == underground_mean.index[unique_virus_index]:
output = Spot_no_underground.pivot_table("Intensity","Row","Column")

# write preprocessed data to file
output.to_csv("data/{}.csv".format(data_id_div), sep=',')

KeyError: 'Intensity'

In [None]:
display(output)

In [None]:
#find new id-name for measurment
data_id_div = '2017-05-12_MTP_R1_div_wet'
# write meta,pep_gal and 
meta.to_csv("data/{}.meta".format(data_id_div), sep='\t')
gal_pep.to_csv("data/{}_pep.gal".format(data_id_div), sep='\t')
gal_vir.to_csv("data/{}_vir.gal".format(data_id_div), sep='\t')


Spot_no_underground = Spot.copy()
Spot_no_underground["Intensity"] = Spot_no_underground["Intensity"].astype(np.float64)

for unique_virus_index in range(len(underground_mean)):
    for index in Spot.index.values:
        if Spot["Virus"][index] == underground_mean.index[unique_virus_index]:
            Spot_no_underground.set_value(index,"Intensity",value=Spot["Intensity"][index]/underground_mean["Intensity"][unique_virus_index])
output = Spot_no_underground.pivot_table("Intensity","Row","Column")

# write preprocessed data to file
#output.to_csv("data/{}.csv".format(data_id_div), sep=',')