In [1]:
import math # Usualy everything you want to do with math, you can do with numpy
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.stats as stats
import seaborn as sns

In [2]:
# Apply seaborn's "whitegrid" style to every figure drawn after this cell
sns.set_style("whitegrid")
# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline

In [3]:
# NOTE(review): this module-level list is never read by ttest(); it is kept only
# in case another cell relies on the name existing.
ttests=[]
def ttest(df, df_sub1, df_sub2, equal_var=False):
    """
    Run a two-sample t-test for every row (protein) of two sample groups.

    Both subsets must have the proteins along the rows, in the same order
    as ``df``, and the replicate samples in the columns. The test compares
    the columns of ``df_sub1`` against the columns of ``df_sub2`` row by
    row; NaN values are ignored (``nan_policy='omit'``).

    The p-values are written positionally (row i of the subsets -> row i
    of ``df``) into a new column ``'ttest_pvalue'``. ``df`` is modified
    in place and also returned.

    parameters
    ----------
    df: pd.DataFrame
        Frame that receives the ``'ttest_pvalue'`` column.
    df_sub1: pd.DataFrame
        Samples of the first group (rows = proteins, columns = samples).
    df_sub2: pd.DataFrame
        Samples of the second group (rows = proteins, columns = samples).
    equal_var: bool, default False
        Passed to ``scipy.stats.ttest_ind``. False (the default, and the
        previous hard-coded behaviour) applies Welch's correction; True
        runs the standard test that assumes equal group variances.

    returns
    -------
    pd.DataFrame
        The same ``df`` object, now carrying the ``'ttest_pvalue'`` column.

    """
    group1 = np.asarray(df_sub1, dtype=float)
    group2 = np.asarray(df_sub2, dtype=float)
    # axis=1 compares the samples within each row, which replaces the
    # transpose / test / transpose-back round trip.
    result = stats.ttest_ind(group1, group2, axis=1,
                             equal_var=equal_var, nan_policy='omit')
    df['ttest_pvalue'] = result.pvalue
    return df

In [4]:
def fold(df, sub1, sub2, sub1name, sub2name):
    """
    Add the per-row mean of each sample group, and their ratio, to ``df``.

    Three columns are written into ``df`` in place:
    ``'<sub1name> Mean'``, ``'<sub2name> Mean'`` and
    ``'Fold Change(<sub1name>/<sub2name>)'`` (first mean divided by the
    second). Means are taken across the columns of each subset and skip
    NaN values.

    Parameters
    ----------
    df : pd.DataFrame
        Frame that receives the new columns.
    sub1 : pd.DataFrame
        Samples of the first group (numerator).
    sub2 : pd.DataFrame
        Samples of the second group (denominator).
    sub1name : string
        Label used for the first group in the column names.
    sub2name : string
        Label used for the second group in the column names.

    Returns
    -------
    pd.Series
        The fold change column that was just added to ``df``.
    """
    # Build each column label once and reuse it below
    numerator_col = '%s Mean' % (sub1name)
    denominator_col = '%s Mean' % (sub2name)
    ratio_col = 'Fold Change(%s/%s)' % (sub1name, sub2name)

    df[numerator_col] = sub1.mean(axis=1, skipna=True)
    df[denominator_col] = sub2.mean(axis=1, skipna=True)
    df[ratio_col] = df[numerator_col] / df[denominator_col]
    return df[ratio_col]

In [5]:
def piscore(df,sub1name,sub2name):
    """
    Compute the pi score: -log10(p-value) multiplied by log2(fold change).

    Reads the ``'Fold Change(<sub1name>/<sub2name>)'`` and
    ``'ttest_pvalue'`` columns of ``df`` and writes three columns into
    ``df`` in place: ``'Log2(Fold Change)'``, ``'-Log(P-value)'`` and
    ``'<sub1name>/<sub2name> pi score'``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame that already holds the fold change and p-value columns.
    sub1name : string
        Label of the first group, as used in the fold change column name.
    sub2name: string
        Label of the second group, as used in the fold change column name.

    Returns
    -------
    pd.Series
        The pi score column that was just added to ``df``.
    """
    fold_change_col = 'Fold Change(%s/%s)'% (sub1name,sub2name)
    pi_col = '%s/%s pi score' % (sub1name,sub2name)
    log_fc_col = 'Log2(Fold Change)'
    neg_log_p_col = '-Log(P-value)'

    df[log_fc_col] = np.log2(df[fold_change_col])
    df[neg_log_p_col] = -np.log10(df['ttest_pvalue'])
    # Significance times effect size
    df[pi_col] = df[neg_log_p_col] * df[log_fc_col]
    return df[pi_col]

In [6]:
def TwoCategoryMaster(dfmain,dfsub1,dfsub2,sub1name,sub2name):
    """
    Run the full two-group comparison (t-test, fold change, pi score).

    Calls ``ttest``, ``fold`` and ``piscore`` in that order; the order
    matters because ``piscore`` reads the columns the first two steps
    write. Every step adds its columns to ``dfmain`` in place.

    Parameters
    ----------
    dfmain : pd.DataFrame
        Frame that receives all result columns.
    dfsub1 : pd.DataFrame
        Samples of the first group.
    dfsub2 : pd.DataFrame
        Samples of the second group.
    sub1name : string
        Label of the first group, used in the generated column names.
    sub2name : string
        Label of the second group, used in the generated column names.

    Returns
    -------
    pd.DataFrame
        ``dfmain`` itself, with the result columns added.
    """
    ttest(df=dfmain, df_sub1=dfsub1, df_sub2=dfsub2)
    fold(df=dfmain, sub1=dfsub1, sub2=dfsub2,
         sub1name=sub1name, sub2name=sub2name)
    piscore(df=dfmain, sub1name=sub1name, sub2name=sub2name)
    return dfmain

# Load in data

### Colon

In [7]:
# Load both normalized tables. Every column is first read as a string so that
# pandas does not guess dtypes, the protein ID column then becomes the index,
# and the remaining (sample) columns are cast to float. No transpose is done.
rawdf = (
    pd.read_csv("./CSVs/NormalizedCommonRepsAnn.txt", sep='\t', dtype=str)
    .set_index('datarest$ProteinID')
    .astype(float)
)
rawdf2 = (
    pd.read_csv("./CSVs/NormalizedDataAllAnn.txt", sep='\t', dtype=str)
    .set_index('datarest$ProteinID')
    .astype(float)
)

In [8]:
# Tab-separated protein annotation table; merged onto the results through its 'Entry' column further down
protein_meta = pd.read_csv('./All_germfree_annotations.txt', sep = '\t')

In [9]:
# Column names of the full table; reused later to drop the per-sample columns
samples = rawdf2.columns.tolist()
samples

['Col_C1',
 'Sp_GF3',
 'SI_C1',
 'Col_GF1',
 'He_C1',
 'He_GF2',
 'SI_GF3',
 'SI_GF1',
 'Sp_C1',
 'Col_C3',
 'SI_C3',
 'He_C2',
 'Col_GF2',
 'Sp_GF2',
 'He_GF1',
 'He_GF3',
 'Sp_C2',
 'SI_C2',
 'Sp_GF1',
 'Col_GF3',
 'Sp_C3',
 'He_C3',
 'Col_C2',
 'SI_GF2']

In [10]:
# Colon replicates: control (C) and germ-free (GF) columns
col_c = rawdf.loc[:, ["Col_C1", "Col_C2", "Col_C3"]]
col_gf = rawdf.loc[:, ["Col_GF1", "Col_GF2", "Col_GF3"]]

In [11]:
# Adds the p-value, group mean, fold change and pi score columns to rawdf in place;
# the returned frame (the same rawdf object) is what this cell displays
TwoCategoryMaster(rawdf,col_c,col_gf,'Col_Control','Col_Germ_Free')

Unnamed: 0_level_0,Col_C1,Sp_GF3,SI_C1,Col_GF1,He_C1,He_GF2,SI_GF3,SI_GF1,Sp_C1,Col_C3,...,He_C3,Col_C2,SI_GF2,ttest_pvalue,Col_Control Mean,Col_Germ_Free Mean,Fold Change(Col_Control/Col_Germ_Free),Log2(Fold Change),-Log(P-value),Col_Control/Col_Germ_Free pi score
datarest$ProteinID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A0A023T778,198.276760,139.035859,335.531816,337.888468,152.764699,147.191801,319.928168,305.510309,471.562823,193.743796,...,392.937554,141.936476,193.097833,0.188082,177.985678,259.316577,0.686364,-0.542953,0.725654,-0.393996
A0A075B5K2,450.048235,334.905561,604.822571,75.915719,502.540411,165.246835,124.652688,107.890659,121.557659,231.504262,...,186.772662,113.794419,100.151634,0.184081,265.115639,69.226214,3.829700,1.937231,0.734991,1.423848
A0A075B5M7,50.791197,408.806735,65.677513,110.679863,130.176044,503.579855,113.526479,75.293226,71.299076,54.839947,...,331.781315,82.971553,152.518343,0.011772,62.867566,135.661683,0.463414,-1.109625,1.929160,-2.140645
A0A075B5P2,121.745933,269.493159,239.071425,67.304935,264.814969,152.899765,97.728059,111.566529,257.624151,151.265992,...,290.863721,119.897412,129.986357,0.008504,130.969779,66.116702,1.980888,0.986147,2.070377,2.041696
A0A075B5P3,115.330876,208.013361,221.467970,51.183746,554.658526,134.101561,42.360311,52.079667,297.369358,160.472045,...,213.432619,122.076705,233.643950,0.294367,132.626542,95.708511,1.385734,0.470650,0.531111,0.249968
A0A075B5P6,26.367278,536.215137,45.392824,39.259276,242.107018,285.085932,41.676457,35.799752,327.250052,38.613148,...,224.422760,43.005949,75.986479,0.264588,35.995458,50.047868,0.719221,-0.475494,0.577430,-0.274564
A0A075B5R2,536.687291,355.902137,897.264670,97.254320,1114.733360,759.155591,199.978174,329.089040,95.196687,794.502427,...,405.783926,124.230078,70.386865,0.170662,485.139932,75.993845,6.383937,2.674446,0.767863,2.053608
A0A075B5T2,159.271757,446.343277,148.855324,68.637911,541.217589,488.104158,123.201822,79.931423,357.696208,913.337200,...,124.283160,135.605812,179.463156,0.337692,402.738256,83.486112,4.824015,2.270234,0.471480,1.070369
A0A075B5T3,710.347781,470.062008,242.590062,136.035325,1114.890165,535.151797,162.872681,198.092585,118.736393,546.063439,...,248.443147,277.618709,126.005706,0.078557,511.343310,98.410236,5.196038,2.377412,1.104817,2.626604
A0A075B5U7,177.923760,206.734778,296.769931,43.359804,455.432469,97.619571,136.142570,47.435484,318.978351,113.087123,...,421.220279,228.597735,123.749351,0.089446,173.202873,80.095857,2.162445,1.112663,1.048439,1.166560


In [17]:
# Save the colon results: all sample columns plus the statistics added to rawdf above
# NOTE(review): this cell's execution counter is out of sequence with its neighbours —
# restart and run all to confirm the file on disk matches the table shown above
rawdf.to_csv('Col_ttest.csv')

### Small Intestine

In [13]:
# Reload fresh copies of both tables: the previous TwoCategoryMaster call added
# its result columns to rawdf in place. Every column is first read as a string
# so that pandas does not guess dtypes, the protein ID column then becomes the
# index, and the sample columns are cast to float. No transpose is done.
rawdf = (
    pd.read_csv("./CSVs/NormalizedCommonRepsAnn.txt", sep='\t', dtype=str)
    .set_index('datarest$ProteinID')
    .astype(float)
)
rawdf2 = (
    pd.read_csv("./CSVs/NormalizedDataAllAnn.txt", sep='\t', dtype=str)
    .set_index('datarest$ProteinID')
    .astype(float)
)

In [78]:
# Small intestine replicates: control (C) and germ-free (GF) columns
SI_c = rawdf.loc[:, ["SI_C1", "SI_C2", "SI_C3"]]
SI_gf = rawdf.loc[:, ["SI_GF1", "SI_GF2", "SI_GF3"]]

In [79]:
# Adds the p-value, group mean, fold change and pi score columns to rawdf in place;
# the returned frame (the same rawdf object) is what this cell displays
TwoCategoryMaster(rawdf,SI_c,SI_gf,'SI_Control','SI_Germ_Free')

Unnamed: 0_level_0,Col_C1,Sp_GF3,SI_C1,Col_GF1,He_C1,He_GF2,SI_GF3,SI_GF1,Sp_C1,Col_C3,...,He_C3,Col_C2,SI_GF2,ttest_pvalue,SI_Control Mean,SI_Germ_Free Mean,Fold Change(SI_Control/SI_Germ_Free),Log2(Fold Change),-Log(P-value),SI_Control/SI_Germ_Free pi score
datarest$ProteinID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A0A023T778,198.276760,139.035859,335.531816,337.888468,152.764699,147.191801,319.928168,305.510309,471.562823,193.743796,...,392.937554,141.936476,193.097833,0.789946,256.848477,272.845437,0.941370,-0.087166,0.102403,-0.008926
A0A075B5K2,450.048235,334.905561,604.822571,75.915719,502.540411,165.246835,124.652688,107.890659,121.557659,231.504262,...,186.772662,113.794419,100.151634,0.015208,541.559738,110.898327,4.883390,2.287883,1.817914,4.159175
A0A075B5M7,50.791197,408.806735,65.677513,110.679863,130.176044,503.579855,113.526479,75.293226,71.299076,54.839947,...,331.781315,82.971553,152.518343,0.940826,110.368312,113.779349,0.970021,-0.043913,0.026491,-0.001163
A0A075B5P2,121.745933,269.493159,239.071425,67.304935,264.814969,152.899765,97.728059,111.566529,257.624151,151.265992,...,290.863721,119.897412,129.986357,0.106002,194.393287,113.093648,1.718870,0.781460,0.974688,0.761680
A0A075B5P3,115.330876,208.013361,221.467970,51.183746,554.658526,134.101561,42.360311,52.079667,297.369358,160.472045,...,213.432619,122.076705,233.643950,0.542676,157.396411,109.361309,1.439233,0.525300,0.265460,0.139446
A0A075B5P6,26.367278,536.215137,45.392824,39.259276,242.107018,285.085932,41.676457,35.799752,327.250052,38.613148,...,224.422760,43.005949,75.986479,0.621763,43.641713,51.154229,0.853140,-0.229146,0.206375,-0.047290
A0A075B5R2,536.687291,355.902137,897.264670,97.254320,1114.733360,759.155591,199.978174,329.089040,95.196687,794.502427,...,405.783926,124.230078,70.386865,0.626197,356.439353,199.818026,1.783820,0.834970,0.203289,0.169740
A0A075B5T2,159.271757,446.343277,148.855324,68.637911,541.217589,488.104158,123.201822,79.931423,357.696208,913.337200,...,124.283160,135.605812,179.463156,0.712079,146.036582,127.532134,1.145096,0.195469,0.147472,0.028826
A0A075B5T3,710.347781,470.062008,242.590062,136.035325,1114.890165,535.151797,162.872681,198.092585,118.736393,546.063439,...,248.443147,277.618709,126.005706,0.712887,139.050900,162.323658,0.856627,-0.223260,0.146979,-0.032815
A0A075B5U7,177.923760,206.734778,296.769931,43.359804,455.432469,97.619571,136.142570,47.435484,318.978351,113.087123,...,421.220279,228.597735,123.749351,0.030146,300.750925,102.442468,2.935803,1.553755,1.520777,2.362915


In [23]:
# Save the small intestine results: all sample columns plus the statistics added to rawdf above
# NOTE(review): this cell's execution counter is out of sequence with its neighbours —
# restart and run all to confirm the file on disk matches the table shown above
rawdf.to_csv('SI_ttest.csv')

### Heart

In [15]:
# Reload fresh copies of both tables: the previous TwoCategoryMaster call added
# its result columns to rawdf in place. Every column is first read as a string
# so that pandas does not guess dtypes, the protein ID column then becomes the
# index, and the sample columns are cast to float. No transpose is done.
rawdf = (
    pd.read_csv("./CSVs/NormalizedCommonRepsAnn.txt", sep='\t', dtype=str)
    .set_index('datarest$ProteinID')
    .astype(float)
)
rawdf2 = (
    pd.read_csv("./CSVs/NormalizedDataAllAnn.txt", sep='\t', dtype=str)
    .set_index('datarest$ProteinID')
    .astype(float)
)

In [16]:
# Heart replicates: control (C) and germ-free (GF) columns
He_c = rawdf.loc[:, ["He_C1", "He_C2", "He_C3"]]
He_gf = rawdf.loc[:, ["He_GF1", "He_GF2", "He_GF3"]]

In [17]:
# Adds the p-value, group mean, fold change and pi score columns to rawdf in place;
# the returned frame (the same rawdf object) is what this cell displays
TwoCategoryMaster(rawdf,He_c,He_gf,'He_Control','He_Germ_Free')

Unnamed: 0_level_0,Col_C1,Sp_GF3,SI_C1,Col_GF1,He_C1,He_GF2,SI_GF3,SI_GF1,Sp_C1,Col_C3,...,He_C3,Col_C2,SI_GF2,ttest_pvalue,He_Control Mean,He_Germ_Free Mean,Fold Change(He_Control/He_Germ_Free),Log2(Fold Change),-Log(P-value),He_Control/He_Germ_Free pi score
datarest$ProteinID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A0A023T778,198.276760,139.035859,335.531816,337.888468,152.764699,147.191801,319.928168,305.510309,471.562823,193.743796,...,392.937554,141.936476,193.097833,0.718841,254.305772,294.134100,0.864591,-0.209910,0.143367,-0.030094
A0A075B5K2,450.048235,334.905561,604.822571,75.915719,502.540411,165.246835,124.652688,107.890659,121.557659,231.504262,...,186.772662,113.794419,100.151634,0.296709,339.441345,212.200215,1.599628,0.677736,0.527669,0.357621
A0A075B5M7,50.791197,408.806735,65.677513,110.679863,130.176044,503.579855,113.526479,75.293226,71.299076,54.839947,...,331.781315,82.971553,152.518343,0.077055,197.721066,974.623517,0.202869,-2.301378,1.113199,-2.561893
A0A075B5P2,121.745933,269.493159,239.071425,67.304935,264.814969,152.899765,97.728059,111.566529,257.624151,151.265992,...,290.863721,119.897412,129.986357,0.442261,207.255884,139.553200,1.485139,0.570598,0.354322,0.202175
A0A075B5P3,115.330876,208.013361,221.467970,51.183746,554.658526,134.101561,42.360311,52.079667,297.369358,160.472045,...,213.432619,122.076705,233.643950,0.851313,367.186633,418.162548,0.878095,-0.187550,0.069911,-0.013112
A0A075B5P6,26.367278,536.215137,45.392824,39.259276,242.107018,285.085932,41.676457,35.799752,327.250052,38.613148,...,224.422760,43.005949,75.986479,0.146650,238.754621,419.774186,0.568769,-0.814085,0.833718,-0.678717
A0A075B5R2,536.687291,355.902137,897.264670,97.254320,1114.733360,759.155591,199.978174,329.089040,95.196687,794.502427,...,405.783926,124.230078,70.386865,0.540923,552.675877,304.577481,1.814566,0.859624,0.266864,0.229403
A0A075B5T2,159.271757,446.343277,148.855324,68.637911,541.217589,488.104158,123.201822,79.931423,357.696208,913.337200,...,124.283160,135.605812,179.463156,0.785585,256.542206,197.316994,1.300153,0.378681,0.104807,0.039688
A0A075B5T3,710.347781,470.062008,242.590062,136.035325,1114.890165,535.151797,162.872681,198.092585,118.736393,546.063439,...,248.443147,277.618709,126.005706,0.929835,562.769841,596.660578,0.943199,-0.084365,0.031594,-0.002665
A0A075B5U7,177.923760,206.734778,296.769931,43.359804,455.432469,97.619571,136.142570,47.435484,318.978351,113.087123,...,421.220279,228.597735,123.749351,0.086451,366.219025,156.383382,2.341803,1.227620,1.063228,1.305239


In [38]:
# Save the heart results: all sample columns plus the statistics added to rawdf above
# NOTE(review): this cell's execution counter is out of sequence with its neighbours —
# restart and run all to confirm the file on disk matches the table shown above
rawdf.to_csv('He_ttest.csv')

### Spleen

In [18]:
# Reload fresh copies of both tables: the previous TwoCategoryMaster call added
# its result columns to rawdf in place. Every column is first read as a string
# so that pandas does not guess dtypes, the protein ID column then becomes the
# index, and the sample columns are cast to float. No transpose is done.
rawdf = (
    pd.read_csv("./CSVs/NormalizedCommonRepsAnn.txt", sep='\t', dtype=str)
    .set_index('datarest$ProteinID')
    .astype(float)
)
rawdf2 = (
    pd.read_csv("./CSVs/NormalizedDataAllAnn.txt", sep='\t', dtype=str)
    .set_index('datarest$ProteinID')
    .astype(float)
)

In [19]:
# Spleen replicates: control (C) and germ-free (GF) columns
Sp_c = rawdf.loc[:, ["Sp_C1", "Sp_C2", "Sp_C3"]]
Sp_gf = rawdf.loc[:, ["Sp_GF1", "Sp_GF2", "Sp_GF3"]]

In [20]:
# Adds the p-value, group mean, fold change and pi score columns to rawdf in place;
# the returned frame (the same rawdf object) is what this cell displays
TwoCategoryMaster(rawdf,Sp_c,Sp_gf,'Sp_Control','Sp_Germ_Free')

Unnamed: 0_level_0,Col_C1,Sp_GF3,SI_C1,Col_GF1,He_C1,He_GF2,SI_GF3,SI_GF1,Sp_C1,Col_C3,...,He_C3,Col_C2,SI_GF2,ttest_pvalue,Sp_Control Mean,Sp_Germ_Free Mean,Fold Change(Sp_Control/Sp_Germ_Free),Log2(Fold Change),-Log(P-value),Sp_Control/Sp_Germ_Free pi score
datarest$ProteinID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A0A023T778,198.276760,139.035859,335.531816,337.888468,152.764699,147.191801,319.928168,305.510309,471.562823,193.743796,...,392.937554,141.936476,193.097833,0.739504,263.140087,217.827564,1.208020,0.272645,0.131059,0.035733
A0A075B5K2,450.048235,334.905561,604.822571,75.915719,502.540411,165.246835,124.652688,107.890659,121.557659,231.504262,...,186.772662,113.794419,100.151634,0.878840,222.115284,208.855500,1.063488,0.088804,0.056090,0.004981
A0A075B5M7,50.791197,408.806735,65.677513,110.679863,130.176044,503.579855,113.526479,75.293226,71.299076,54.839947,...,331.781315,82.971553,152.518343,0.035696,115.848198,531.700534,0.217882,-2.198378,1.447382,-3.181893
A0A075B5P2,121.745933,269.493159,239.071425,67.304935,264.814969,152.899765,97.728059,111.566529,257.624151,151.265992,...,290.863721,119.897412,129.986357,0.311383,256.114780,332.295614,0.770744,-0.375677,0.506705,-0.190357
A0A075B5P3,115.330876,208.013361,221.467970,51.183746,554.658526,134.101561,42.360311,52.079667,297.369358,160.472045,...,213.432619,122.076705,233.643950,0.846458,299.875925,274.417515,1.092773,0.127993,0.072394,0.009266
A0A075B5P6,26.367278,536.215137,45.392824,39.259276,242.107018,285.085932,41.676457,35.799752,327.250052,38.613148,...,224.422760,43.005949,75.986479,0.029403,329.053086,524.035308,0.627922,-0.671344,1.531608,-1.028236
A0A075B5R2,536.687291,355.902137,897.264670,97.254320,1114.733360,759.155591,199.978174,329.089040,95.196687,794.502427,...,405.783926,124.230078,70.386865,0.851244,156.536068,176.918544,0.884792,-0.176590,0.069946,-0.012352
A0A075B5T2,159.271757,446.343277,148.855324,68.637911,541.217589,488.104158,123.201822,79.931423,357.696208,913.337200,...,124.283160,135.605812,179.463156,0.957622,311.640354,307.200210,1.014454,0.020703,0.018806,0.000389
A0A075B5T3,710.347781,470.062008,242.590062,136.035325,1114.890165,535.151797,162.872681,198.092585,118.736393,546.063439,...,248.443147,277.618709,126.005706,0.591749,216.734347,281.618117,0.769604,-0.377812,0.227862,-0.086089
A0A075B5U7,177.923760,206.734778,296.769931,43.359804,455.432469,97.619571,136.142570,47.435484,318.978351,113.087123,...,421.220279,228.597735,123.749351,0.447992,295.444816,265.544123,1.112602,0.153937,0.348730,0.053682


In [48]:
# Save the spleen results: all sample columns plus the statistics added to rawdf above
# NOTE(review): this cell's execution counter is out of sequence with its neighbours —
# restart and run all to confirm the file on disk matches the table shown above
rawdf.to_csv('Sp_ttest.csv')

### Combine data tables

In [30]:
# Read back the per-tissue result files written by the to_csv cells above
# (col = colon, sp = spleen, he = heart, il = small intestine).
# NOTE(review): the table displayed below contains columns such as
# 'BH_Critical_Val_Q_0.25' and 'Unnamed: 0' that no cell in this notebook creates —
# presumably the CSVs were edited outside the notebook; confirm before re-running.
col = pd.read_csv('./Col_ttest.csv')
sp = pd.read_csv('./Sp_ttest.csv')
he = pd.read_csv('./He_ttest.csv')
il = pd.read_csv('./SI_ttest.csv')

In [31]:
# Remove the per-sample columns from the spleen, heart and small intestine
# tables; the colon table (col) keeps them.
sp = sp.drop(columns=samples)
he = he.drop(columns=samples)
il = il.drop(columns=samples)
il

Unnamed: 0.1,Unnamed: 0,datarest$ProteinID,ttest_pvalue,SI_Control Mean,SI_Germ_Free Mean,Fold Change(SI_Control/SI_Germ_Free),Log2(Fold Change),-Log(P-value),SI_Control/SI_Germ_Free pi score,BH_Critical_Val_Q_0.25
0,0,Q91VM9,0.000514,110.618359,144.527698,0.765378,-0.385755,3.288909,-1.268713e+00,0.000054
1,1,P17897,0.000529,691.969995,432.977057,1.598168,0.676419,3.276594,2.216350e+00,0.000107
2,2,A0A075B6A3,0.000616,552.694258,204.434578,2.703526,1.434842,3.210673,4.606810e+00,0.000161
3,3,Q91W90,0.000823,259.896677,162.013605,1.604166,0.681823,3.084659,2.103192e+00,0.000215
4,4,P00920,0.001335,31.510885,21.493717,1.466051,0.551935,2.874517,1.586547e+00,0.000268
5,5,Q00623,0.002245,116.640210,230.236739,0.506610,-0.981053,2.648706,-2.598520e+00,0.000322
6,6,Q3UPV6,0.002477,136.190930,57.662982,2.361843,1.239913,2.606118,3.231361e+00,0.000375
7,7,Q61024,0.002666,375.554027,191.533255,1.960777,0.971426,2.574198,2.500642e+00,0.000429
8,8,Q3TR54,0.002709,110.504499,30.985240,3.566359,1.834452,2.567224,4.709449e+00,0.000483
9,9,Q8CB12,0.003160,115.415064,35.443102,3.256348,1.703255,2.500327,4.258694e+00,0.000536


In [33]:
# Combine the four tissue tables into one wide table keyed by protein ID.
# Several statistic columns (e.g. 'ttest_pvalue', 'Log2(Fold Change)',
# '-Log(P-value)') have the same name in every table. Chained merges with the
# default '_x'/'_y' suffixes hide which tissue a column came from and, by the
# third merge, generate the same suffixed name twice (an error in recent
# pandas). Every shared column is therefore tagged with its tissue first.
merge_key = 'datarest$ProteinID'
tissue_tables = [('Col', col), ('Sp', sp), ('SI', il), ('He', he)]

# Column names (other than the merge key) that occur in more than one table
name_counts = pd.Series(
    [name for _, table in tissue_tables for name in table.columns]
).value_counts()
shared_names = set(name_counts[name_counts > 1].index) - {merge_key}

tagged_tables = [
    table.rename(columns={name: '%s_%s' % (name, tissue)
                          for name in table.columns if name in shared_names})
    for tissue, table in tissue_tables
]

# Inner joins in the original order: colon, spleen, small intestine, heart
col = tagged_tables[0]
for table in tagged_tables[1:]:
    col = col.merge(table, on=merge_key)

In [37]:
# Attach the protein annotations (protein ID matched to the 'Entry' column) and write the combined table
col.merge(protein_meta, left_on = 'datarest$ProteinID', right_on = 'Entry').to_csv('./All_piscores_annotated.csv')

In [41]:
# Attach the protein annotations to the full normalized table and write it out.
# 'datarest$ProteinID' is the index of rawdf2 (see set_index in the load cell), so
# left_on refers to an index level name here rather than a column.
# NOTE(review): confirm the protein IDs are still present in the written file.
rawdf2.merge(protein_meta, left_on = 'datarest$ProteinID', right_on = 'Entry').to_csv('./DataAll_annotated.csv')

### Brain

In [23]:
# Brain data set: the normalized protein table (read from the parent directory)
# and its annotation table, both tab-separated
df = pd.read_csv("../GF_Brains_ProteinsNormalized_2019.txt", sep='\t')
protein_annotations = pd.read_csv('./GF_brains_uniprotann.txt', sep = '\t')

In [24]:
# Brain replicates: control (C) and germ-free (GF) columns
c = df.loc[:, ["C1", "C2", "C3"]]
gf = df.loc[:, ["GF1", "GF2", "GF3"]]

In [25]:
# Adds the p-value, group mean, fold change and pi score columns to df in place;
# the returned frame (the same df object) is what this cell displays
TwoCategoryMaster(df,c,gf,'Control','Germ Free')

Unnamed: 0,Protein,Description,C1,C2,C3,GF1,GF2,GF3,ttest_pvalue,Control Mean,Germ Free Mean,Fold Change(Control/Germ Free),Log2(Fold Change),-Log(P-value),Control/Germ Free pi score
0,A0A023T778,Mago nashi protein OS=Mus musculus OX=10090 GN...,483.704235,490.270176,503.325983,505.316508,495.230915,467.929830,0.830063,492.433465,489.492418,1.006008,0.008642,0.080889,0.000699
1,A0A067XG46,X-linked retinitis pigmentosa GTPase regulator...,440.123023,382.281357,419.262600,559.974968,500.726167,601.555780,0.022389,413.888993,554.085638,0.746977,-0.420865,1.649963,-0.694412
2,A0A075B6A0,Immunoglobulin heavy constant mu (Fragment) OS...,331.603817,318.282324,407.130203,540.759513,844.489645,425.259739,0.177381,352.338782,603.502966,0.583823,-0.776398,0.751092,-0.583146
3,A0A087WNT1,Elongin-C OS=Mus musculus OX=10090 GN=Eloc PE=...,462.780246,551.861595,544.243127,442.184512,542.785926,410.444274,0.334722,519.628323,465.138237,1.117148,0.159821,0.475315,0.075965
4,A0A087WNU5,Ankyrin-3 (Fragment) OS=Mus musculus OX=10090 ...,486.062796,484.168997,473.861636,495.806241,498.516212,502.914447,0.024693,481.364476,499.078966,0.964506,-0.052138,1.607427,-0.083809
5,A0A087WNU6,Leucine-rich repeat flightless-interacting pro...,489.933778,441.713645,456.037928,511.411848,519.786173,516.770231,0.060894,462.561783,515.989418,0.896456,-0.157695,1.215427,-0.191667
6,A0A087WNZ5,Lymphocyte antigen 6C1 OS=Mus musculus OX=1009...,600.128465,531.845679,474.739564,416.032609,466.470682,498.579637,0.169520,535.571236,460.360976,1.163372,0.218313,0.770780,0.168271
7,A0A087WP33,Autophagy-related protein 9 (Fragment) OS=Mus ...,434.193254,600.089479,445.653352,492.312676,371.015504,582.976601,0.897276,493.312028,482.101594,1.023253,0.033163,0.047074,0.001561
8,A0A087WP80,Limbic system-associated membrane protein OS=M...,468.898396,532.325701,424.255679,515.105061,456.636911,533.004491,0.537557,475.159925,501.582154,0.947322,-0.078073,0.269576,-0.021047
9,A0A087WPD1,Regulator of G-protein-signaling 6 OS=Mus musc...,506.181061,526.904263,505.155894,452.563276,495.919707,472.268913,0.068520,512.747073,473.583965,1.082695,0.114627,1.164182,0.133447


In [26]:
# Attach the annotations by matching 'Protein' to the annotation table's 'Entry' column.
# NOTE: df is reassigned, so running this cell a second time merges the annotations again.
df = df.merge(protein_annotations, left_on = "Protein", right_on = "Entry")

In [None]:
# Write the annotated brain results (statistics plus annotation columns) to disk
df.to_csv('./GF_Brains_piscores_Annotated.csv')