In [1]:
from jupyter_helpers.selective_import import notebooks_importer 
from Data import clinical, raw_protein_matrix, protein_levels, by_condition

Importing Jupyter notebook from Data.ipynb


In [2]:
from helpers.utilities import *
%run helpers/notebook_setup.ipynb

In [3]:
%R source('plots/colors.R');

## Differential levels of proteins

**Important notes from meeting:**
- During the study there were 186 admissions of meningitis patients in the study hospital. It is likely that many of the severe patients were not enrolled in the study, as it was not the priority of the clinician to ask for consent (or it would be impossible).
  - this might impact survival estimates, i.e. we may be missing some of the very severe patients who died too early for the necessary consents and biofluids to be collected
- Missing protein data → too little CSF was collected, so they had to decide whether a given sample would be used for protein measurement or for RNA-seq. This decision may have been effectively random.

### Using limma

In [4]:
%%R -i by_condition -i raw_protein_matrix
# Fit a two-group limma model contrasting condition `b` against condition `a`.
#
# Reads two globals that the cell's `-i` flags push into the R session:
#   raw_protein_matrix - data frame whose columns are selected by the mask
#                        below (assumed: one column per sample - TODO confirm)
#   by_condition       - assumed to be a vector holding one condition label
#                        per column of raw_protein_matrix, in the same order
#
# Arguments:
#   a - label of the reference condition (coded 0 in the design)
#   b - label of the compared condition (coded 1 in the design)
#
# Returns the object produced by limma::eBayes(); the second coefficient
# (`Group`) is the `b` minus `a` difference.
#
# NOTE(review): the logFC values reported downstream are in the tens of
# thousands, which suggests the matrix is on the raw scale; limma is normally
# applied to log-transformed intensities - confirm this is intended.
diff_ebayes = function(a, b) {
    selected = by_condition == a | by_condition == b
    considered = raw_protein_matrix[selected]
    # Code the groups from the labels of the columns that were actually
    # selected. Building the vector as rep(0, a_cnt) followed by rep(1, b_cnt)
    # is only correct when every `a` column precedes every `b` column;
    # otherwise samples are silently assigned to the wrong group.
    groups = as.integer(by_condition[selected] == b)
    design = cbind(Intercept=1, Group=groups)
    fit = limma::lmFit(considered, design)
    limma::eBayes(fit)
}

In [5]:
%%R
# Tuberculosis is the first argument (coded 0 in the design built by
# diff_ebayes), 'Healthy control' the second (coded 1).
result = diff_ebayes('Tuberculosis', 'Healthy control')
# coef=2 selects the `Group` column of that design, so logFC is the
# healthy-control level relative to tuberculosis: a negative value means the
# protein is lower in healthy controls.
limma::topTable(result, coef=2)

                   logFC   AveExpr         t      P.Value    adj.P.Val
Lysozyme       -61798.20  60355.07 -14.55624 1.647120e-19 2.149491e-16
TIMP-1         -65320.26  82368.13 -13.55184 2.821693e-18 1.571454e-15
FCG3B          -72822.64  71615.26 -13.46652 3.612537e-18 1.571454e-15
C3d           -124850.49  94291.52 -13.33801 5.250174e-18 1.712869e-15
C2             -61972.45  51762.00 -12.16199 1.771368e-16 3.652304e-14
IGFBP-4       -104840.02 176451.04 -12.14024 1.893649e-16 3.652304e-14
sL-Selectin   -100896.09  88874.23 -12.12918 1.959090e-16 3.652304e-14
Cyclophilin A -130136.76 110239.93 -11.95277 3.374827e-16 5.505186e-14
TNF sR-II     -104918.56  71753.02 -11.74424 6.452347e-16 8.525402e-14
FSTL1          -70719.52 108453.99 -11.74027 6.532875e-16 8.525402e-14
                      B
Lysozyme      -4.432507
TIMP-1        -4.437235
FCG3B         -4.437672
C3d           -4.438341
C2            -4.445157
IGFBP-4       -4.445296
sL-Selectin   -4.445367
Cyclophilin A -4.446513
TNF

In [6]:
%%R -o top
# Same Tuberculosis vs 'Healthy control' fit as in the previous cell, repeated
# so that this cell does not depend on `result` from an earlier cell.
result = diff_ebayes('Tuberculosis', 'Healthy control')
# Keep the 50 top-ranked proteins for the `Group` coefficient (coef=2);
# the `-o top` flag exports the table to the Python namespace.
top = limma::topTable(result, number=50, coef=2)

In [7]:
top

Unnamed: 0,logFC,AveExpr,t,P.Value,adj.P.Val,B
Lysozyme,-61798.203538,60355.074510,-14.556238,1.647120e-19,2.149491e-16,-4.432507
TIMP-1,-65320.261538,82368.125490,-13.551842,2.821693e-18,1.571454e-15,-4.437235
FCG3B,-72822.638769,71615.258824,-13.466519,3.612537e-18,1.571454e-15,-4.437672
C3d,-124850.490462,94291.517647,-13.338010,5.250174e-18,1.712869e-15,-4.438341
C2,-61972.450000,51761.998039,-12.161993,1.771368e-16,3.652304e-14,-4.445157
...,...,...,...,...,...,...
BAFF,-929.621692,575.960784,-8.414903,4.151744e-11,1.177832e-09,-4.478259
a1-Antitrypsin,-41023.540923,27110.798039,-8.357609,5.076249e-11,1.409469e-09,-4.478941
a2-Macroglobulin,-19450.555231,12589.033333,-8.330416,5.584998e-11,1.518421e-09,-4.479267
Osteopontin,-29015.079846,33831.825490,-8.244342,7.559546e-11,2.013308e-09,-4.480308


In [8]:
# Aptamer metadata table; show_uniprot_query() relies on its `Target` and
# `UniProt` columns.
# NOTE(review): `read_csv` is not imported explicitly in this notebook -
# presumably it comes from `from helpers.utilities import *`; confirm.
relevant_aptamers = read_csv('data/other/relevant_aptamers.csv')

In [9]:
def show_uniprot_query(top, n=10, aptamers=None):
    """Print the UniProt accessions of the first `n` proteins in `top`.

    The output is one accession per line, ready to paste into a UniProt
    query. Accessions are printed in the row order of the aptamer table,
    not in the ranking order of `top`.

    Args:
        top: table indexed by protein target name (e.g. the limma top table);
            only the index of its first `n` rows is used.
        n: number of leading rows of `top` to look up.
        aptamers: table with `Target` and `UniProt` columns. Defaults to the
            notebook-level `relevant_aptamers`.

    Returns:
        None; the accessions are printed.
    """
    if aptamers is None:
        # Fall back to the table loaded at notebook level.
        aptamers = relevant_aptamers
    chosen = top.head(n).index
    metadata = aptamers[aptamers.Target.isin(chosen)]
    # A missing accession is NaN (a float) and would make str.join raise
    # TypeError, so targets without a UniProt entry are skipped.
    accessions = metadata.UniProt.dropna()
    print('\n'.join(accessions))

In [10]:
show_uniprot_query(top)

Q12841
P06681
P62937
O75015
P22692
P61626
P01033
P20333
P14151
P01024


In [11]:
%%R -o top
# Cryptococcal is the first argument (coded 0 in the design built by
# diff_ebayes), 'Healthy control' the second (coded 1).
result = diff_ebayes('Cryptococcal', 'Healthy control')
# Keep the 50 top-ranked proteins for the `Group` coefficient (coef=2).
# NOTE: `-o top` overwrites the Python variable `top` that previously held
# the tuberculosis table.
top = limma::topTable(result, number=50, coef=2)

In [12]:
top

Unnamed: 0,logFC,AveExpr,t,P.Value,adj.P.Val,B
Lysozyme,-55767.433667,56164.769388,-11.352676,4.060492e-15,5.298943e-12,-3.002309
GRN,-65775.903500,41436.653061,-11.012807,1.163115e-14,7.589324e-12,-3.030406
TIMP-1,-56021.279167,76506.593878,-10.600543,4.252599e-14,1.849880e-11,-3.066698
TNF sR-II,-114593.359333,74392.483673,-9.927407,3.697703e-13,1.206376e-10,-3.131608
Cyclophilin A,-96929.971000,91371.608163,-9.430054,1.893529e-12,4.942111e-10,-3.184483
...,...,...,...,...,...,...
CBPE,903.992167,1132.016327,6.236349,1.144032e-07,3.245569e-06,-3.644369
SPARCL1,18999.956167,54352.951020,6.222382,1.201445e-07,3.329297e-06,-3.646877
kallikrein 14,-480.966500,302.071429,-6.216944,1.224569e-07,3.329297e-06,-3.647855
LCMT1,-128.265000,114.783673,-6.209690,1.256109e-07,3.345350e-06,-3.649160
