In [None]:
# run this once to install the required packages
!pip install pandas
!pip install numpy
!pip install matplotlib
!pip install statsmodels
!pip install scipy
!pip install --upgrade "git+https://github.com/science64/DynaTMT-py-SB.git" # Current version: 2.9.2 (2024-06-03)
!pip install --upgrade "git+https://github.com/science64/PBLMM.git" # Current version: 2.1.1 (2023-10-23)

In [None]:
# import the required packages

from datetime import date
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import warnings
from statsmodels.stats.multitest import multipletests, local_fdr
from scipy.stats import zscore, uniform
import matplotlib.pyplot as plt
import DynaTMT_SB.DynaTMT as mePROD
import PBLMM as statisticsGetter

# Silence every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and numerical warnings
# are hidden as well; comment this line out when debugging unexpected results.
warnings.filterwarnings("ignore")

In [None]:
# --- User settings -----------------------------------------------------------

# Folder that contains the data file. Any folder can be used here,
# e.g. C://Users/Jasmin/Desktop/JS55/fractionation/
wd = "Example data"

# Name of the study (used as prefix of the result file name).
nameOfStudy = "24h_CCCP+ISRIB_SB_import"

# Name of the PSM data file inside `wd`.
dataName = "20200724_SB_CCCP+ISRIB_Import_PSMs.txt"

# Conditions of the TMT multiplexing, one entry per channel.
conditions = [
    'Light',
    'DMSO', 'DMSO', 'DMSO',
    'CCCP', 'CCCP', 'CCCP',
    'CCCP_ISRIB', 'CCCP_ISRIB', 'CCCP_ISRIB',
]

# Pairs of conditions to compare. A pair [A, B] is reported as log2(A/B),
# e.g. ['CCCP', 'DMSO'] gives log2(CCCP/DMSO).
pairs = [
    ['CCCP', 'DMSO'],
    ['CCCP_ISRIB', 'DMSO'],
    ['CCCP_ISRIB', 'CCCP'],
]

In [None]:
# Load the PSM (or peptide) table: a tab-separated TEXT/CSV file located in `wd`.
psms = pd.read_csv(f'{wd}/{dataName}', sep='\t', header=0)

# Remove the booster channel if present. errors='ignore' turns this into a
# no-op (instead of a KeyError) when the file has no 'Abundance: 131C' column.
boster_removed = psms.drop(columns=['Abundance: 131C'], errors='ignore')

# Initiate the processing with the PD_input class on the booster-free data.
process = mePROD.PD_input(boster_removed)

# Filter contamination, NA samples, shared peptides.
filter_data = process.filter_PSMs(boster_removed)

# IT adjustment, helpful for MS2 samples.
# NOTE(review): IT_adjusted is not passed on -- the normalisation below runs on
# filter_data. If IT_adjustment modifies its input in place this is harmless,
# otherwise the adjustment is discarded; confirm against DynaTMT_SB before
# changing the order or the arguments of these calls.
IT_adjusted = process.IT_adjustment(filter_data)

# Total intensity normalisation.
sumNorm = process.total_intensity_normalisation(filter_data)

# Extract heavy PSMs/peptides.
heavy = process.extract_heavy(sumNorm)

# Extract light PSMs/peptides (OPTIONAL, not used further in this cell).
light = process.extract_light(sumNorm)

# Baseline correction of the heavy PSMs/peptides.
# NOTE(review): random=True presumably makes this step stochastic -- confirm,
# and fix a seed if the results have to be exactly reproducible.
peptide_data = process.baseline_correction(heavy, threshold=15, i_baseline=0, random=True)

# PBLMM analysis ==> main part of the statistical analysis, based on a
# peptide-based linear mixed model (LMM).
hypo = statisticsGetter.HypothesisTesting()
resultFinal = (
    hypo.peptide_based_lmm(peptide_data, conditions=conditions, pairs=pairs)
    .reset_index()                          # move the index into a regular column ...
    .rename(columns={'index': 'Accession'}) # ... and label it 'Accession'
)

# Write the result table to an Excel file named after the study and today's date.
resultFinal.to_excel(f'{nameOfStudy}_mePROD_PBLMM_{date.today().strftime("%d.%m.%Y")}.xlsx', index=False, engine='openpyxl')

print('[#] COMPLETED: resultFinal: %s rows x %s columns' % (resultFinal.shape[0], resultFinal.shape[1]))