In [1]:
import numpy as np
import pandas as pd
from sklearn.cross_decomposition import PLSRegression
from sklearn.pipeline import Pipeline
from msresist.parameter_tuning import MSclusPLSR_tuning
from msresist.plsr import Q2Y_across_components, R2Y_across_components
from msresist.figures.figure2 import plotR2YQ2Y, plotMixedClusteringPLSR_GridSearch, plotMeasuredVsPredicted, plotScoresLoadings, plotScoresLoadings_plotly, plotclusteraverages
from msresist.clustering import MassSpecClustering
from msresist.pre_processing import preprocessing, MergeDfbyMean
from msresist.sequence_analysis import preprocess_seqs
import matplotlib.pyplot as plt
from msresist.FileExporter import create_download_link
import warnings
warnings.simplefilter("ignore")

In [2]:
# pd.set_option('display.max_colwidth', 1000)
# pd.set_option('display.max_rows', 1000)
# pd.set_option('display.max_columns', 1000)

In [3]:
# Run the project's preprocessing step, pass its result through
# preprocess_seqs with "Y", and order the rows by the "Protein" column.
ABC = preprocess_seqs(
    preprocessing(AXLwt=True, motifs=True, Vfilter=True, FCfilter=True, log2T=True, mc_row=True),
    "Y",
).sort_values(by="Protein")

# Column layout used below: the first 7 columns are kept as `info`, and every
# column from position 7 onward is treated as a treatment column.
# NOTE(review): presumably the first 7 columns are per-peptide annotations -- confirm
# against the output of preprocessing/preprocess_seqs.
header = ABC.columns
treatments = header[7:]

info = ABC.iloc[:, :7]
# Transposed, so the columns from position 7 onward become the rows of `data`.
data = ABC.iloc[:, 7:].T

In [4]:
# Read the two phenotype files, keeping the first 11 columns of each
# (the first 30 rows of CV_raw3 and the first 29 rows of CV_raw4).
Y_cv1 = pd.read_csv('./msresist/data/Phenotypic_data/GrowthFactors/CV_raw3.csv').iloc[:30, :11]
Y_cv2 = pd.read_csv('./msresist/data/Phenotypic_data/GrowthFactors/CV_raw4.csv').iloc[:29, :11]

# Divide every column except the first by that column's own first-row value,
# so each column is expressed relative to its initial entry.
for replicate in (Y_cv1, Y_cv2):
    for col in range(1, Y_cv2.columns.size):
        replicate.iloc[:, col] /= replicate.iloc[0, col]

# Stack both frames and hand them to MergeDfbyMean keyed on "Elapsed".
# NOTE(review): presumably this averages rows sharing an "Elapsed" value -- confirm
# in msresist.pre_processing.
cv_columns = Y_cv1.columns
Y_cv = MergeDfbyMean(pd.concat([Y_cv1, Y_cv2], axis=0), cv_columns, "Elapsed")
Y_cv = Y_cv.reset_index()[cv_columns]

# Keep the first row whose "Elapsed" equals 72 and drop its first column
# (presumably "Elapsed" itself -- verify the column order of the CSVs).
at_72 = Y_cv["Elapsed"] == 72
Y_cv = Y_cv[at_72].iloc[0, 1:]

# Figure out weights and peptides that move when changing weights

In [5]:
def plotScoresLoadings(data, info, Y_cv, ncl, ncomp, distance_method, GMMweight):
    """Fit a clustering + PLSR pipeline and plot its scores/loadings with plotly.

    NOTE: this definition shadows the `plotScoresLoadings` imported from
    `msresist.figures.figure2` at the top of the notebook; once this cell has
    run, the name refers to this notebook-local version.

    Parameters
    ----------
    data : pandas.DataFrame
        Passed to the pipeline as X. Its columns are relabelled (on a copy)
        with the "Abbv" and "Position" columns of `info` before plotting.
    info : pandas.DataFrame
        Passed to MassSpecClustering; must provide "Abbv" and "Position".
    Y_cv :
        Passed to the pipeline as y and forwarded to the plotting helper.
    ncl :
        Second positional argument of MassSpecClustering
        (presumably the number of clusters -- confirm in msresist.clustering).
    ncomp : int
        Number of PLSR components; also forwarded to the plotting helper.
    distance_method :
        Forwarded to MassSpecClustering as `distance_method`.
    GMMweight :
        Forwarded to MassSpecClustering as `GMMweight`.

    Returns
    -------
    None
        The function is called for the plot produced by
        `plotScoresLoadings_plotly`.
    """
    mixedCl_plsr = Pipeline([
        ('mixedCl', MassSpecClustering(info, ncl, GMMweight=GMMweight, distance_method=distance_method)),
        ('plsr', PLSRegression(ncomp)),
    ])
    # Pipeline.fit returns the pipeline itself, so its result is not bound to a name.
    mixedCl_plsr.fit(data, Y_cv)
    labels = mixedCl_plsr.named_steps.mixedCl.labels_

    # Relabel a copy so the caller's `data` keeps its original column labels.
    datacopy = data.copy()
    datacopy.columns = [info["Abbv"], info["Position"]]
    plotScoresLoadings_plotly(datacopy, labels, Y_cv, ncomp)

## BINOMIAL

In [6]:
# distance_method = "Binomial"

### DATA WEIGHT = 0.0

In [7]:
# GMMweight = 0.0
# ncl = 2
# ncomp = 2

# plotScoresLoadings(data, info, Y_cv, ncl, ncomp, distance_method, GMMweight)

### DATA WEIGHT = 0.5

In [8]:
# GMMweight = 0.5

# plotScoresLoadings(data, info, Y_cv, ncl, ncomp, distance_method, GMMweight)

### DATA WEIGHT = 2.0

In [9]:
# GMMweight = 2.0

# plotScoresLoadings(data, info, Y_cv, ncl, ncomp, distance_method, GMMweight)

## PAM250

In [10]:
# Distance method passed through plotScoresLoadings to MassSpecClustering
# by the cells that follow.
distance_method = "PAM250"

### DATA WEIGHT = 0.0

In [11]:
# Settings for this run; ncl and ncomp are reused unchanged by the cells below.
GMMweight, ncl, ncomp = 0.0, 4, 2

plotScoresLoadings(
    data, info, Y_cv,
    ncl, ncomp,
    distance_method, GMMweight,
)

EOFError: marshal data too short

### DATA WEIGHT = 0.5

In [None]:
# Same plot with GMMweight = 0.5; ncl, ncomp and distance_method carry over
# from the earlier cells.
GMMweight = 0.5

plotScoresLoadings(
    data, info, Y_cv,
    ncl, ncomp,
    distance_method, GMMweight,
)

### DATA WEIGHT = 2.0

In [None]:
# Same plot with GMMweight = 2.0; ncl, ncomp and distance_method carry over
# from the earlier cells.
GMMweight = 2.0

plotScoresLoadings(
    data, info, Y_cv,
    ncl, ncomp,
    distance_method, GMMweight,
)