In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats
import matplotlib.pylab as pylab
import ipywidgets as widgets
from os import path
from sklearn.decomposition import PCA
from matplotlib.offsetbox import AnchoredText
from IPython.display import clear_output
from IPython.display import display
%matplotlib inline

In [6]:
class StatisticalPlotGenerator:
    """
    Loads, for every sample, the count matrices (real data next to the data simulated from it)
    at several read-length cut off thresholds and draws the comparison plots through PlotPen.

    input:
    @sampleDir: directory holding the "<sample>WithReadsShorterThan<cut>cutOff.csv" files
    @optionPath: space separated file without header whose first column lists the sample names
    """

    # cut off thresholds for which one csv file per sample is read
    CUT_OFFS = (0, 2.5, 5, 7.5, 10)

    def __init__(self,
                 sampleDir = "/home/lima/Projects/simulation/SingleSample/Samples",
                 optionPath = "/home/lima/Projects/simulation/SingleSample/Option.csv"):
        self.__sampleDir = sampleDir
        self.__optionPath = optionPath

        # key is the sample name, value is a dictionary mapping each cut off threshold to the
        # count matrix (a pandas dataframe) read for that threshold
        self.__sampleDicts = {}
        self.__tempDataFrame = None


    def __loadRSEMReal(self, sampleName):
        """
        reads the count matrices of one sample, one csv file per cut off threshold. the unnamed
        first column of every file is renamed to "GeneID".

        input:
        @sampleName: the name of the sample
        """
        matrices = {}
        for cut in self.CUT_OFFS:
            fileName = "{sampleName}WithReadsShorterThan{cut}cutOff.csv".format(sampleName = sampleName, cut = cut)
            dataFrame = pd.read_csv(path.join(self.__sampleDir, fileName))
            matrices[cut] = dataFrame.rename(columns = {"Unnamed: 0": "GeneID"})
        self.__sampleDicts[sampleName] = matrices


    def prepRSEMReal(self):
        """
        reads the sample names from the option file and loads the count matrices of every sample.
        each sample name is printed before its files are read.
        """
        nameOptions = pd.read_csv(self.__optionPath, sep = " ", header = None)
        for name in nameOptions.iloc[ : , 0]:
            print(name)
            # the loader is __loadRSEMReal; the former call to __loadData raised AttributeError
            self.__loadRSEMReal(name)

    def plot(self, samples):
        """
        draws, for every requested sample and every cut off threshold, the 2x2 plot block and the
        correlation scatter plot of log2(count + 1) of the real and the simulated data.

        input:
        @samples: a sample name or an iterable of sample names which have been loaded before

        raises:
        KeyError when a sample has not been loaded
        """
        # a bare string would otherwise be iterated character by character
        if isinstance(samples, str):
            samples = [samples]

        for sample in samples:
            if sample not in self.__sampleDicts:
                raise KeyError("sample {sample} has not been loaded, call prepRSEMReal() first".format(sample = sample))

            sim_name = "{sampleName}_Sim".format(sampleName = sample)
            sampleNames = [sample, sim_name]
            pen = PlotPen()

            # key is the cut off threshold, value is the count matrix read for it
            for cut, matrix in self.__sampleDicts[sample].items():
                logCounts = np.log2(matrix[sampleNames] + 1)
                # every plot gets its own frame, a plotting routine may add columns to what it is given
                pen.plotPlotBlock(logCounts.copy(), cut, sampleNames)
                pen.plotCorrScatterPlot(logCounts.copy(), cut, sampleNames)
                
class PlotPen:
    """
    Stateless collection of the plotting routines used for comparing two columns of counts
    (in this notebook the real data and the data simulated from it).
    """

    def plotPlotBlock(self, df, cutOff, names):
        """
        draws the 2x2 block of violin, distribution, scatter and box plot, saves it as a png file
        in the working directory and shows it.

        input:
        @df: a pandas dataframe whose first two columns are the real and the simulated counts
        @cutOff: the cut off threshold, only used for the texts
        @names: a list with the column name of the real data followed by the one of the simulated data
        """
        params = {'legend.fontsize': 8, 
                    'font.family': 'serif',
                    'font.weight': 'medium',
                    'font.variant': 'normal',
                    'figure.figsize': (10, 10), 
                    'figure.edgecolor': '#04253a', 
                    'figure.titlesize': 8, 
                    'axes.labelsize': 6, 
                    'axes.titlesize': 6, 
                    'xtick.labelsize': 6, 
                    'ytick.labelsize': 6}
        pylab.rcParams.update(params)
        figureOne, axes = plt.subplots(2, 2, constrained_layout = True)
        self.buildPlotBlock(df, axes, cutOff, names)
        figureOne.suptitle('Statistical plots for comparing {realData} & {simulatedData} with reads shorter than {cut} being removed'.format(realData = names[0], simulatedData = names[1], cut = cutOff))
        title = 'Real data and synthetic data of {realData} with reads shorter than {cut} being removed.png'.format(realData = names[0], cut = cutOff)
        figureOne.savefig(title, dpi = 300, bbox_inches = 'tight')
        # show the figure right away instead of piling up open figures while looping over samples
        plt.show()

    def buildPlotBlock(self, df, axes, cutOff, names):
        """
        buildPlotBlock is a helper function putting the violin plot, the distribution plot, the
        scatter plot and the box plot into a 2x2 grid of axes.

        input:
        @df: the dataframe which provides the data, first column real, second column simulated
        @axes: a 2x2 subplot matrix
        @cutOff: the cut off threshold, passed on to the scatter plot
        @names: the column names of the real and the simulated data
        """
        sns.set(style = "whitegrid", palette = "muted", color_codes = True)
        # put the violin plot to axes[0, 0]
        axes[0, 0].clear()
        sns.violinplot(data = df, linewidth = 0.2, palette = "husl", ax = axes[0, 0])

        # put the boxplot to axes[1, 1]
        axes[1, 1].clear()
        sns.boxplot(data = df, linewidth = 0.2, palette = "husl", ax = axes[1, 1])

        # put the distribution plot to axes[0, 1]
        # NOTE: distplot is deprecated in recent seaborn releases, it is kept to leave the figure unchanged
        axes[0, 1].clear()
        sns.distplot(df.iloc[ : , 0], kde = True, color = "b", label = 'Real', ax = axes[0, 1])
        sns.distplot(df.iloc[ : , 1], kde = True, color = "r", label = 'Simulated', ax = axes[0, 1])
        axes[0, 1].legend(loc = 'best', prop = {'size': 8})
        axes[0, 1].set_xlabel("Distribution plot for real data and simulated data")

        # put the scatter plot to axes [1, 0]
        axes[1, 0].clear()
        self.scatterPlot(df, axes[1, 0], cutOff, names)

    @staticmethod
    def _classifyExpression(df):
        """
        sorts every row into one of four classes depending on which of the first two columns is zero.

        input:
        @df: a pandas dataframe, first column real data, second column simulated data

        output:
        a pandas series (same index as df) with the class of every row and a dictionary with the
        number of rows per class under the keys inBoth, neither, realOnly and simOnly
        """
        realZero = (df.iloc[ : , 0] == 0).to_numpy()
        simZero = (df.iloc[ : , 1] == 0).to_numpy()
        bothZero = realZero & simZero

        status = np.full(len(df), "BothSamples", dtype = object)
        status[realZero] = "SimulatedOnly"
        status[simZero] = "RealOnly"
        status[bothZero] = "NeithSample"

        neither = int(bothZero.sum())
        simOnly = int(realZero.sum()) - neither
        realOnly = int(simZero.sum()) - neither
        # the four classes partition the rows, so the rest is found in both
        inBoth = len(df) - simOnly - realOnly - neither
        counts = {"inBoth": inBoth, "neither": neither, "realOnly": realOnly, "simOnly": simOnly}
        return pd.Series(status, index = df.index, name = "ExpressionStatus"), counts

    def scatterPlot(self, df, ax, cutOff, names):
        """
        the scatterPlot function plots a pairwise scatter plot for the simulated data and the real data for the same 
        sample, with the x-coordinates being the counts number of the real data and the y-coordinates being the counts
        number of the simulated data. the points are colored by the class they fall into (found in both, in one
        or in none of the two data sets) and a text box lists the size of every class and the pearson correlation.

        input data:
        @df: a pandas dataframe contains the counts for the real and the simulated data, it is not modified
        @ax: an axes object
        @cutOff: the cut off threshold shown in the text box
        @names: a list contains the sample names
        """
        status, counts = self._classifyExpression(df)
        # work on a copy so that the frame of the caller does not get an extra column
        plotDf = df.assign(ExpressionStatus = status.to_numpy())

        # set up the plotting
        sns.set(style = "whitegrid", palette = "muted", color_codes = True)
        # calculate the correlation between the two sets of data.
        corr = scipy.stats.pearsonr(df.iloc[ : , 0], df.iloc[ : , 1])[0]

        sns.scatterplot(x = names[0], y = names[1], hue = 'ExpressionStatus', marker = '+', style = 'ExpressionStatus', data = plotDf, ax = ax, palette = 'inferno', s = 2.0)
        at = AnchoredText("{inBoth} are found in both real and simulated data\n{neither} expressed in neither real nor simulated data\n{realOnly} are found in real data only\n{simOnly} are found in simulated data only\nCut off threshould: {cut}\nCorr: {corr}".format(
            neither = counts["neither"], realOnly = counts["realOnly"], simOnly = counts["simOnly"], inBoth = counts["inBoth"], corr = corr, cut = cutOff), 
                          prop = dict(size = 8), frameon = True, loc = 'upper left')
        at.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")

        ax.add_artist(at)
        ax.set_xlabel("Real Data")
        ax.set_ylabel("Simulated Data")
        ax.legend(loc = 'lower right', fancybox = True, shadow = False, scatterpoints = 200, prop = {'size': 8})

    def plotCorrScatterPlot(self, df, cutOff, names):
        """
        draws the correlation scatter plot on a figure of its own, saves it as a png file in the
        working directory and shows it.

        input:
        @df: a pandas dataframe with the real counts in the first and the simulated counts in the second column
        @cutOff: the cut off threshold, only used for the texts
        @names: names of samples to be plotted
        """
        print("Called plotCorrScatterPlot")
        params = {'legend.fontsize': 8, 
                  'figure.figsize': (10, 10), 
                  'figure.titlesize': 8,
                  'axes.labelsize': 6, 
                  'axes.titlesize': 6, 
                  'xtick.labelsize': 6, 
                  'ytick.labelsize': 6}
        pylab.rcParams.update(params)
        figureTwo, axes = plt.subplots()
        self.corrScatterPlot(df, axes, cutOff, names)
        title = 'Scattered correlation plot for the real data and synthetic data of {sample} with {cut} cut off.png'.format(sample = names[0], cut = cutOff)
        figureTwo.savefig(title, dpi = 300, bbox_inches = 'tight')
        # show the figure right away instead of piling up open figures while looping over samples
        plt.show()

    def corrScatterPlot(self, df, ax, cutOff, names):
        """
        the corrScatterPlot is a scatter plot on which the counts of the real data and the simulated data are plotted.
        before plotting the scatter plot, first process the data set with a 2-components PCA. picking the principal 
        component keeps the most variability and using it as the x-coordinates (shifted so that it starts at zero).
        then, plot the scatter plot with the counts of the real data and the simulated data as the y-coordinates

        input:
        @df: a pandas dataframe object which provides the data for plotting, numeric columns only
        @ax: an axes object on which the plot is going to be plotted
        @cutOff: the cut off threshold shown in the text box
        @names: names of samples to be plotted

        output:
        nothing is returned, the plot is drawn on ax
        """
        pca = PCA(n_components = 2)
        pca.fit(df)

        # take out the explained_variance_ for finding the one keeps the most variability
        variances = pca.explained_variance_

        # index records the index of the desired variances
        index = 0 if (variances[0] > variances[1]) else 1

        pcs = pca.transform(df)

        # take the components keep the most variability
        pc = pcs[ : , index]

        # calculate how many genes lost in the principal component
        diff = len(df) - len(pc)

        # find the percentage of the variability being kept in the principal component
        var_ratio = pca.explained_variance_ratio_
        var_most = var_ratio[index]

        # find the minimum components
        min_pc = np.amin(pc)

        # check if it is negative, make it non-negative and add it to pc
        if min_pc < 0:
            min_pc = np.abs(min_pc)
            pc = pc + min_pc

        #calculate the correlation between the two sets of data.
        corr = scipy.stats.pearsonr(df.iloc[ : , 0], df.iloc[ : , 1])[0]

        # newer matplotlib releases ship the seaborn styles under a 'seaborn-v0_8-' prefix only
        styleName = 'seaborn-whitegrid'
        if styleName not in plt.style.available:
            styleName = 'seaborn-v0_8-whitegrid'
        plt.style.use(styleName)

        ax.clear()
        ax.scatter(pc, df.iloc[ : , 0], s = 1, c = 'b', alpha = 0.5, marker = "s", label = 'Real')
        ax.scatter(pc, df.iloc[ : , 1], s = 1, c = 'r', alpha = 0.5, marker = "o", label = 'Simulated')

        at = AnchoredText("Sample: {sample}\nCut threshould: {cut}\nRatio of variability being kept in the principal component: {most}\n{diff} genes lost in the principal component\nCorr: {corr}".format(
            sample = names[0], corr = corr, cut = cutOff, most = var_most, diff = diff), 
                          prop = dict(size = 8), frameon = True, loc = 'upper left')
        at.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
        ax.add_artist(at)
        # x holds the principal component, y holds both the real and the simulated counts
        ax.set_xlabel("Principal component")
        ax.set_ylabel("Counts of real and simulated data")
        ax.legend(loc = 'center left', fancybox = True, shadow = False, scatterpoints = 200, prop = {'size': 8});
    

Unnamed: 0,GeneID,PDX_14P3_20,PDX_14P3_20_Sim
0,ENSG00000000003,5066.279722,5159.717472
1,ENSG00000000005,14.092113,14.901953
2,ENSG00000000419,1715.516365,1831.702142
3,ENSG00000000457,192.058293,213.403958
4,ENSG00000000460,365.414831,337.999417
...,...,...,...
20947,ENSG00000273485,132.876114,131.129663
20948,ENSG00000273486,118.774919,117.230137
20949,ENSG00000273487,16.105308,15.895381
20950,ENSG00000273488,45.297330,44.704621


In [39]:
# Read the kallisto count matrix and give the unnamed first column a proper name.
kallPath = "/home/lima/Projects/simulation/TranscriptSingleSample/cntMatrices_Kall.csv"
kallDF = pd.read_csv(kallPath).rename(columns = {"Unnamed: 0": "TranscriptomeID"})
kallDF

Unnamed: 0,TranscriptomeID,PDX_10Veh_Kall,PDX_12Veh_Kall,PDX_13P3_40_Kall,PDX_14P3_20_Kall,PDX_1P3_20_Kall,PDX_2P3_40_Kall,PDX_3P3_40_Kall,PDX_4P3_20_Kall,PDX_5P3_40_Kall,PDX_6P3_20_Kall,PDX_7P3_20_Kall,PDX_8Veh_Kall,PrimaryTumor_Kall
0,ENST00000415118.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
1,ENST00000434970.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
2,ENST00000448914.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
3,ENST00000604642.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
4,ENST00000603326.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
180248,ENST00000450690.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
180249,ENST00000605523.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.814364
180250,ENST00000605654.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.398913
180251,ENST00000603923.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000


In [12]:
# Read the RSEM count matrix and give the unnamed first column a proper name.
rsemPath = "/home/lima/Projects/simulation/TranscriptQuant.RealDataRSEM/cntMatrices_RSEM.csv"
rsemDF = pd.read_csv(rsemPath).rename(columns = {"Unnamed: 0": "TranscriptomeID"})
rsemDF

Unnamed: 0,TranscriptomeID,PDX_10Veh,PDX_12Veh,PDX_13P3_40,PDX_14P3_20,PDX_1P3_20,PDX_2P3_40,PDX_3P3_40,PDX_4P3_20,PDX_5P3_40,PDX_6P3_20,PDX_7P3_20,PDX_8Veh,PrimaryTumor
0,ENST00000373020,3110.826328,1373.233983,4511.588795,3094.840621,3833.497701,4608.401041,3907.481643,4794.502657,5532.675937,3763.096813,3977.164745,4649.666301,4372.383189
1,ENST00000494424,20.942942,47.558741,63.957345,31.399837,11.134898,26.683315,19.393524,34.556585,48.489554,33.022034,51.166021,40.050277,39.141640
2,ENST00000496771,73.652784,0.000000,114.277138,54.479622,0.000000,105.946174,50.769851,99.604675,131.142126,81.204527,71.727135,90.073029,101.147416
3,ENST00000373031,9.199564,23.186203,23.203554,9.515067,5.582347,3.895577,2.241603,4.645070,7.746967,3.959939,6.100241,13.082332,0.000000
4,ENST00000485971,0.000000,24.733969,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196349,ENST00000608041,9.270182,76.011547,5.335229,25.616037,24.324704,24.286108,22.277398,16.708099,9.101200,5.961555,5.372735,8.497456,46.326490
196350,ENST00000610193,18.372827,772.492540,2.327237,0.677210,36.920302,3.239746,20.949518,1.988206,1.289507,10.553138,12.946252,2.306854,142.329322
196351,ENST00000608591,0.000000,206.432205,6.197407,2.713438,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.524030,0.000000,0.000000
196352,ENST00000609365,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [55]:
# Strip the version suffix (".1", ".12", ...) from the kallisto transcript ids so that they match
# the unversioned ids of the RSEM table. The pattern is a raw string, removes every digit of the
# version and is explicitly treated as a regular expression (newer pandas defaults to a literal match).
kallDF['TranscriptomeID'] = kallDF['TranscriptomeID'].str.replace(r"\.[0-9]+", "", regex = True)

In [56]:
# Keep the transcript id and the kallisto counts of sample PDX_10Veh only.
kallTest = kallDF.loc[ : , ['TranscriptomeID', 'PDX_10Veh_Kall']]

In [57]:
# Keep the transcript id and the RSEM counts of sample PDX_10Veh only.
rsemTest = rsemDF.loc[ : , ['TranscriptomeID', 'PDX_10Veh']]

In [58]:
# Left join on the transcript id: every RSEM row is kept, transcripts missing from the
# kallisto table get NaN in PDX_10Veh_Kall.
mergedDF = rsemTest.join(
    other = kallTest.set_index('TranscriptomeID'),
    on = 'TranscriptomeID',
    how = 'left',
)

In [59]:
# Show the joined table for inspection.
mergedDF

Unnamed: 0,TranscriptomeID,PDX_10Veh,PDX_10Veh_Kall
0,ENST00000373020,3110.826328,4331.283964
1,ENST00000494424,20.942942,37.930461
2,ENST00000496771,73.652784,101.621339
3,ENST00000373031,9.199564,12.946618
4,ENST00000485971,0.000000,0.000000
...,...,...,...
196349,ENST00000608041,9.270182,
196350,ENST00000610193,18.372827,
196351,ENST00000608591,0.000000,
196352,ENST00000609365,0.000000,


In [60]:
# Row labels of the transcripts without a kallisto value (NaN after the left join).
kallNaNIdx = mergedDF.index[mergedDF['PDX_10Veh_Kall'].isna()]
kallNaNIdx

Int64Index([   443,    839,    840,    841,    842,    843,    844,    845,
               846,    847,
            ...
            196343, 196344, 196346, 196347, 196348, 196349, 196350, 196351,
            196352, 196353],
           dtype='int64', length=32978)

In [64]:
# The same row labels as a plain python list.
kallNaNIdxList = kallNaNIdx.tolist()

In [69]:
# Show the rows which have no kallisto value.
mergedDF.loc[kallNaNIdxList]

Unnamed: 0,TranscriptomeID,PDX_10Veh,PDX_10Veh_Kall
443,ENST00000004531,0.000000,
839,ENST00000382189,0.000000,
840,ENST00000452401,60.963431,
841,ENST00000585725,71.375940,
842,ENST00000586332,2.524157,
...,...,...,...
196349,ENST00000608041,9.270182,
196350,ENST00000610193,18.372827,
196351,ENST00000608591,0.000000,
196352,ENST00000609365,0.000000,


In [70]:
# Row labels of the transcripts without an RSEM value, as an index and as a plain list.
rsemNaNIdx = mergedDF.index[mergedDF['PDX_10Veh'].isna()]
rsemNaNIdxList = rsemNaNIdx.tolist()

In [71]:
# Show the rows which have no RSEM value.
mergedDF.loc[rsemNaNIdxList]

Unnamed: 0,TranscriptomeID,PDX_10Veh,PDX_10Veh_Kall


In [72]:
# Row labels of the transcripts whose RSEM count is exactly zero.
rsemZeroIdx = mergedDF.index[mergedDF['PDX_10Veh'].eq(0.0)]
rsemZeroIdxList = rsemZeroIdx.tolist()

In [74]:
# Row labels of the transcripts whose kallisto count is exactly zero (NaN does not compare equal).
kallZeroIdx = mergedDF.index[mergedDF['PDX_10Veh_Kall'].eq(0.0)]
kallZeroIdxList = kallZeroIdx.tolist()

In [77]:
# Turn the three lists of row labels into sets for the set algebra that follows.
kallNaNIdxSet, rsemZeroIdxSet, kallZeroIdxSet = (
    set(indexList) for indexList in (kallNaNIdxList, rsemZeroIdxList, kallZeroIdxList)
)

In [145]:
# Split the row labels into the classes used for labelling the transcripts.
# Missing in kallisto and zero in RSEM.
foundByRsemOnlyButZero = kallNaNIdxSet.intersection(rsemZeroIdxSet)
# Missing in kallisto and not zero in RSEM.
foundByRsemOnlyAndExpressed = kallNaNIdxSet.difference(foundByRsemOnlyButZero)
# Zero in both tools.
foundByBothButZeroInBoth = rsemZeroIdxSet.intersection(kallZeroIdxSet)
# Zero in RSEM while kallisto reports a non-zero value.
foundByBothButZeroInRsem = rsemZeroIdxSet.difference(foundByBothButZeroInBoth, foundByRsemOnlyButZero)
# Zero in kallisto but not zero in RSEM.
foundByBothButZeroInKall = kallZeroIdxSet.difference(foundByBothButZeroInBoth)

In [146]:
# Sanity check: the two classes must not share a row, an empty set is expected.
foundByRsemOnlyButZero & foundByBothButZeroInRsem

set()

In [167]:
# Start with the default class for every row; the rows of the other classes are overwritten afterwards.
mergedDF['Label'] = 'FoundByBothAndExpreseedInBoth'

In [168]:
# Label the rows missing in kallisto whose RSEM count is zero.
rsemOnlyZeroMask = mergedDF.index.isin(foundByRsemOnlyButZero)
mergedDF.loc[rsemOnlyZeroMask, 'Label'] = "FoundByRsemOnlyButZero"

In [169]:
# Label the rows missing in kallisto whose RSEM count is not zero.
rsemOnlyExpressedMask = mergedDF.index.isin(foundByRsemOnlyAndExpressed)
mergedDF.loc[rsemOnlyExpressedMask, 'Label'] = "FoundByRsemOnlyAndExpressed"

In [170]:
# Label the rows which are zero in RSEM and in kallisto.
zeroInBothMask = mergedDF.index.isin(foundByBothButZeroInBoth)
mergedDF.loc[zeroInBothMask, 'Label'] = "FoundByBothButZeroInBoth"

In [171]:
# Label the rows which are zero in RSEM only.
zeroInRsemMask = mergedDF.index.isin(foundByBothButZeroInRsem)
mergedDF.loc[zeroInRsemMask, 'Label'] = "FoundBytBothButZeroInRsem"

In [172]:
# Label the rows which are zero in kallisto only.
zeroInKallMask = mergedDF.index.isin(foundByBothButZeroInKall)
mergedDF.loc[zeroInKallMask, 'Label'] = "FoundByBothButZeroInKall"

In [173]:
# Replace the missing kallisto values by zero. This has to stay after the labelling cells:
# the NaN values are what kallNaNIdx is computed from, re-running that cell afterwards finds none.
mergedDF['PDX_10Veh_Kall'] = mergedDF['PDX_10Veh_Kall'].fillna(0.0)

In [174]:
# Plotting frame: the two count columns and the label, without the transcript id.
df = mergedDF.drop('TranscriptomeID', axis = 'columns')

In [175]:
# log2(count + 1) of both count columns. The values are taken from mergedDF, which df was
# derived from row by row, so that re-running this cell does not apply the logarithm twice.
countColumns = ['PDX_10Veh', 'PDX_10Veh_Kall']
df[countColumns] = np.log2(mergedDF[countColumns].to_numpy() + 1)

In [176]:
# Show the class assigned to every row.
df['Label']

0         FoundByBothAndExpreseedInBoth
1         FoundByBothAndExpreseedInBoth
2         FoundByBothAndExpreseedInBoth
3         FoundByBothAndExpreseedInBoth
4              FoundByBothButZeroInBoth
                      ...              
196349      FoundByRsemOnlyAndExpressed
196350      FoundByRsemOnlyAndExpressed
196351           FoundByRsemOnlyButZero
196352           FoundByRsemOnlyButZero
196353           FoundByRsemOnlyButZero
Name: Label, Length: 196354, dtype: object

In [None]:
# 2x2 overview of the RSEM and the kallisto counts held in df:
# violin plot (top left), distribution plot (top right), scatter plot (bottom left), box plot (bottom right).
sns.set(style = "whitegrid", palette = "muted", color_codes = True)
figureOne, axes = plt.subplots(2, 2, constrained_layout = True, figsize = (20, 20))
(violinAx, distAx), (scatterAx, boxAx) = axes

# violin plot, box plot and distribution plot start from cleared axes
for panel in (violinAx, boxAx, distAx):
    panel.clear()

sns.violinplot(data = df, linewidth = 0.8, palette = "husl", ax = violinAx)
sns.boxplot(data = df, linewidth = 0.8, palette = "husl", ax = boxAx)

# distribution of the first (RSEM) and the second (kallisto) column on shared axes
for column, color, label in ((0, "b", 'RSEM'), (1, "r", 'Kallisto')):
    sns.distplot(df.iloc[ : , column], kde = True, color = color, label = label, ax = distAx)
distAx.legend(loc = 'best', prop = {'size': 10})
distAx.set_xlabel("Distribution plot for counts found by rsem and counts found by kall")

# pairwise scatter plot colored by the class of every transcript
sns.scatterplot(x = 'PDX_10Veh', y = 'PDX_10Veh_Kall', hue = 'Label', marker = '+', data = df, ax = scatterAx, palette = 'muted', s = 2.0)
scatterAx.set_xlabel("RSEM")
scatterAx.set_ylabel("Kallisto")
scatterAx.legend(loc = 'lower right', fancybox = True, shadow = False, scatterpoints = 200, prop = {'size': 8})