In [1]:
import os
import scipy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from openpyxl import load_workbook

from scipy import stats as stats
from copy import copy as copy


In [2]:
# read in paths from config file

configDict = {
    'rootDir': '',
    'initialDataPath' : '',
    'QCDataPath' : '',
    # 'labWorksheet01Path':'',
    'projectName':''
}

# with open(os.path.join(workingDir, 'config.txt'),'r') as f:
with open('config.txt','r') as f:
    lines = f.readlines()
    for line in lines:
        if not line.startswith('#'):
            line = line.strip()
            fields = line.split(':')

            if fields[0].strip()=='initialDataPath':
                configDict[fields[0].strip()] = fields[1].strip().strip('\'')
            else:
                configDict[fields[0].strip()] = fields[1].strip().strip('\'')
## ToDo: Add checks to ensure that minimal fields have been populated. Raise errors or warnings


In [3]:
configDict

{'rootDir': '/Users/upton6/Documents/Nanostring/Projects/NS_Coeliac/DSP_Output/',
 'initialDataPath': 'Initial Dataset.xlsx',
 'QCDataPath': 'Default_QC.xlsx',
 'projectName': 'Subramaniam_msWTA',
 'DCCPath': ''}

# Define functions

In [4]:
class master_data:
    def __init__(self, dataPath):
        ### import data from excel workbook
        
        self.wb = load_workbook(dataPath)
        self.segWs = self.wb['SegmentProperties']
        self.cntWs = self.wb['BioProbeCountMatrix']
        
        self.segValues = [[y.value for y in x] for x in self.segWs[self.segWs.calculate_dimension()]]
        self.cntValues = [[y.value for y in x] for x in self.cntWs[self.cntWs.calculate_dimension()]]

        self.dropData = False
        
        
    def get_data(self):
        ### Convert nested list to a pandas dataFrame and extract expression data with labels
        cntData = self.cntValues
        cntCols = self.cntValues[0]
        df = pd.DataFrame(self.cntValues)
        cntIndex = [x[0] for x in self.cntValues[1:]]
        cntDF = pd.DataFrame(self.cntValues[1:], index=cntIndex, columns=cntCols)
        self.counts = cntDF.iloc[:,12:]
        self.counts = self.counts.astype(np.float64)      # Convert datatype to float64
        self.priobeInfo = cntDF.iloc[:,:12]
        segCols = self.segValues[0]
        segIndex = [x[4].replace(' | ',('_')) for x in self.segValues[1:]]
        self.segData = pd.DataFrame(self.segValues[1:], index=segIndex, columns=segCols)

        self.dataOrig = self.counts.copy()
        self.dataLog1 = np.log2(self.counts+1)            # Log transform data for QC and analysis steps

        self.probeClass = False                           # Keep a copy of the original data before transformation or normalisation
        # self.probeClass = df.iloc[self.targIdx:,2]      ### Index needs updating here also
        # self.probeClass.rename(index=rowLabels, inplace=True)
        # self.probeClass.rename(index='ProbeClass', inplace=True)

        # ToDo: Need to update probeclass handling and labeling
        
        self.probeClassDict = {
            'Positive': 'A',
            'Negative': 'B',
            'Control': 'C',
            'Endogenous': 'E'}

        return(self.counts, self.priobeInfo, self.segData)

    def get_descriptors(self):
        pass

    def add_class_mean(self, df):
        pass

    def drop_AOIs(self, includes, writeOrig=False):
        pass
        
    def set_threshold(self, threshold):
        self.threshold = threshold
        # ToDo: Check that all values in master data are also included in threshold dataFrame
        # ToDo: Convert threshold data to 0/1 data if needed

    def drop_probes(self, labels):
        pass
        


In [5]:
def plot_SA_Hist(surfArea):
    fig, ax = plt.subplots(figsize=(10,5))
    ax.hist(surfArea, bins=50)
    ax.set_xlabel('AOI Surface Area µm', fontdict=labelFont)
    ax.set_ylabel('Count', fontdict=labelFont)
    ax.set_title('AOI Surface Area distribution', fontdict=titleFont)
    print('Min SA')
    print(min(surfArea))
    print('Max SA')
    print(max(surfArea))
    return(fig)


In [6]:
# Plot log2 transformed raw data before any normalisation

def draw_probe_plot(dataRaw, dataSortedRaw, namedColourList, title, exp=False, violin=False):
    
    fig, ax = plt.subplots(figsize=(15,8))
    
    if exp:
        ax.boxplot(np.exp2(dataRaw.drop(labels=['mean','probeClass'], axis=1).reindex(labels=dataSortedRaw.index).T) -1, sym='-', labels=dataSortedRaw.index)
    else:
        ax.boxplot(dataRaw.drop(labels=['mean','probeClass'], axis=1).reindex(labels=dataSortedRaw.index).T, sym='-', labels=dataSortedRaw.index)

    
    if violin:
        if exp:
            ax.violinplot(np.exp2(dataRaw.drop(labels=['mean','probeClass'], axis=1).reindex(labels=dataSortedRaw.index).T) -1)

    #         ax.boxplot(np.exp2(dataRaw.drop(labels=['mean','probeClass'], axis=1).reindex(labels=dataSortedRaw.index).T) -1, sym='-', labels=dataSortedRaw.index)
        else:
            ax.violinplot(dataRaw.drop(labels=['mean','probeClass'], axis=1).reindex(labels=dataSortedRaw.index).T)

    #         ax.boxplot(dataRaw.drop(labels=['mean','probeClass'], axis=1).reindex(labels=dataSortedRaw.index).T, sym='-', labels=dataSortedRaw.index)
        
        
    else:


        for i,j in enumerate(dataSortedRaw.index):
            y = dataRaw.drop(labels=['mean','probeClass'], axis=1).loc[j]
            colours = [namedColourList[2] if v.split('_')[-1] == 'Tumour' else namedColourList[5] if v.split('_')[-1] == 'TME' else namedColourList[1] for v in y.index]
        #     colours = [colourList[2] if v.split('_')[-1] == 'Tumour' else colourList[5] if v.split('_')[-1] == 'Immune' else colourList[1] for v in y.index]
            y = y
            if exp:
                y = np.exp2(y.values)-1
            else:
                y = y.values

            x = np.random.normal(i+1, 0.1, len(y))

            for i in range(len(x)): 
                ax.plot(x[i], y[i], c=colours[i], marker='.', alpha=0.25)

    ax.set_xticks(np.arange(1,len(dataSortedRaw.index)+1,1))
    ax.set_xlabel=list(dataSortedRaw.index)
    
    print(len(np.arange(0,len(dataSortedRaw.index),1)))
    print(len(list(dataSortedRaw.index)))
    
    ax.tick_params(axis='x', labelrotation = 90)
    

    if exp:
        ax.semilogy()
        ax.set_title(title + ' (untransformed)', size=36)
        ax.set_ylabel('Probe value', size=24)
    else:
        ax.set_title(title + ' (Log2 transformed)', size=36)
        ax.set_ylabel('Log2 probe value', size=24)
#     plt.show()
    
    return(fig)


In [7]:
def probe_GeoMean_Plots(plotData, title=''):
    rows=1
    cols=2
    colours = [namedColourList[2] if x.split('_')[-1] == 'Tumour' else namedColourList[5] if x.split('_')[-1] == 'TME' else namedColourList[1] for x in HKGeoMean.index]

    fig,ax = plt.subplots(rows,cols, sharey=True, gridspec_kw={'width_ratios': [4,1]}, figsize=(15,5))
    ax[0].bar(np.linspace(1,len(plotData),len(plotData)), plotData, color=colours)
    ax[1].hist(plotData, bins=int(len(plotData)/10),orientation='horizontal', color='k')
    ax[0].set_xlim(0,len(plotData))
    
    ax[0].text(2,max(plotData)*.95,'Tumour', size=20, c=namedColourList[2])
    ax[0].text(2,max(plotData)*.825,'TME', size=20, c=namedColourList[5])
    ax[0].text(2,max(plotData)*.7,'Other', size=20, c=namedColourList[1])

    fig.suptitle(title, size=36)
    ax[0].set_ylabel('Probe Value', size=18)
    ax[0].set_xlabel('Probes', size=18)
    ax[1].set_xlabel('Count', size=18)

    fig.tight_layout()

In [8]:
class threshold_probes:
    def __init__(self, data, bins):
        
        self.data = data.drop(labels=['mean','probeClass'], axis=1)
        self.bins = bins
        self.thisHist = plt.hist(self.data.values.flatten(), bins = self.bins)

    def zoom_plot(self, start, end):
        plt.hist(self.data.values.flatten(), bins = self.bins)
        plt.xlim(0,3)        
        
    def check_threshold(self, start, end):
        print(self.thisHist[0][start:end])
        print(self.thisHist[1][start:end])

    def set_threshold_idx(self, idx):
        print(self.thisHist[0][idx])
        print(self.thisHist[1][idx])
        
        self.threshold_idx = idx
        self.threshold = self.thisHist[1][idx]

    def get_filter(self):
        self.ETfilter = self.data >= self.threshold
        return(self.ETfilter)


In [9]:
def binding_density_plot(sampleInfoExternal, selectedInfo, subSelection):
    # print('selectedInfo')
    # print(selectedInfo)
    
    if not (subSelection == None):
        selectedInfo = selectedInfo.loc[subSelection]
    if (type(selectedInfo) == pd.core.series.Series):
        selectedInfo = pd.DataFrame(selectedInfo).T
        
    comboUniques = []
    comboColourDictRev = {}
    for c in selectedInfo.columns:
        thisCol = selectedInfo[c]
        combined = '_'.join(thisCol.values)
        comboUniques.append(combined)
        comboColourDictRev[c] = combined
    comboUniques = sorted(list(set(comboUniques)))
    print('\nNumber of unique combinations: {}'.format(len(comboUniques)))
    # print(comboColourDictRev)

    gradient = np.linspace(0, 1, len(comboUniques))
    gradDict = dict(zip(comboUniques,gradient))
    
    sampleInfoExternal.sort_values(by=['Plate', 'Col', 'Row'], axis=1, inplace=True)
    # Binding Density plot:
    plt.figure(figsize=(40,10))
    my_cmap = plt.get_cmap("nipy_spectral")
    
    colours = []
    for c in sampleInfoExternal.columns:
        colours.append(gradDict[comboColourDictRev[c]])
    
    print('selectedInfo.index')
    print(list(selectedInfo.index))

    fig, ax = plt.subplots(figsize=(20,5))

    
    bar = ax.bar(sampleInfoExternal.columns,
            sampleInfoExternal.loc['BindingDensity'].values.astype(np.float32), 
            color=my_cmap(colours)
           )#, bottom=0)
    ax.set_title('_'.join(selectedInfo.index))
    ax.set_xticklabels(sampleInfoExternal.columns, rotation='vertical')
    # ax.legend()
    plt.show()

# ToDo: Add legend

# Import data from Nanostring initial dataset file.

In [10]:
dataPath = os.path.join(configDict['rootDir'],configDict['initialDataPath'])
dataPath = os.path.join(configDict['rootDir'],configDict['QCDataPath'])

masterData = master_data(dataPath)


counts, probes, segs = masterData.get_data()


counts

# probes

# segs

  warn("Workbook contains no default style, apply openpyxl's default")


Unnamed: 0,TRI-QUT Coeliac Study_TMA Block 4 | 001 | Segment 1,TRI-QUT Coeliac Study_TMA Block 4 | 001 | Segment 2,TRI-QUT Coeliac Study_TMA Block 4 | 002 | Segment 1,TRI-QUT Coeliac Study_TMA Block 4 | 002 | Segment 2,TRI-QUT Coeliac Study_TMA Block 4 | 003 | Segment 1,TRI-QUT Coeliac Study_TMA Block 4 | 003 | Segment 2,TRI-QUT Coeliac Study_TMA Block 4 | 004 | Segment 1,TRI-QUT Coeliac Study_TMA Block 4 | 004 | Segment 2,TRI-QUT Coeliac Study_TMA Block 4 | 005 | Segment 1,TRI-QUT Coeliac Study_TMA Block 4 | 005 | Segment 2,...,TRI-QUT Coeliac Study_TMA Block 1 | 026 | Segment 1,TRI-QUT Coeliac Study_TMA Block 1 | 026 | Segment 2,TRI-QUT Coeliac Study_TMA Block 1 | 027 | Segment 1,TRI-QUT Coeliac Study_TMA Block 1 | 027 | Segment 2,TRI-QUT Coeliac Study_TMA Block 1 | 028 | Segment 1,TRI-QUT Coeliac Study_TMA Block 1 | 028 | Segment 2,TRI-QUT Coeliac Study_TMA Block 1 | 029 | Segment 1,TRI-QUT Coeliac Study_TMA Block 1 | 029 | Segment 2,TRI-QUT Coeliac Study_TMA Block 1 | 030 | Segment 1,TRI-QUT Coeliac Study_TMA Block 1 | 030 | Segment 2
48107,149.0,50.0,297.0,67.0,234.0,29.0,447.0,43.0,612.0,47.0,...,279.0,24.0,484.0,52.0,366.0,21.0,121.0,44.0,412.0,53.0
45080,125.0,50.0,144.0,55.0,208.0,42.0,190.0,59.0,162.0,43.0,...,151.0,34.0,197.0,56.0,71.0,22.0,139.0,66.0,185.0,71.0
48627,61.0,45.0,64.0,49.0,91.0,24.0,71.0,31.0,73.0,34.0,...,80.0,24.0,63.0,40.0,20.0,16.0,97.0,36.0,89.0,34.0
36143,41.0,24.0,75.0,49.0,68.0,26.0,62.0,38.0,69.0,25.0,...,93.0,29.0,60.0,38.0,26.0,20.0,87.0,71.0,83.0,51.0
41381,53.0,37.0,70.0,51.0,79.0,23.0,69.0,35.0,41.0,29.0,...,77.0,19.0,52.0,27.0,30.0,9.0,95.0,58.0,94.0,42.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44262,133.0,79.0,127.0,100.0,211.0,50.0,195.0,74.0,162.0,49.0,...,195.0,52.0,190.0,87.0,76.0,37.0,185.0,98.0,178.0,82.0
37145,42.0,42.0,41.0,50.0,47.0,28.0,62.0,31.0,44.0,28.0,...,54.0,19.0,45.0,39.0,26.0,12.0,66.0,38.0,71.0,35.0
51325,262.0,66.0,180.0,47.0,452.0,51.0,340.0,61.0,249.0,34.0,...,294.0,43.0,242.0,56.0,131.0,18.0,211.0,59.0,327.0,79.0
50331,70.0,44.0,67.0,51.0,114.0,27.0,87.0,40.0,71.0,30.0,...,85.0,25.0,84.0,39.0,28.0,9.0,114.0,37.0,109.0,38.0


ToDo: Need to check that column names are unique in SegmentProperties!! KO has 2 entries in current version, need to trace down where this has been introduced.

In [11]:
dropCols = ['SlideName', 'ScanLabel', 'ROILabel', 'SegmentLabel',
            'SegmentDisplayName', 'Origin Instrument ID', 'AOISurfaceArea', 
            'AOINucleiCount', 'ROICoordinateX', 'ROICoordinateY', 
            'RawReads', 'AlignedReads', 'DeduplicatedReads', 'TrimmedReads', 
            'StitchedReads', 'SequencingSaturation', 'SequencingSetID', 
            'UMIQ30', 'RTSQ30', 'GeoMxNgsPipelineVersion', 'LOT_Mouse_NGS_Whole_Transcriptome_Atlas_RNA_1_0',
            'ROIID', 'SegmentID', 'ScanWidth', 'ScanHeight', 'ScanOffsetX', 'ScanOffsetY']

keepCols = [x for x in segs.columns if not x in dropCols]

for c in keepCols:
    print(c)
    print(len(set(segs[c])))
    print(set(segs[c]))
    print()

Segment 1
2
{'True', 'False'}

Segment 2
2
{'True', 'False'}

QCFlags
5
{'Low Surface Area,Low Negative Probe Count for Probe Kit Human NGS Whole Transcriptome Atlas RNA_1.0,Low Nuclei Count', None, 'Low Surface Area,Low Nuclei Count', 'Low Nuclei Count', 'Low Negative Probe Count for Probe Kit Human NGS Whole Transcriptome Atlas RNA_1.0'}

LOT_Human_NGS_Whole_Transcriptome_Atlas_RNA_1_0
1
{'HWTA21004'}

Group
3
{'Placebo', 'Control', 'Hookworm'}

Time_point
4
{'Baseline', 'Post challenge', 'Post hookworm', 'Control'}

Clinical_outcome
3
{'Non responder', 'Control', 'Responder'}

Core
69
{'001/010 17_15576_a', '002/002 18_13187_b', '003/011 17_19905_b', '001/015 18_11839_a', '003/015 17_40912_b', '001/031 17_40620_a', '001/014 18_11840_a', '001/022 18_16609_a', '002/003 18_13185_b', '003/006 17_33069_b', '001/014 17_35196_b', '001/015 17_35197_b', '001/009 17_31104_a', '003/002 18_02240_b', '001/002 18_01086_b', '001/002 17_15571_b', '001/028 17_40618_b', '003/011 18_04540_a', '001/022

In [12]:
segs

Unnamed: 0,SlideName,ScanLabel,ROILabel,SegmentLabel,SegmentDisplayName,Origin Instrument ID,Segment 1,Segment 2,QCFlags,AOISurfaceArea,...,Group,Time_point,Clinical_outcome,Core,ROIID,SegmentID,ScanWidth,ScanHeight,ScanOffsetX,ScanOffsetY
TRI-QUT Coeliac Study_TMA Block 4_001_Segment 1,TRI-QUT Coeliac Study_TMA Block 4,TRI-QUT Coeliac Study_TMA Block 4,001,Segment 1,TRI-QUT Coeliac Study_TMA Block 4 | 001 | Segm...,2001G0086,True,False,,41162.306660,...,Control,Control,Control,001/018 17_29377,810d3b02-f857-4695-a318-a96bd4809c13,50a4b52d-c29f-4c0c-b4f9-5231ad0b8bb6,18690.275391,70258.562500,7039.0,5830.0
TRI-QUT Coeliac Study_TMA Block 4_001_Segment 2,TRI-QUT Coeliac Study_TMA Block 4,TRI-QUT Coeliac Study_TMA Block 4,001,Segment 2,TRI-QUT Coeliac Study_TMA Block 4 | 001 | Segm...,2001G0086,False,True,,37706.250098,...,Control,Control,Control,001/018 17_29377,810d3b02-f857-4695-a318-a96bd4809c13,ede3e81a-8804-4234-84a4-e6d808f68860,18690.275391,70258.562500,7039.0,5830.0
TRI-QUT Coeliac Study_TMA Block 4_002_Segment 1,TRI-QUT Coeliac Study_TMA Block 4,TRI-QUT Coeliac Study_TMA Block 4,002,Segment 1,TRI-QUT Coeliac Study_TMA Block 4 | 002 | Segm...,2001G0086,True,False,,36039.266939,...,Hookworm,Post challenge,Non responder,001/015 18_11839_a,b347940b-a902-4a7f-87b5-ed044f98e0e2,5a86ee5b-ce6b-4f79-b11e-3d844b8d5bae,18690.275391,70258.562500,7039.0,5830.0
TRI-QUT Coeliac Study_TMA Block 4_002_Segment 2,TRI-QUT Coeliac Study_TMA Block 4,TRI-QUT Coeliac Study_TMA Block 4,002,Segment 2,TRI-QUT Coeliac Study_TMA Block 4 | 002 | Segm...,2001G0086,False,True,,50750.904001,...,Hookworm,Post challenge,Non responder,001/015 18_11839_a,b347940b-a902-4a7f-87b5-ed044f98e0e2,c0d67878-b053-4d3b-a898-cdc91b00abb8,18690.275391,70258.562500,7039.0,5830.0
TRI-QUT Coeliac Study_TMA Block 4_003_Segment 1,TRI-QUT Coeliac Study_TMA Block 4,TRI-QUT Coeliac Study_TMA Block 4,003,Segment 1,TRI-QUT Coeliac Study_TMA Block 4 | 003 | Segm...,2001G0086,True,False,,51688.829455,...,Placebo,Post challenge,Responder,002/002 18_13187_a,0a1d6f54-3e12-4ead-88f9-9ecd5ead5c77,733e608f-0803-4dd9-8a6f-e2fac125825b,18690.275391,70258.562500,7039.0,5830.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TRI-QUT Coeliac Study_TMA Block 1_028_Segment 2,TRI-QUT Coeliac Study_TMA Block 1,TRI-QUT Coeliac Study_TMA Block 1,028,Segment 2,TRI-QUT Coeliac Study_TMA Block 1 | 028 | Segm...,2001G0086,False,True,,20138.873102,...,Placebo,Baseline,Non responder,001/010 17_15576_b,2e3bd7e8-3898-4a87-a481-b7851133b65e,605284a8-c955-4b17-b60f-f8e458bca7b7,13164.822266,68415.953125,1609.0,6752.0
TRI-QUT Coeliac Study_TMA Block 1_029_Segment 1,TRI-QUT Coeliac Study_TMA Block 1,TRI-QUT Coeliac Study_TMA Block 1,029,Segment 1,TRI-QUT Coeliac Study_TMA Block 1 | 029 | Segm...,2001G0086,True,False,,46876.795236,...,Hookworm,Post hookworm,Responder,003/015 18_00548_a,3fb36b84-e580-485b-8fd4-05c748bc6ce2,12aa7779-b9cf-4e09-ad43-96e638779053,13164.822266,68415.953125,1609.0,6752.0
TRI-QUT Coeliac Study_TMA Block 1_029_Segment 2,TRI-QUT Coeliac Study_TMA Block 1,TRI-QUT Coeliac Study_TMA Block 1,029,Segment 2,TRI-QUT Coeliac Study_TMA Block 1 | 029 | Segm...,2001G0086,False,True,,50995.242845,...,Hookworm,Post hookworm,Responder,003/015 18_00548_a,3fb36b84-e580-485b-8fd4-05c748bc6ce2,763f9a1c-710e-45e3-80ff-cbac982767c8,13164.822266,68415.953125,1609.0,6752.0
TRI-QUT Coeliac Study_TMA Block 1_030_Segment 1,TRI-QUT Coeliac Study_TMA Block 1,TRI-QUT Coeliac Study_TMA Block 1,030,Segment 1,TRI-QUT Coeliac Study_TMA Block 1 | 030 | Segm...,2001G0086,True,False,,56949.636379,...,Hookworm,Post hookworm,Responder,003/015 18_00548_b,064f8316-b93e-4b4e-895f-99932d936f32,8cf8847f-4d98-4355-892e-eac5d00fdfa4,13164.822266,68415.953125,1609.0,6752.0


In [13]:
set(segs['SlideName'])

{'TRI-QUT Coeliac Study_TMA Block 1',
 'TRI-QUT Coeliac Study_TMA Block 2',
 'TRI-QUT Coeliac Study_TMA Block 3',
 'TRI-QUT Coeliac Study_TMA Block 4'}

In [14]:
expSlideDict = {}
expSlideDict['exp1'] = {'SlideName': ['PFAC_Liver_CD63_CD45_Col1A1']}
expSlideDict['exp2'] = {'SlideName': ['SFAC_Female_2_SMA_CD45_Col1A1']}
expSlideDict['exp3'] = {'SlideName': ['SKC_Liver_Female_2_SMA_CD45_Col1A1',
                         'SKC_Liver_Male_SMA_CD45_Col1A1',
                         'SKC_Male_Liver_2_SMA_CD45_Col1A1']}

exps = ['exp1', 'exp2', 'exp3']

In [15]:
expIndices = {}


for exp in exps:
    print(exp)

    thisExp = expSlideDict[exp]
    slideNames = thisExp['SlideName']

    masterIndex = []
    
    for s in slideNames:
        # print('s')
        # print(s)
        thisIndex = segs[segs['SlideName'] == s].index
        # print(list(thisIndex))
        masterIndex.extend(list(thisIndex))
    
    expIndices[exp] = masterIndex


exp1
exp2
exp3


In [16]:
expIndices

{'exp1': [], 'exp2': [], 'exp3': []}

In [17]:
expIndices

{'exp1': [], 'exp2': [], 'exp3': []}

In [18]:
for k,exp in expIndices.items():
    # print(exp)
    print(k)
    print(len(exp))

exp1
0
exp2
0
exp3
0


ToDo: Export experimental sets for use in StandR or RUV


 - StandR => countFile, featureAnnoFile, sampleAnnoFile
 - RUV.   => ???

ToDo: Choose column(s) to use for setting up primary experiment sets (eg. slide name) (save as nested dictionary (experiment name : column : value)
ToDo: Choose column variables to use for setting up secondary experiment sets  (save as nested dictionary (experiment name : columns : [values])



In [19]:
break

SyntaxError: 'break' outside loop (668683560.py, line 1)

In [None]:
sampleInfoExternal = masterData.segData.copy()

In [None]:
sampleInfoExternal.columns

In [None]:
sampleInfoExternal['TrimmedReads']

In [None]:
print(masterData.probeClass)
print(masterData.probeClassDict)
# dataLog1External, sampleInfoExternal = masterData.drop_AOIs('#16_9', writeOrig=True)

nuclei = sampleInfoExternal['AOINucleiCount']
# print(nuclei)
surfArea = sampleInfoExternal['AOISurfaceArea']
# print(surfArea)

# dataLog1External, sampleInfoExternal = masterData.add_class_mean(masterData.dataLog1)

In [None]:
slides = set(segs['SlideName'])

colLookup = dict(zip(slides,range(len(slides))))

In [None]:
colLookup

In [None]:
colMap = [colLookup[x]/len(slides) for x in segs['SlideName']]

# colMap = [colLookup[x] for x in segs['SlideName']]


In [None]:
colMap 