# Check Cell Population Heterogeneity

## Libraries

In [1]:
import MySQLdb
import pandas
import numpy as np
from matplotlib import pylab as plt
import os
import seaborn as sns
from scipy.stats import mannwhitneyu as mw
from scipy import stats
import operator
from sklearn.preprocessing import StandardScaler,RobustScaler
from sklearn.decomposition import PCA
from scipy import stats
import operator

## Routine Functions

In [2]:
def ensure_dir(file_path):
    '''
    Make sure the directory part of a path exists, creating it if necessary.

    Only ``os.path.dirname(file_path)`` is created, so pass either a file path
    or a directory path that ends with a separator.

    :param file_path: path whose parent directory should exist
    :return: None
    '''
    directory = os.path.dirname(file_path)

    # A bare file name has no directory part; os.makedirs('') would raise.
    if not directory or os.path.exists(directory):
        return

    try:
        os.makedirs(directory)
    except OSError:
        # Another process may have created the directory between the check and
        # the call; only re-raise when the directory is really missing.
        if not os.path.isdir(directory):
            raise


In [3]:
# Effect size
def cohen_d(x, y):
    '''
    Cohen's d effect size of ``x`` relative to ``y``.

    The difference of the two means is divided by the pooled standard
    deviation, built from the sample variances (``ddof=1``).

    :param x: first sample (sequence of numbers)
    :param y: second sample (sequence of numbers)
    :return: (mean(x) - mean(y)) / pooled standard deviation
    '''
    size_x, size_y = len(x), len(y)
    mean_difference = np.mean(x) - np.mean(y)

    # Each sample variance is weighted by its own degrees of freedom
    weighted_var_x = (size_x - 1) * np.std(x, ddof=1) ** 2
    weighted_var_y = (size_y - 1) * np.std(y, ddof=1) ** 2
    pooled_variance = (weighted_var_x + weighted_var_y) / (size_x + size_y - 2)

    return mean_difference / np.sqrt(pooled_variance)


In [4]:
# Some Easy Outlier detection
def reject_outliers_2(data, m=6.):
    '''
    Drop values that lie far from the median of ``data``.

    Every value is scored by its absolute deviation from the median, divided
    by the median of those deviations (a divisor of 1 is used when that median
    is zero). Values with a score below ``m`` are kept.

    :param data: sequence of numbers
    :param m: score threshold; values scoring ``m`` or more are rejected
    :return: list of the retained values, in their original order
    '''
    deviations = np.abs(data - np.median(data))
    median_deviation = np.median(deviations)
    if not median_deviation:
        # avoid dividing by zero when at least half of the values equal the median
        median_deviation = 1.
    scores = deviations / median_deviation
    return [value for value, score in zip(data, scores) if score < m]


## Load list of significant perturbations
- Load all significant perturbations
- Load drug decay
- Load list of images that are excluded 
- Load list of features to investigate

### Significant perturbations

In [5]:
# Collect (perturbation name, significance) pairs for every drug/batch whose
# Mahalanobis distance to DMSO exceeds the cut-off of 7
significant_perturbations = []

# File indicating which drug perturbations are significant in terms of
# Mahalanobis distance to DMSO
significance_path = '../data/Investigate_CellularHeterogeneity/Single_Perturbation_Significance.csv'

# 'with' guarantees the handle is closed; previously the file was never closed
with open(significance_path) as fp:
    # skip the header row
    next(fp)

    for line in fp:
        tmp = line.strip().split(',')

        # Columns 1 and 3 are read as the batch 1 / batch 2 distances. Rows where
        # either value is missing or not numeric are skipped; only those two
        # expected failures are caught so that real errors are not hidden.
        try:
            batch1_significance = float(tmp[1])
            batch2_significance = float(tmp[3])
        except (ValueError, IndexError):
            continue

        if batch1_significance > 7:
            significant_perturbations.append((tmp[0]+'_Batch1', batch1_significance))

        if batch2_significance > 7:
            significant_perturbations.append((tmp[0]+'_Batch2', batch2_significance))

# keep only the 10 perturbations with the largest distance
significant_perturbations.sort(key=operator.itemgetter(1), reverse=True)
significant_perturbations = significant_perturbations[0:10]

print(significant_perturbations)

[('CLOUD112_Batch2', 18.37538233940767), ('CLOUD057_Batch2', 17.949603639959136), ('CLOUD077_Batch2', 17.925072455942466), ('CLOUD089_Batch2', 17.47079522868084), ('CLOUD115_Batch2', 17.248539756395267), ('CLOUD129_Batch2', 17.10830389505952), ('CLOUD103_Batch2', 16.74349834279324), ('CLOUD053_Batch2', 16.71700833717189), ('CLOUD117_Batch2', 16.605844984937995), ('CLOUD031_Batch2', 16.54152648493228)]


### Drug Decay

In [6]:
# A drug counts as decayed during the experiment only if BOTH thresholds are
# reached: threshold_decay (steepness) and threshold_MaxDifference (absolute
# difference).
threshold_decay = 0.05
threshold_MaxDifference = 0.3


# Load all the drug decay regressions.
# Created by checking the single drug responses over the different plates
# (there is a temporal context between plate 1 and 123).
# Both the decay and the maximum change matter, e.g. a gradient between 0.1
# and 0.2 is still ok.
# drug_decay maps drug -> {'Batch1': bool, 'Batch2': bool}; True means the drug
# WORKED CORRECTLY (did not decay).
path = '../data/Investigate_CellularHeterogeneity/DrugDecay_Combined.csv'
drug_decay = {}
batch1_Failed = 0
batch2_Failed = 0

# 'with' closes the file even if a row fails to parse
with open(path) as fp:
    # skip the header row
    next(fp)

    for line in fp:
        tmp = line.strip().split(',')

        batch1_decay = float(tmp[1])
        batch1_diff = float(tmp[2])

        batch2_decay = float(tmp[3])
        batch2_diff = float(tmp[4])

        # status is False only when both thresholds are reached
        batch1_Status = not (batch1_decay >= threshold_decay and batch1_diff >= threshold_MaxDifference)
        if not batch1_Status:
            batch1_Failed += 1

        batch2_Status = not (batch2_decay >= threshold_decay and batch2_diff >= threshold_MaxDifference)
        if not batch2_Status:
            batch2_Failed += 1

        drug_decay[tmp[0]] = {'Batch1': batch1_Status, 'Batch2': batch2_Status}

print('Number of drugs that decayed in batch1: %d' % batch1_Failed)
print('Number of drugs that decayed in batch2: %d' % batch2_Failed)

Number of drugs that decayed in batch1: 6
Number of drugs that decayed in batch2: 2


### Load selected features

In [7]:
# Read the list of features to investigate, one per line.
# 'with' closes the file, which was previously left open.
with open('../data/Investigate_CellularHeterogeneity/Selected_Features.csv') as fp:
    # The first 7 characters of each line are dropped.
    # NOTE(review): presumably a fixed-length prefix in the file - confirm against the CSV
    selected_Features = [line.strip()[7:] for line in fp]

print('Number of features: %d' % len(selected_Features))

Number of features: 78


### Load Problematic Images

In [8]:
# For each batch, collect the first column of the corresponding BadImages file
problematic_images = {'Batch1': [], 'Batch2': []}

batches = ['1', '2']
for batch_ in batches:
    # 'with' closes each file; previously neither handle was closed
    with open('../data/Investigate_CellularHeterogeneity/BadImages/Batch'+batch_+'.csv', 'r') as fp:
        for line in fp:
            tmp = line.strip().split(',')
            problematic_images['Batch'+batch_].append(tmp[0])

## Actual Analysis

### Load corresponding images

In [9]:
# Open the connection to the image-analysis database
# NOTE(review): host, user and password are hard-coded in this notebook;
# consider reading them from a config file or the environment instead.
db = MySQLdb.connect("menchelabdb.int.cemm.at","root","cqsr4h","ImageAnalysisDDI" )

###########
#  DRUGS  #
###########

# perturbation name ('<drug>_Batch<n>') -> plate -> list of image numbers
# (only wells where the second compound is DMSO, i.e. single-drug wells)
Image_Number_For_Drugs = {}

for entry in significant_perturbations:
    drug,batch_ = entry[0].split('_')
    # sixth character of e.g. 'Batch2' is the batch number
    batch_ = batch_[5]

    # skip drugs that were flagged as decayed in this batch
    if not drug_decay[drug]['Batch'+batch_]:
        continue

    # SQL string
    string = 'select ImageNumber,Image_Metadata_Plate from DPN1018Batch'+batch_+'Per_Image where Image_Metadata_ID_A like "'+drug+'" and Image_Metadata_ID_B like "DMSO";'

    # Extract data via pandas
    ImageNumbers = pandas.read_sql(string, con=db)

    for row_index, row in ImageNumbers.iterrows():
        # first column: image number, second column: plate number
        Drug_ImageNumber = row[0]
        Drug_PlateNumber = row[1]

        # create the nested containers on first use, then append
        images_per_plate = Image_Number_For_Drugs.setdefault(entry[0], {})
        images_per_plate.setdefault(Drug_PlateNumber, []).append(Drug_ImageNumber)

###########
#  DMSO   #
###########

# batch number ('1'/'2') -> plate -> list of image numbers of DMSO-only wells
Image_Number_For_DMSO = {}
for batch_ in ['1','2']:

    # SQL string
    string = 'select ImageNumber,Image_Metadata_Plate from DPN1018Batch'+batch_+'Per_Image where Image_Metadata_ID_A like "DMSO" and Image_Metadata_ID_B like "None";'

    # Extract data via pandas
    ImageNumbers = pandas.read_sql(string, con=db)

    for row_index, row in ImageNumbers.iterrows():
        Drug_ImageNumber = row[0]
        Drug_PlateNumber = row[1]

        images_per_plate = Image_Number_For_DMSO.setdefault(batch_, {})
        images_per_plate.setdefault(Drug_PlateNumber, []).append(Drug_ImageNumber)

db.close()

### Definitions
- drug colors
- feature colors

In [10]:
# Static color code: one fixed color per significant drug, grey for DMSO
drug_colors = {
    'CLOUD031': '#8dd3c7',
    'CLOUD053': '#ffffb3',
    'CLOUD057': '#bebada',
    'CLOUD089': '#fb8072',
    'CLOUD112': '#80b1d3',
    'CLOUD117': '#fdb462',
    'CLOUD077': '#b3de69',
    'CLOUD103': '#fccde5',
    'CLOUD115': '#c51b8a',
    'CLOUD129': '#bc80bd',
    'DMSO': 'grey',
}

# One color per feature type; Intensity and RadialDistribution share a color
feature_colors = {
    'AreaShape': '#D53D48',           # red
    'Intensity': '#BDCA27',           # green
    'RadialDistribution': '#BDCA27',  # green
    'Other': 'grey',                  # grey
    'Texture': '#F8B301',             # orange
    'Granularity': '#3AB9D1',         # blue
}

# Comma-separated column list used to select all features in the SQL queries
selected_feature_string = ','.join(selected_Features)

In [11]:
## EXTRACT DMSO
####

# Open the database connection
# NOTE(review): credentials are hard-coded here; consider moving them out of the notebook.
db = MySQLdb.connect("menchelabdb.int.cemm.at","root","cqsr4h","ImageAnalysisDDI" )

# Plate and batch that are analysed
plate = 1315101
batch_ =  '2'

# Image numbers of the DMSO wells on that plate, joined for the SQL "in (...)" clause
images_dmso = Image_Number_For_DMSO[batch_][plate]
imageNumberString_dmso = ','.join(map(str, images_dmso))
string = 'select ImageNumber,ObjectNumber,'+selected_feature_string+' from DPN1018Batch'+batch_+'Per_Object where ImageNumber in ('+imageNumberString_dmso+');'

# One row per object (cell) of the DMSO images, restricted to the selected features
DMSO_allFeatures = pandas.read_sql(string, con=db)
DMSO_allFeatures['Label'] = 'DMSO'

# Discard rows that contain any missing value
DMSO_allFeatures = DMSO_allFeatures.dropna()

db.close()

In [12]:
# Preview the first rows of the DMSO per-object feature table
DMSO_allFeatures.head()

Unnamed: 0,ImageNumber,ObjectNumber,Cells_AreaShape_Compactness,Cells_AreaShape_Extent,Cells_AreaShape_FormFactor,Cells_AreaShape_MaxFeretDiameter,Cells_AreaShape_Zernike_2_0,Cells_AreaShape_Zernike_3_1,Cells_AreaShape_Zernike_4_2,Cells_AreaShape_Zernike_5_3,...,Nuclei_Granularity_5_DAPI,Nuclei_Granularity_6_DAPI,Nuclei_Granularity_7_DAPI,Nuclei_Granularity_8_DAPI,Nuclei_Granularity_9_DAPI,Nuclei_Intensity_IntegratedIntensityEdge_DAPI,Nuclei_Intensity_IntegratedIntensity_DAPI,Nuclei_Intensity_MADIntensity_DAPI,Nuclei_Intensity_StdIntensityEdge_DAPI,Label
0,54701,1,1.15982,0.685714,0.750649,19.4165,0.157546,0.019115,0.01148,0.006484,...,35.2459,4.19041,19.6766,2.96946,1.11507,0.859098,4.43896,0.003815,0.008469,DMSO
1,54701,2,1.02608,0.789272,0.874287,30.4631,0.114611,0.018427,0.021649,0.004243,...,14.7548,35.3576,0.0,5.55648,18.2801,0.818189,16.9778,0.007767,0.002083,DMSO
2,54701,3,1.04174,0.798611,0.875644,18.6011,0.099665,0.024889,0.02755,0.013928,...,12.9646,37.0991,12.2128,7.46238,0.80839,1.4145,8.13231,0.003876,0.003057,DMSO
3,54701,4,1.01102,0.72619,0.864777,27.7308,0.105424,0.035592,0.008966,0.01155,...,8.41062,11.1502,5.66984,28.7872,23.2242,1.16371,12.0065,0.002846,0.005323,DMSO
4,54701,5,1.17272,0.696759,0.572995,23.3452,0.104706,0.008888,0.028399,0.013669,...,8.25054,14.1547,27.1804,7.28085,3.47058,1.20975,5.92338,0.002312,0.004514,DMSO


In [13]:
## EXTRACT Drugs
####

# Open the database connection
# NOTE(review): credentials are hard-coded here; consider moving them out of the notebook.
db = MySQLdb.connect("menchelabdb.int.cemm.at","root","cqsr4h","ImageAnalysisDDI" )

# Gather the images of every drug that is present on the chosen plate and
# remember which drug each image belongs to
images_drugs = []
image_to_drug = {}
for key, images_per_plate in Image_Number_For_Drugs.items():
    if plate not in images_per_plate:
        continue

    # key looks like '<drug>_Batch<n>'; keep only the drug part
    drug_name = key.split('_')[0]
    plate_images = images_per_plate[plate]
    images_drugs.extend(plate_images)
    for img in plate_images:
        image_to_drug[img] = drug_name

# Create SQL string
imageNumberString_drug = ','.join(map(str, images_drugs))
string = 'select ImageNumber,ObjectNumber,'+selected_feature_string+' from DPN1018Batch'+batch_+'Per_Object where ImageNumber in ('+imageNumberString_drug+');'

# One row per object (cell) of the drug images, restricted to the selected features
Drug_allFeatures = pandas.read_sql(string, con=db)

# Start with a generic label, then overwrite it with the drug name image by image
Drug_allFeatures['Label'] = 'Drug'
for img, drug_name in image_to_drug.items():
    Drug_allFeatures.loc[Drug_allFeatures['ImageNumber'] == img,['Label']] = drug_name

# Discard rows that contain any missing value
Drug_allFeatures = Drug_allFeatures.dropna()

db.close()

In [14]:
# Preview the first rows of the drug per-object feature table
Drug_allFeatures.head()

Unnamed: 0,ImageNumber,ObjectNumber,Cells_AreaShape_Compactness,Cells_AreaShape_Extent,Cells_AreaShape_FormFactor,Cells_AreaShape_MaxFeretDiameter,Cells_AreaShape_Zernike_2_0,Cells_AreaShape_Zernike_3_1,Cells_AreaShape_Zernike_4_2,Cells_AreaShape_Zernike_5_3,...,Nuclei_Granularity_5_DAPI,Nuclei_Granularity_6_DAPI,Nuclei_Granularity_7_DAPI,Nuclei_Granularity_8_DAPI,Nuclei_Granularity_9_DAPI,Nuclei_Intensity_IntegratedIntensityEdge_DAPI,Nuclei_Intensity_IntegratedIntensity_DAPI,Nuclei_Intensity_MADIntensity_DAPI,Nuclei_Intensity_StdIntensityEdge_DAPI,Label
0,54745,1,1.5715,0.518804,0.140161,119.013,0.15466,0.024482,0.039582,0.016888,...,3.05456,9.27179,13.9509,12.287,12.0499,0.306844,5.09012,0.001816,0.000298,CLOUD057
1,54745,2,1.12421,0.590805,0.322748,101.178,0.181061,0.022004,0.025792,0.025306,...,4.35202,1.98926,4.64896,5.67418,0.0,0.524849,10.0542,0.000626,0.000483,CLOUD057
2,54745,3,2.08286,0.338741,0.159351,210.839,0.091311,0.068062,0.006494,0.014317,...,0.439525,4.98448,4.62558,10.5943,8.75994,0.517891,18.7899,0.00235,0.000616,CLOUD057
3,54745,4,1.30034,0.433946,0.221216,142.338,0.175356,0.033313,0.022932,0.01038,...,0.0,1.59313,14.9787,0.0,0.0,0.484443,11.3635,0.000839,0.000677,CLOUD057
4,54745,5,1.69427,0.456701,0.142397,199.7,0.144139,0.005285,0.028346,0.023184,...,7.03409,1.63458,5.15971,2.94333,36.2122,0.437949,12.2352,0.001572,0.000606,CLOUD057


#### Perform pooled scaling

In [15]:
# Stack DMSO and drug cells so that both are scaled with the same parameters
DMSO_and_Drugs = pandas.concat([DMSO_allFeatures,Drug_allFeatures])

# Standardise every selected feature over the pooled cells
# (RobustScaler is imported as an alternative but not used)
scaler = StandardScaler()
scaled_feature_values = scaler.fit_transform(DMSO_and_Drugs[selected_Features])

# Copy the pooled table and overwrite only the feature columns with the scaled values
DMSO_and_Drugs_allFeatures_scaled = DMSO_and_Drugs.copy()
DMSO_and_Drugs_allFeatures_scaled[selected_Features] = scaled_feature_values
DMSO_and_Drugs_allFeatures_scaled.head()

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


Unnamed: 0,ImageNumber,ObjectNumber,Cells_AreaShape_Compactness,Cells_AreaShape_Extent,Cells_AreaShape_FormFactor,Cells_AreaShape_MaxFeretDiameter,Cells_AreaShape_Zernike_2_0,Cells_AreaShape_Zernike_3_1,Cells_AreaShape_Zernike_4_2,Cells_AreaShape_Zernike_5_3,...,Nuclei_Granularity_5_DAPI,Nuclei_Granularity_6_DAPI,Nuclei_Granularity_7_DAPI,Nuclei_Granularity_8_DAPI,Nuclei_Granularity_9_DAPI,Nuclei_Intensity_IntegratedIntensityEdge_DAPI,Nuclei_Intensity_IntegratedIntensity_DAPI,Nuclei_Intensity_MADIntensity_DAPI,Nuclei_Intensity_StdIntensityEdge_DAPI,Label
0,54701,1,-0.514161,1.386339,2.612965,-1.660736,-0.033693,-0.620967,-1.292899,-1.066245,...,2.035048,-1.179809,-0.05444,-0.726256,-0.547694,2.763631,-1.477996,0.375443,7.835386,DMSO
1,54701,2,-0.878324,2.428824,3.440865,-1.333047,-1.318084,-0.657443,-0.597935,-1.328942,...,0.281799,0.869265,-1.431992,-0.495131,1.515666,2.418568,1.422157,2.596924,0.838729,DMSO
2,54701,3,-0.835683,2.522837,3.449952,-1.684924,-1.765199,-0.314941,-0.194731,-0.193815,...,0.128627,0.983759,-0.576978,-0.324858,-0.584559,7.448385,-0.623748,0.409748,1.905716,DMSO
3,54701,4,-0.919331,1.793798,3.377185,-1.414098,-1.592911,0.252352,-1.464648,-0.47248,...,-0.261019,-0.72224,-1.035048,1.580304,2.109982,5.332999,0.272328,-0.169208,4.387827,DMSO
4,54701,5,-0.479035,1.497525,1.423366,-1.544194,-1.61439,-1.16301,-0.136714,-0.224205,...,-0.274715,-0.524711,0.470899,-0.341076,-0.264545,5.721341,-1.134659,-0.469411,3.50147,DMSO


### Plot results for DMSO and selected drugs (Distributions)

In [48]:
sns.set_style("whitegrid", {'axes.grid' : False})

make_plots = True

# make sure the output folder exists
ensure_dir('../results/Investigate_CellularHeterogeneity/Penetrance_PooledScaled/DMSO/')

# number of features whose mean subsample p-value is at least 0.05
is_normal = 0

for f in selected_Features:

    # scaled DMSO values of this feature
    dmso_rows = DMSO_and_Drugs_allFeatures_scaled['Label'] == 'DMSO'
    feature_values = DMSO_and_Drugs_allFeatures_scaled.loc[dmso_rows][f].values

    # Tests for normality are essentially useless on the full data (small data
    # sets lack power, large data sets get rejected as non-normal), so the test
    # is run on 1000 random subsamples of 50 values and the p-values are averaged.
    # NOTE(review): the subsampling is unseeded, so results vary between runs.
    pvals = [stats.normaltest(np.random.choice(feature_values,50))[1] for repeat in range(0,1000)]
    mean_pval = np.mean(pvals)

    if mean_pval >= 0.05:
        is_normal += 1

    if make_plots:
        plt.hist(feature_values,bins=100, color='grey',density=True)
        plt.title(f + 'Normal: %.2f' % mean_pval)
        plt.savefig('../results/Investigate_CellularHeterogeneity/Penetrance_PooledScaled/DMSO/'+f+'.pdf')
        plt.close()

print(len(selected_Features))
print(is_normal)

78
30


In [17]:
ensure_dir('../results/Investigate_CellularHeterogeneity/Penetrance_PooledScaled/Drugs/')

# Unique drug names present on the chosen plate
all_drugs = list(set(image_to_drug.values()))

# One overlay histogram per (feature, drug) pair
for f in selected_Features:

    # scaled DMSO values of this feature (shared by all drugs)
    dmso_rows = DMSO_and_Drugs_allFeatures_scaled['Label'] == 'DMSO'
    feature_values_DMSO = DMSO_and_Drugs_allFeatures_scaled.loc[dmso_rows][f].values

    for drug in all_drugs:

        # scaled values of this feature for the current drug
        drug_rows = DMSO_and_Drugs_allFeatures_scaled['Label'] == drug
        feature_values = DMSO_and_Drugs_allFeatures_scaled.loc[drug_rows][f].values

        # DMSO in grey, drug in its own color, both normalised to densities
        plt.hist(feature_values_DMSO,bins='doane', color='grey', alpha=0.5, density=True)
        plt.hist(feature_values,bins='doane', color=drug_colors[drug], alpha=0.5, density=True)
        plt.savefig('../results/Investigate_CellularHeterogeneity/Penetrance_PooledScaled/Drugs/'+f+'_'+drug+'.pdf')
        plt.close()

In [46]:
# Row annotation colors for the clustermaps (one entry per feature)
feature_type_colors = []
compartment_type_colors = []

# KS statistic per feature (rows) and drug (columns)
feature_results = []

# Fraction of drug cells outside the DMSO 5th-95th percentile range,
# per feature (rows) and drug (columns)
feature_results_effect = []

for f in selected_Features:

    # Feature names start with '<compartment>_<featuretype>_...'
    compartment,featuretype,_ = f.split('_')[0:3]

    # unknown feature types fall back to the 'Other' color
    feature_type_colors.append(feature_colors.get(featuretype, feature_colors['Other']))

    # 'Cells' features get one color, every other compartment the second one
    compartment_type_colors.append('#a6611a' if compartment == 'Cells' else '#018571')

    # scaled DMSO values of this feature
    dmso_rows = DMSO_and_Drugs_allFeatures_scaled['Label'] == 'DMSO'
    feature_values_DMSO = DMSO_and_Drugs_allFeatures_scaled.loc[dmso_rows][f].values

    # DMSO 5th and 95th percentiles delimit the "unperturbed" range
    low_5 = np.percentile(feature_values_DMSO,5)
    top_95 = np.percentile(feature_values_DMSO,95)

    # one row of results for this feature (one value per drug)
    ks_row = []
    outside_row = []

    for drug in all_drugs:

        # scaled values of this feature for the current drug
        drug_rows = DMSO_and_Drugs_allFeatures_scaled['Label'] == drug
        feature_values_drug = DMSO_and_Drugs_allFeatures_scaled.loc[drug_rows][f].values

        # fraction of cells below the 5th or above the 95th DMSO percentile
        cells_outside = sum(1 for x in feature_values_drug if x < low_5 or x > top_95)
        outside_row.append(cells_outside/float(len(feature_values_drug)))

        # KS statistic between the drug and DMSO distributions
        ks_row.append(stats.ks_2samp(feature_values_drug,feature_values_DMSO)[0])

    feature_results.append(ks_row)
    feature_results_effect.append(outside_row)

In [49]:
# Clustered heatmap of the KS statistics (features x drugs); the two row color
# bars show the feature type and the compartment of each feature
row_annotation_colors = [feature_type_colors,compartment_type_colors]
sns.clustermap(
    data=feature_results,
    xticklabels=all_drugs,
    yticklabels=selected_Features,
    row_colors=row_annotation_colors,
)
plt.savefig('../results/Investigate_CellularHeterogeneity/Penetrance_PooledScaled/Clustermap_KS_Test.pdf')
plt.close()

In [50]:
# Clustered heatmap of the percentile-based results (features x drugs); the two
# row color bars show the feature type and the compartment of each feature
#sns.set()
sns.clustermap(data=feature_results_effect, xticklabels=all_drugs,yticklabels=selected_Features, row_colors=[feature_type_colors,compartment_type_colors])
# NOTE(review): the font scale is changed AFTER the clustermap was drawn, so it
# presumably only affects figures created later - confirm this is intended
sns.set(font_scale=5.5)
plt.savefig('../results/Investigate_CellularHeterogeneity/Penetrance_PooledScaled/Clustermap_Percentiles.pdf')
plt.close()

In [25]:
# Scatter of the KS statistics (x) against the percentile-based fractions (y),
# one point per feature/drug pair, with the identity line for reference
# resets the global seaborn style to its defaults before plotting
sns.set()
plt.scatter(feature_results,feature_results_effect)
plt.plot([0,1],[0,1],ls='--',c='grey')
# NOTE(review): x holds feature_results (KS statistic) and y holds
# feature_results_effect (fraction of cells outside the DMSO percentiles);
# check that the 'Penetrance' / 'Effect' labels are not swapped
plt.xlabel('Penetrance')
plt.ylabel('Effect')
plt.savefig('../results/Investigate_CellularHeterogeneity/Penetrance_PooledScaled/Penetrance_vs_Effect.pdf')
plt.close()

### Make PCA (all features)

In [39]:
# PCA of all per-object features: each drug is projected together with the DMSO
# cells of the same plate and the two populations are drawn as density plots.
# NOTE(review): credentials are hard-coded here; consider moving them out of the notebook.
db = MySQLdb.connect("menchelabdb.int.cemm.at","root","cqsr4h","ImageAnalysisDDI" )
plate = 1315101
batch_ =  '2'

# try/finally so the connection is always released; previously this cell never closed it
try:
    # All columns of every DMSO object on the chosen plate (fetched once, reused for each drug)
    images_dmso = Image_Number_For_DMSO[batch_][plate]
    imageNumberString_dmso = ','.join([str(x) for x in images_dmso])
    string = 'select * from DPN1018Batch'+batch_+'Per_Object where ImageNumber in ('+imageNumberString_dmso+');'
    DMSO_allFeatures = pandas.read_sql(string, con=db)
    DMSO_allFeatures['Label'] = 'DMSO'

    for entry in list(Image_Number_For_Drugs.keys()):
        print(entry)
        drug,batch_ = entry.split('_')
        # NOTE(review): batch_ is updated per drug, but the DMSO table above was
        # loaded for batch '2' only - fine as long as every entry is from batch 2
        batch_ = batch_[5]

        images_drug = Image_Number_For_Drugs[entry][plate]
        imageNumberString_drug = ','.join([str(x) for x in images_drug])

        string = 'select * from DPN1018Batch'+batch_+'Per_Object where ImageNumber in ('+imageNumberString_drug+');'
        drug_allFeatures = pandas.read_sql(string, con=db)
        drug_allFeatures['Label'] = 'Drug'

        # Put both dataframes together, drop positional columns and incomplete rows
        DMSO_drug_allFeatures = pandas.concat([drug_allFeatures,DMSO_allFeatures])
        to_remove = [x for x in DMSO_drug_allFeatures.columns if 'Location' in x  or 'Center' in x]
        DMSO_drug_allFeatures = DMSO_drug_allFeatures.drop(to_remove, axis=1)
        DMSO_drug_allFeatures = DMSO_drug_allFeatures.dropna()

        y = DMSO_drug_allFeatures['Label'].values
        # skips the first three columns and the trailing Label column
        # NOTE(review): presumably the leading columns are identifiers - confirm
        x = DMSO_drug_allFeatures.iloc[:,3:-1].values
        # Standardizing the features
        x = StandardScaler().fit_transform(x)

        pca = PCA(n_components=2)
        Drug_DMSO_Fit = pca.fit_transform(x)

        # Split the projected points back into drug and DMSO cells
        is_drug = (y == 'Drug')
        pca_drug = Drug_DMSO_Fit[is_drug]
        pca_DMSO = Drug_DMSO_Fit[~is_drug]

        ensure_dir('../results/Investigate_CellularHeterogeneity/'+drug+'/')

        # Axis limits: 0.5th to 99.5th percentile over both populations, so
        # that extreme points do not dominate the plots
        upper = 99.5
        lower = 0.5
        x_min = min([np.percentile(pca_drug[:,0],lower),np.percentile(pca_DMSO[:,0],lower)])
        x_max = max([np.percentile(pca_drug[:,0],upper),np.percentile(pca_DMSO[:,0],upper)])
        y_min = min([np.percentile(pca_drug[:,1],lower),np.percentile(pca_DMSO[:,1],lower)])
        y_max = max([np.percentile(pca_drug[:,1],upper),np.percentile(pca_DMSO[:,1],upper)])

        # Both populations as contour plots in one figure
        sns.kdeplot(pca_drug[:,0],pca_drug[:,1],shade_lowest=False, alpha=0.5)
        sns.kdeplot(pca_DMSO[:,0],pca_DMSO[:,1],shade_lowest=False, alpha=0.5)
        plt.xlim([x_min,x_max])
        plt.ylim([y_min,y_max])
        plt.savefig('../results/Investigate_CellularHeterogeneity/'+drug+'/ContourPlot_AllFeatures.pdf')
        plt.close()

        # Separate joint plots for drug and DMSO on identical axes
        sns.jointplot(pca_drug[:,0],pca_drug[:,1], kind='kde', bw = 'scott', color=drug_colors[drug], shade_lowest=False, alpha=0.5, xlim=[x_min,x_max], ylim=[y_min,y_max])
        plt.savefig('../results/Investigate_CellularHeterogeneity/'+drug+'/JoinPlot_Drug_AllFaetures.pdf')
        plt.close()

        sns.jointplot(pca_DMSO[:,0],pca_DMSO[:,1], kind='kde', bw = 'scott', color="#D4D4D4", shade_lowest=False,alpha=0.5, xlim=[x_min,x_max], ylim=[y_min,y_max])
        plt.savefig('../results/Investigate_CellularHeterogeneity/'+drug+'/JoinPlot_DMSO_AllFaetures.pdf')
        plt.close()
finally:
    db.close()
    
    

CLOUD112_Batch2
CLOUD057_Batch2
CLOUD089_Batch2
CLOUD031_Batch2
CLOUD053_Batch2
CLOUD117_Batch2
CLOUD103_Batch2
CLOUD115_Batch2
CLOUD077_Batch2
CLOUD129_Batch2


### Make Violin plot selected features

In [15]:
# Load, per drug and feature, the outlier-filtered values of the drug cells and
# of the DMSO cells on the chosen plate
# NOTE(review): credentials are hard-coded here; consider moving them out of the notebook.
db = MySQLdb.connect("menchelabdb.int.cemm.at","root","cqsr4h","ImageAnalysisDDI" )

features = selected_Features

plate = 1315101

# entry ('<drug>_Batch<n>') -> feature -> {'Drug': values, 'DMSO': values}
drug_feature_results_to_plot = {}
for entry in Image_Number_For_Drugs:
    drug,batch_ = entry.split('_')
    batch_ = batch_[5]
    drug_feature_results_to_plot[entry] = {}
    print(drug)

    images_drug = Image_Number_For_Drugs[entry][plate]
    imageNumberString_drug = ','.join(map(str, images_drug))

    images_dmso = Image_Number_For_DMSO[batch_][plate]
    imageNumberString_dmso = ','.join(map(str, images_dmso))

    for feature in features:

        ensure_dir('../results/Investigate_CellularHeterogeneity/'+drug+'/'+feature+'/')

        # Same query and clean-up for both populations: drug first, then DMSO
        cleaned_values = {}
        for label, image_number_string in (('Drug', imageNumberString_drug), ('DMSO', imageNumberString_dmso)):
            string = 'select ImageNumber,ObjectNumber,'+feature+' from DPN1018Batch'+batch_+'Per_Object where ImageNumber in ('+image_number_string+');'
            raw_values = list(pandas.read_sql(string, con=db)[feature].values)

            # drop missing values, then reject values far away from the median
            cleaned_values[label] = reject_outliers_2([x for x in raw_values if str(x) != 'nan'],6)

        drug_feature_results_to_plot[entry][feature] = cleaned_values

db.close()

CLOUD112
CLOUD057
CLOUD089
CLOUD031
CLOUD053
CLOUD117
CLOUD103
CLOUD115
CLOUD077
CLOUD129


In [41]:
# One horizontal violin plot per feature: all drugs ordered by their median,
# followed by DMSO, with two DMSO percentiles drawn as dashed reference lines.

# make sure the output folder exists before saving into it
ensure_dir('../results/Investigate_CellularHeterogeneity/Final/')

for feature in features:

    # (drug name, drug values, median of the values) for every perturbation
    per_drug = []
    for entry in list(Image_Number_For_Drugs.keys()):
        drug,batch_ = entry.split('_')
        drug_values = drug_feature_results_to_plot[entry][feature]['Drug']
        per_drug.append((drug, drug_values, np.median(drug_values)))

    # A single stable sort keeps names and values aligned
    per_drug.sort(key = operator.itemgetter(2))
    drug_names = [x[0] for x in per_drug]
    data = [x[1] for x in per_drug]

    # DMSO values are taken from the last perturbation of the loop above
    # NOTE(review): assumes every perturbation shares the same DMSO reference (same batch/plate)
    dmso_values = drug_feature_results_to_plot[entry][feature]['DMSO']
    data.append(dmso_values)
    drug_names.append('DMSO')

    # NOTE(review): the names say 95/5 but the 90th/10th percentiles are computed;
    # values are left unchanged - confirm which cut-off is intended
    Percent_95 = np.percentile(dmso_values,90)
    Percent_5 = np.percentile(dmso_values,10)

    # Palette keyed by violin position; built from the names so that any number
    # of drugs works (previously hard-coded to exactly 11 entries)
    my_pal = {position: drug_colors[name] for position, name in enumerate(drug_names)}

    sns.violinplot(data=data,scale='width',bw='scott', palette=my_pal, orient='h')
    plt.axvline(Percent_95,ls='--',color='grey')
    plt.axvline(Percent_5,ls='--',color='grey')
    # one tick per violin; the previous range had one position more than labels
    plt.yticks(range(len(data)),drug_names, fontsize=5)
    plt.ylabel('Treatment', fontsize=5)
    plt.xticks(fontsize=5)
    plt.xlabel(feature, fontsize=5)
    plt.savefig('../results/Investigate_CellularHeterogeneity/Final/'+str(feature)+'_Violin.pdf')
    plt.close()
    
    

### Analyse Features for selected Drugs

In [43]:
# For a fixed set of features, compare every drug with DMSO on one plate:
# write summary statistics to Result_Overview.csv and save a histogram and a
# box plot per drug/feature.

# NOTE: this overwrites the module-level feature list used by the earlier cells
selected_Features = ['Cells_AreaShape_FormFactor','Nuclei_AreaShape_MaxFeretDiameter','Cells_Granularity_1_BetaTubulin','Nuclei_Granularity_8_DAPI','Cells_Intensity_StdIntensity_MitoTracker','Nuclei_Intensity_IntegratedIntensity_DAPI']

# 'with' closes (and therefore flushes) the overview file; previously the handle
# was never closed, so the CSV could end up incomplete
with open('../results/Investigate_CellularHeterogeneity/Result_Overview.csv','w') as fp_out:
    # NOTE(review): the header contains a literal double quote (Cohens"D), which may
    # confuse CSV readers; left unchanged in case downstream code relies on it
    fp_out.write('Batch,Drug,Plate,Feature,Cohens"D,Abs(CohenD),Coefficient_Variation,KS_Normality,MW_PVal\n')

    # NOTE(review): credentials are hard-coded here; consider moving them out of the notebook.
    db = MySQLdb.connect("menchelabdb.int.cemm.at","root","cqsr4h","ImageAnalysisDDI" )

    # try/finally so the connection is released even if a query or a plot fails
    try:
        for entry in Image_Number_For_Drugs:
            print(entry)
            drug,batch_ = entry.split('_')
            batch_ = batch_[5]

            # only this plate is analysed here
            plates = [1315101]

            for plate in plates:
                images_drug = Image_Number_For_Drugs[entry][plate]
                imageNumberString_drug = ','.join([str(x) for x in images_drug])

                images_dmso = Image_Number_For_DMSO[batch_][plate]
                imageNumberString_dmso = ','.join([str(x) for x in images_dmso])

                for feature in selected_Features:
                    ensure_dir('../results/Investigate_CellularHeterogeneity/'+drug+'/'+feature+'/')

                    # drug cells: drop missing values, then reject values far from the median
                    string = 'select ImageNumber,ObjectNumber,'+feature+' from DPN1018Batch'+batch_+'Per_Object where ImageNumber in ('+imageNumberString_drug+');'
                    result_drug = list(pandas.read_sql(string, con=db)[feature].values)
                    result_drug = reject_outliers_2([x for x in result_drug if str(x) != 'nan'],6)

                    # DMSO cells: same clean-up
                    string = 'select ImageNumber,ObjectNumber,'+feature+' from DPN1018Batch'+batch_+'Per_Object where ImageNumber in ('+imageNumberString_dmso+');'
                    result_dmso = list(pandas.read_sql(string, con=db)[feature].values)
                    result_dmso = reject_outliers_2([x for x in result_dmso if str(x) != 'nan'],6)

                    # effect size of drug versus DMSO
                    cd = cohen_d(result_drug,result_dmso)

                    # Mann-Whitney p-value multiplied by (#features x #plates stored
                    # for this drug x 2) and capped at 1
                    # NOTE(review): only one plate is analysed in this cell, so the
                    # correction factor may be larger than intended
                    mw_Pval = min([1,mw(result_drug,result_dmso)[1] * (len(selected_Features) * len(list(Image_Number_For_Drugs[entry])) * 2)])

                    # coefficient of variation of the drug values
                    coev_var = np.std(result_drug)/np.mean(result_drug)

                    # despite the name, this is the Shapiro-Wilk p-value
                    KS_Normality = stats.shapiro(result_drug)[1]

                    fp_out.write(batch_+','+drug+','+str(plate)+','+feature+','+str(cd)+','+str(abs(cd))+','+str(coev_var)+','+str(KS_Normality)+','+str(mw_Pval)+'\n')

                    # overlaid density histograms, x-axis clipped to the 1st-99th percentiles
                    plt.hist(result_drug, bins = 20, color = drug_colors[drug], alpha=0.3, density=True)
                    plt.hist(result_dmso, bins = 20, color = 'grey', alpha=0.3,density=True)
                    plt.xlim([min([np.percentile(result_drug,1),np.percentile(result_dmso,1)]),max([np.percentile(result_drug,99),np.percentile(result_dmso,99)])])
                    plt.savefig('../results/Investigate_CellularHeterogeneity/'+drug+'/'+feature+'/'+str(plate)+'_Hist.pdf')
                    plt.close()

                    # box plot of drug versus DMSO
                    plt.boxplot([result_drug,result_dmso], whis = 1.5, showfliers = True)
                    plt.xticks([1,2],[drug,'DMSO'])
                    plt.savefig('../results/Investigate_CellularHeterogeneity/'+drug+'/'+feature+'/'+str(plate)+'_Box.pdf')
                    plt.close()
    finally:
        db.close()

CLOUD112_Batch2
CLOUD057_Batch2
CLOUD089_Batch2
CLOUD031_Batch2
CLOUD053_Batch2
CLOUD117_Batch2
CLOUD103_Batch2
CLOUD115_Batch2
CLOUD077_Batch2
CLOUD129_Batch2


### Load actual cells

In [14]:
# Compare the single-cell feature distribution of every significant
# perturbation with the DMSO cells of the same batch and plate. For each
# (perturbation, plate, feature) one row of summary statistics is written to
# Result_Overview.csv, and a histogram plus a box plot are saved as PDF.
#
# The ``with`` block and the ``finally`` clause guarantee that the CSV is
# flushed/closed and the database connection is released even when a query
# or a statistic fails part-way through.
with open('../results/Investigate_CellularHeterogeneity/Result_Overview.csv','w') as fp_out:
    fp_out.write('Batch,Drug,Plate,Feature,Cohens"D,Abs(CohenD),Coefficient_Variation,KS_Normality,MW_PVal\n')

    #selected_Features = ['Cells_Intensity_StdIntensity_MitoTracker','Cells_Granularity_12_BetaTubulin','Nuclei_AreaShape_MaximumRadius','Cells_AreaShape_MaxFeretDiameter']

    db = MySQLdb.connect("menchelabdb.int.cemm.at","root","cqsr4h","ImageAnalysisDDI" )
    try:
        for entry in Image_Number_For_Drugs:
            print(entry)

            # Keys look like '<drug>_Batch<n>'; only the batch digit is kept
            # because it is spliced into the table name below.
            drug,batch_ = entry.split('_')
            batch_ = batch_[5]

            plates = list(Image_Number_For_Drugs[entry].keys())

            # Multiplicative p-value correction: features x plates x 2.
            # NOTE(review): the meaning of the trailing factor 2 is not
            # visible here - confirm.
            correction_factor = len(selected_Features) * len(plates) * 2

            for plate in plates:
                images_drug = Image_Number_For_Drugs[entry][plate]
                imageNumberString_drug = ','.join([str(x) for x in images_drug])

                images_dmso = Image_Number_For_DMSO[batch_][plate]
                imageNumberString_dmso = ','.join([str(x) for x in images_dmso])

                for feature in selected_Features:
                    ensure_dir('../results/Investigate_CellularHeterogeneity/'+drug+'/'+feature+'/')

                    # Per-cell values of the treated wells: drop NaN, then
                    # drop values 6 or more median absolute deviations away
                    # from the median.
                    string = 'select ImageNumber,ObjectNumber,'+feature+' from DPN1018Batch'+batch_+'Per_Object where ImageNumber in ('+imageNumberString_drug+');'
                    result_drug = list(pandas.read_sql(string, con=db)[feature].values)
                    result_drug = reject_outliers_2([x for x in result_drug if str(x) != 'nan'],6)

                    # Same extraction for the DMSO control wells of this plate.
                    string = 'select ImageNumber,ObjectNumber,'+feature+' from DPN1018Batch'+batch_+'Per_Object where ImageNumber in ('+imageNumberString_dmso+');'
                    result_dmso = list(pandas.read_sql(string, con=db)[feature].values)
                    result_dmso = reject_outliers_2([x for x in result_dmso if str(x) != 'nan'],6)

                    # Effect size and corrected Mann-Whitney p-value (capped at 1).
                    cd = cohen_d(result_drug,result_dmso)
                    mw_Pval = min([1,mw(result_drug,result_dmso)[1] * correction_factor])

                    # Coefficient of variation of the treated cells.
                    coev_var = np.std(result_drug)/np.mean(result_drug)
                    # Despite the column name this is the Shapiro-Wilk p-value.
                    KS_Normality = stats.shapiro(result_drug)[1]

                    fp_out.write(batch_+','+drug+','+str(plate)+','+feature+','+str(cd)+','+str(abs(cd))+','+str(coev_var)+','+str(KS_Normality)+','+str(mw_Pval)+'\n')

                    # Overlaid density histograms, clipped to the joint
                    # 1st-99th percentile range. (bins = 14 looks prettier)
                    plt.hist(result_drug, bins = 20, color = '#3AB9D1', alpha=0.3, density=True)
                    plt.hist(result_dmso, bins = 20, color = 'grey', alpha=0.3,density=True)
                    plt.xlim([min([np.percentile(result_drug,1),np.percentile(result_dmso,1)]),max([np.percentile(result_drug,99),np.percentile(result_dmso,99)])])
                    plt.savefig('../results/Investigate_CellularHeterogeneity/'+drug+'/'+feature+'/'+str(plate)+'_Hist.pdf')
                    plt.close()

                    # Side-by-side box plot, treatment vs. DMSO.
                    plt.boxplot([result_drug,result_dmso], whis = 1.5, showfliers = True)
                    plt.xticks([1,2],[drug,'DMSO'])
                    plt.savefig('../results/Investigate_CellularHeterogeneity/'+drug+'/'+feature+'/'+str(plate)+'_Box.pdf')
                    plt.close()
    finally:
        db.close()

CLOUD112_Batch2
CLOUD057_Batch2
CLOUD089_Batch2
CLOUD031_Batch2
CLOUD053_Batch2
CLOUD117_Batch2
CLOUD103_Batch2
CLOUD115_Batch2
CLOUD077_Batch2
CLOUD129_Batch2


### Choose specific features / plate

In [23]:
# Gather, for one fixed plate, the outlier-filtered per-cell values of a few
# hand-picked features for every perturbation and for the matching DMSO
# wells. Results are stored as
#   drug_feature_results_to_plot[entry][feature] = {'Drug': [...], 'DMSO': [...]}
db = MySQLdb.connect("menchelabdb.int.cemm.at","root","cqsr4h","ImageAnalysisDDI" )

features = [
    'Cells_Intensity_StdIntensity_MitoTracker',
    'Cells_Granularity_12_BetaTubulin',
    'Nuclei_AreaShape_MaximumRadius',
    'Cells_AreaShape_MaxFeretDiameter',
]
plate = 1315111

# Placeholders: feature column, batch digit, comma-separated image numbers.
query_template = 'select ImageNumber,ObjectNumber,%s from DPN1018Batch%sPer_Object where ImageNumber in (%s);'

drug_feature_results_to_plot = {}
for entry in Image_Number_For_Drugs:
    # entry is '<drug>_Batch<n>'; reduce the batch part to its digit
    drug,batch_ = entry.split('_')
    batch_ = batch_[5]
    drug_feature_results_to_plot[entry] = {}
    print(drug)

    # Image numbers of the treated wells and of the DMSO wells on this plate
    images_drug = Image_Number_For_Drugs[entry][plate]
    imageNumberString_drug = ','.join(map(str, images_drug))

    images_dmso = Image_Number_For_DMSO[batch_][plate]
    imageNumberString_dmso = ','.join(map(str, images_dmso))

    for feature in features:
        ensure_dir('../results/Investigate_CellularHeterogeneity/'+drug+'/'+feature+'/')

        # Treated cells: remove NaN first, then the median-based outliers
        string = query_template % (feature, batch_, imageNumberString_drug)
        result_drug = list(pandas.read_sql(string, con=db)[feature].values)
        result_drug = [x for x in result_drug if str(x) != 'nan']
        result_drug = reject_outliers_2(result_drug,6)

        # DMSO cells: identical treatment
        string = query_template % (feature, batch_, imageNumberString_dmso)
        result_dmso = list(pandas.read_sql(string, con=db)[feature].values)
        result_dmso = [x for x in result_dmso if str(x) != 'nan']
        result_dmso = reject_outliers_2(result_dmso,6)

        drug_feature_results_to_plot[entry][feature] = {
            'Drug': result_drug,
            'DMSO': result_dmso,
        }

db.close()

CLOUD112
CLOUD077
CLOUD115
CLOUD057
CLOUD089


In [18]:


# One violin plot per feature: a violin for every perturbation plus a final
# DMSO violin, with dashed guide lines at the 5th and 95th percentile of the
# DMSO values. Plots are saved as PDF in the 'Final' results folder.

# savefig does not create missing folders, so make sure the target exists.
ensure_dir('../results/Investigate_CellularHeterogeneity/Final/')

entries = list(Image_Number_For_Drugs.keys())

# The DMSO reference is taken from the last entry (made explicit here rather
# than relying on the loop variable leaking out of the inner loop).
# NOTE(review): assumes all entries share the same batch/plate so that their
# DMSO values are interchangeable - confirm.
reference_entry = entries[-1]

for feature in features:

    data = []
    drug_names = []
    for entry in entries:
        drug,batch_ = entry.split('_')
        drug_names.append(drug)
        data.append(drug_feature_results_to_plot[entry][feature]['Drug'])

    dmso_values = drug_feature_results_to_plot[reference_entry][feature]['DMSO']
    data.append(dmso_values)
    drug_names.append('DMSO')

    Percent_95 = np.percentile(dmso_values,95)
    Percent_5 = np.percentile(dmso_values,5)

    sns.violinplot(data=data,scale='width')
    plt.axhline(Percent_95,ls='--',color='grey')
    plt.axhline(Percent_5,ls='--',color='grey')
    # The violins sit at positions 0..len(data)-1; tick locations and labels
    # must have the same length (previously one location too many was passed).
    plt.xticks(range(len(data)),drug_names, fontsize=5)
    plt.xlabel('Treatment')
    plt.ylabel(feature)
    plt.savefig('../results/Investigate_CellularHeterogeneity/Final/'+str(feature)+'_Violin.pdf')

    plt.close()
    
    

  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


In [20]:
# Build Image_Number_For_Drugs[batch][treatment][plate] -> list of image
# numbers. DMSO wells are selected by ID_A "DMSO" / ID_B "None"; any other
# treatment is only queried when drug_decay flags it as True for that batch,
# and is then selected by ID_A <drug> / ID_B "DMSO".
Image_Number_For_Drugs = {'Batch1':{},'Batch2':{}}

db = MySQLdb.connect("menchelabdb.int.cemm.at","root","cqsr4h","ImageAnalysisDDI" )

batches = ['1','2']
for batch_ in batches:
    batch_key = 'Batch'+batch_

    for drug in significant_perturbations[batch_key]:

        # Pick the query for this treatment; skip treatments that are
        # neither DMSO nor flagged in drug_decay.
        if drug == 'DMSO':
            string = 'select ImageNumber,Image_Metadata_Plate from DPN1018Batch'+batch_+'Per_Image where Image_Metadata_ID_A like "DMSO" and Image_Metadata_ID_B like "None";'
        elif drug_decay[drug][batch_key] == True:
            string = 'select ImageNumber,Image_Metadata_Plate from DPN1018Batch'+batch_+'Per_Image where Image_Metadata_ID_A like "'+drug+'" and Image_Metadata_ID_B like "DMSO";'
        else:
            continue

        ImageNumbers = pandas.read_sql(string, con=db)

        # Each row is (ImageNumber, Image_Metadata_Plate); group the image
        # numbers by plate, creating the nested containers on first use.
        for line in ImageNumbers.iterrows():
            Drug_ImageNumber = line[1][0]
            Drug_PlateNumber = line[1][1]

            plates_of_drug = Image_Number_For_Drugs[batch_key].setdefault(drug, {})
            plates_of_drug.setdefault(Drug_PlateNumber, []).append(Drug_ImageNumber)

db.close()

In [30]:
# Reduced version of the overview: for every batch and treatment, compare the
# cells of the FIRST plate only and the FIRST TWO selected features against
# the DMSO cells of the same batch and plate. No outlier rejection is applied
# here, only NaN removal. One CSV row plus a histogram and a box plot (without
# fliers) are produced per (treatment, plate, feature).
#
# The ``with`` block and the ``finally`` clause make sure the CSV is closed
# and the database connection (previously left open) is released, also when
# an error interrupts the loop.
with open('../results/Investigate_CellularHeterogeneity/Result_Overview.csv','w') as fp_out:
    fp_out.write('Batch,Drug,Plate,Feature,Cohens"D,Abs(CohenD),Coefficient_Variation,KS_Normality,MW_PVal\n')

    db = MySQLdb.connect("menchelabdb.int.cemm.at","root","cqsr4h","ImageAnalysisDDI" )
    try:
        for batch_ in Image_Number_For_Drugs:
            print(batch_)
            for drug in Image_Number_For_Drugs[batch_]:

                # Only the first plate of each treatment is analysed.
                for plate in list(Image_Number_For_Drugs[batch_][drug])[0:1]:
                    images_drug = Image_Number_For_Drugs[batch_][drug][plate]
                    imageNumberString_drug = ','.join([str(x) for x in images_drug])

                    # DMSO wells are stored under the 'DMSO' key of the same batch.
                    images_dmso = Image_Number_For_Drugs[batch_]['DMSO'][plate]
                    imageNumberString_dmso = ','.join([str(x) for x in images_dmso])

                    # Only the first two features are analysed.
                    for feature in selected_Features[0:2]:
                        ensure_dir('../results/Investigate_CellularHeterogeneity/'+drug+'/'+feature+'/')

                        # batch_ is already 'Batch<n>', so it completes the table name.
                        string = 'select ImageNumber,ObjectNumber,'+feature+' from DPN1018'+batch_+'Per_Object where ImageNumber in ('+imageNumberString_drug+');'
                        result_drug = list(pandas.read_sql(string, con=db)[feature].values)
                        result_drug = [x for x in result_drug if str(x) != 'nan']

                        string = 'select ImageNumber,ObjectNumber,'+feature+' from DPN1018'+batch_+'Per_Object where ImageNumber in ('+imageNumberString_dmso+');'
                        result_dmso = list(pandas.read_sql(string, con=db)[feature].values)
                        result_dmso = [x for x in result_dmso if str(x) != 'nan']

                        cd = cohen_d(result_drug,result_dmso)
                        # Corrected Mann-Whitney p-value, capped at 1.
                        # NOTE(review): the factor counts ALL features and ALL
                        # plates although only a subset is tested here - confirm
                        # this is intended.
                        mw_Pval = min([1,mw(result_drug,result_dmso)[1] * (len(selected_Features) * len(list(Image_Number_For_Drugs[batch_][drug])) * 2)])

                        # Coefficient of variation of the treated cells.
                        coev_var = np.std(result_drug)/np.mean(result_drug)
                        # Despite the column name this is the Shapiro-Wilk p-value.
                        KS_Normality = stats.shapiro(result_drug)[1]

                        fp_out.write(batch_+','+drug+','+str(plate)+','+feature+','+str(cd)+','+str(abs(cd))+','+str(coev_var)+','+str(KS_Normality)+','+str(mw_Pval)+'\n')

                        # Overlaid density histograms, clipped to the joint
                        # 1st-99th percentile range. (bins = 14 looks prettier)
                        plt.hist(result_drug, bins = 20, color = '#3AB9D1', alpha=0.3, density=True)
                        plt.hist(result_dmso, bins = 20, color = 'grey', alpha=0.3,density=True)
                        plt.xlim([min([np.percentile(result_drug,1),np.percentile(result_dmso,1)]),max([np.percentile(result_drug,99),np.percentile(result_dmso,99)])])
                        plt.savefig('../results/Investigate_CellularHeterogeneity/'+drug+'/'+feature+'/'+str(plate)+'_Hist.pdf')
                        plt.close()

                        # Side-by-side box plot, treatment vs. DMSO.
                        plt.boxplot([result_drug,result_dmso], whis = 1.5, showfliers = False)
                        plt.xticks([1,2],[drug,'DMSO'])
                        plt.savefig('../results/Investigate_CellularHeterogeneity/'+drug+'/'+feature+'/'+str(plate)+'_Box.pdf')
                        plt.close()
    finally:
        db.close()

Batch2
Batch1
