# Analysis of GFP mean and integrated intensity vs GFP concentration

## Config

### The following code imports and declares functions used for the processing:

In [None]:
#################################
#  Don't modify the code below  #
#################################

import intake_io
import os
import re
import numpy as np
import pylab as plt
import seaborn as sns
from skimage import io
import pandas as pd
from tqdm import tqdm
from skimage.measure import regionprops_table

from am_utils.utils import walk_dir, imsave
from am_utils.parallel import run_parallel

from punctatools.lib.preprocess import rescale_intensity

## Data & parameters

### Data
`input_dir`: folder with images to be analyzed

`output_dir`: folder to save results


## Specify data paths and analysis parameters

### Please provide data paths:

In [None]:
# Folder with the images to be analyzed; it is passed to `walk_dir` to list the samples.
input_dir = "/research/sharedresources/cbi/data_exchange/kriwagrp/0520201_GFPcalibration/converted_tiff"
# Folder that receives the results: the `quantification` tables and the `plots` subfolder.
output_dir = "/research/sharedresources/cbi/data_exchange/kriwagrp/0520201_GFPcalibration"

### The following code lists all datasets in the input directory:

In [None]:
#################################
#  Don't modify the code below  #
#################################
# Collect the entries that `walk_dir` reports for the input folder.
samples = walk_dir(input_dir)

# Report the number of entries, then the entries themselves on the next line.
print(f'{len(samples)} images were found:', np.array(samples), sep='\n')

### The following code loads a sample dataset:

In [None]:
#################################
#  Don't modify the code below  #
#################################

# Fail with an actionable message instead of numpy's "high <= 0" error
# when the listing step returned nothing.
if len(samples) == 0:
    raise ValueError(f'No images were found in {input_dir}; cannot load a sample dataset')

# Pick one of the listed images at random and load it for a quick visual check.
sample = samples[np.random.randint(len(samples))]
dataset = intake_io.imload(sample)
print(dataset)


### The following code displays the maximum projection (along the first axis) of the sample image:

In [None]:
#################################
#  Don't modify the code below  #
#################################

plt.figure(figsize=(10, 10))

# Collapse the first axis of the loaded image by taking the maximum along it,
# then pass the projection through `rescale_intensity` before showing it.
# NOTE(review): skimage.io.imshow appears to be deprecated in recent
# scikit-image releases -- confirm against the installed version.
max_projection = dataset['image'].data.max(0)
io.imshow(rescale_intensity(max_projection))

### The following code quantifies all input images:

In [None]:
%%time
#################################
#  Don't modify the code below  #
#################################

def _parse_concentration_nm(condition: str):
    """Return the GFP concentration in nM encoded in a condition name.

    A number directly followed by ``nM`` is taken as is; a number directly
    followed by ``uM`` is multiplied by 1000. If both units occur, the ``uM``
    value wins. If neither occurs, 0 is returned.
    """
    conc_nM = 0
    nm_match = re.search(r'([0-9]*\.?[0-9]+)nM', condition)
    if nm_match:
        conc_nM = float(nm_match.group(1))
    um_match = re.search(r'([0-9]*\.?[0-9]+)uM', condition)
    if um_match:
        conc_nM = float(um_match.group(1)) * 1000
    return conc_nM


def _quantification_csv_path(sample: str, input_dir: str, output_dir: str) -> str:
    """Return the csv path under ``output_dir/quantification`` mirroring ``sample``.

    The path of ``sample`` relative to ``input_dir`` is kept and only the final
    file extension is swapped for ``.csv``.
    """
    # relpath copes with a trailing slash on input_dir; splitext strips only the
    # real extension rather than every occurrence of its text in the path.
    relative_stem = os.path.splitext(os.path.relpath(sample, input_dir))[0]
    return os.path.join(output_dir, 'quantification', relative_stem + '.csv')


def quantify(item, input_dir, output_dir,  **kwargs_to_ignore):
    """Measure the whole-image GFP intensity of one image and save it as csv.

    Parameters
    ----------
    item : str
        Path of the image to quantify.
    input_dir : str
        Root folder of the input images; used to derive the relative output path.
    output_dir : str
        Root folder for the results; the table is written below
        ``<output_dir>/quantification``, mirroring the input folder structure.
    **kwargs_to_ignore
        Accepted and ignored, so that a shared keyword dictionary can be passed.

    Returns
    -------
    None
        The one-row table (image volume, mean and integrated intensity,
        condition, concentration, file name) is written to disk.
    """
    sample = item
    dataset = intake_io.imload(sample)

    puncta_signal = np.array(dataset['image'].data)

    # One label covering the entire image, so the "region" statistics are
    # whole-image statistics. The label image is given an explicit integer dtype
    # because np.ones_like would inherit a float dtype from float images, which
    # regionprops is expected to reject.
    whole_image_label = np.ones(puncta_signal.shape, dtype=np.uint8)
    stats = pd.DataFrame(regionprops_table(label_image=whole_image_label,
                                           intensity_image=puncta_signal,
                                           properties=['area', 'mean_intensity']))

    stats = stats.rename(columns={'area': 'image volume pix',
                                  'mean_intensity': 'GFP mean intensity per image'})
    stats['GFP integrated intensity per image'] = (stats['GFP mean intensity per image']
                                                   * stats['image volume pix'])

    # The condition is the name of the folder that directly contains the image.
    condition = os.path.basename(os.path.dirname(sample))

    stats['condition'] = condition
    stats['GFP concentration nM'] = _parse_concentration_nm(condition)
    stats['sample'] = os.path.basename(sample)

    # save the stats
    csv_path = _quantification_csv_path(sample, input_dir, output_dir)
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    stats.to_csv(csv_path, index=False)

    return

# specify the analysis arguments
kwargs = dict()
kwargs['items'] = samples
kwargs['input_dir'] = input_dir
kwargs['output_dir'] = output_dir

# Quantify the images one at a time; the extra `items` entry is absorbed by
# quantify's **kwargs_to_ignore.
for item in tqdm(samples):
    quantify(item=item, **kwargs)

# combine the per-image stats
print('Combining stats...')
# Read every table first and concatenate once: growing a DataFrame inside the
# loop re-copies all rows on each iteration and starts from an empty frame.
# NOTE: every csv found below the quantification folder is combined, including
# any left over from earlier runs.
per_image_stats = [pd.read_csv(fn) for fn in walk_dir(os.path.join(output_dir, 'quantification'))]
if per_image_stats:
    # ignore_index=True gives each image its own row label instead of repeating
    # the labels of the individual tables.
    stats = pd.concat(per_image_stats, ignore_index=True)
else:
    stats = pd.DataFrame()
stats.to_csv(output_dir + '/quantification.csv', index=False)

In [None]:
# Show the combined per-image table as the cell output.
stats

### The following code plots the per-image GFP intensity stats against GFP concentration:

In [None]:
#################################
#  Don't modify the code below  #
#################################
os.makedirs(output_dir + '/plots', exist_ok=True)
stats = pd.read_csv(output_dir + '/quantification.csv')

concentration_col = 'GFP concentration nM'
mean_col = 'GFP mean intensity per image'
integrated_col = 'GFP integrated intensity per image'

# Add log10-transformed copies of the plotted columns. Non-positive values
# (e.g. a concentration stored as 0) have no logarithm; they are masked to NaN
# so they are left out of the log plots instead of becoming -inf with a
# RuntimeWarning.
for col in [concentration_col, mean_col, integrated_col]:
    stats['Log ' + col] = np.log10(stats[col].where(stats[col] > 0))

# (x column, y column, output file name) for each scatter plot, in plotting order.
plot_specs = [
    (concentration_col, mean_col,
     'mean_intensity_vs_GFP_concentration.png'),
    (concentration_col, integrated_col,
     'integrated_intensity_vs_GFP_concentration.png'),
    ('Log ' + concentration_col, 'Log ' + mean_col,
     'mean_intensity_vs_GFP_concentration_logscale.png'),
    ('Log ' + concentration_col, 'Log ' + integrated_col,
     'integrated_intensity_vs_GFP_concentration_logscale.png'),
]

for x_col, y_col, filename in plot_specs:
    plt.figure(figsize=(10, 6))
    ax = sns.scatterplot(x=x_col, y=y_col, data=stats)
    plt.savefig(output_dir + '/plots/' + filename)