# Can I use dMtb to assess whether it is uptake versus transfer?

In [1]:
import os
import warnings

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.lines import Line2D
from scipy import stats
from tqdm.auto import tqdm

from macrohet import visualise

# Hide every FutureWarning for the rest of the session; deprecation notices
# raised by later cells will therefore not be shown.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Conversion factor
meters_per_pixel = 1.4949402023919043E-07  # Meters per pixel
micrometers_per_pixel = meters_per_pixel*1E6 # Micrometers per pixel

# set display params
sns.set(style = 'white')
# Set the default font to Nimbus Sans
# NOTE(review): presumably used as a Helvetica stand-in — confirm.
# Assigned after sns.set() so the seaborn theme does not overwrite it.
mpl.rcParams['font.family'] = 'Nimbus Sans'
# Disabled: reset matplotlib to its defaults
# import matplotlib as mpl
# mpl.rcdefaults()
# Fetch the project's 'expanded_piyg' palette colours and make them the
# default seaborn colour cycle
expanded_piyg = visualise.color_palette('expanded_piyg').colors
sns.set_palette(expanded_piyg)

In [3]:
# Directory that receives this notebook's outputs; created below if missing.
# Other candidate locations (commented out):
#   /mnt/DATA/macrohet/results/preliminary_sc_measures/cumulative_sc_plots/thresholded_480/
#   /Users/dayn/data/macrohet_mac/temp_results/
output_dir = '/mnt/SYNO/macrohet_syno/results/manuscript/uptake_vs_transfer'
os.makedirs(output_dir, exist_ok = True)

In [4]:
# Root of the project tree; the paths joined onto it below are relative to this.
base_dir = '/mnt/SYNO/macrohet_syno/'

In [5]:
# Location of the pickled sc_df table underneath base_dir
sc_df_fn = os.path.join(base_dir, 'results/dfs/sc_df.pkl')

In [6]:
# Load the table from disk. Unpickling can execute arbitrary code, so only
# read pickle files that come from a trusted source.
df = pd.read_pickle(sc_df_fn)

In [7]:
# Display the loaded frame (the rendered view abbreviates middle rows/columns)
df

Unnamed: 0,Time (hours),Mtb Area (µm),dMtb Area (µm),Mphi Area (µm),dMphi Area (µm),Infection Status,Initial Infection Status,Final Infection Status,x,y,...,Strain,Compound,Concentration,Cell ID,Acquisition ID,Experiment ID,Unique ID,ID,Edge Status,Uptake
0,0.0,0.424621,-0.424621,459.082108,-92.746118,False,False,False,457.172943,8.791715,...,RD1,CTRL,EC0,426,"(3, 4)",PS0000,426.3.4,426.3.4.PS0000,True,False
1,1.0,0.000000,-0.424621,372.839393,-92.746118,False,False,False,459.138947,6.718642,...,RD1,CTRL,EC0,426,"(3, 4)",PS0000,426.3.4,426.3.4.PS0000,True,False
2,2.0,0.000000,-0.424621,423.168130,-92.746118,False,False,False,460.555237,10.785886,...,RD1,CTRL,EC0,426,"(3, 4)",PS0000,426.3.4,426.3.4.PS0000,True,False
3,3.0,0.335227,-0.424621,445.561289,-92.746118,False,False,False,455.878815,11.143067,...,RD1,CTRL,EC0,426,"(3, 4)",PS0000,426.3.4,426.3.4.PS0000,True,False
4,4.0,0.000000,-0.424621,487.040034,-92.746118,False,False,False,462.634186,14.050420,...,RD1,CTRL,EC0,426,"(3, 4)",PS0000,426.3.4,426.3.4.PS0000,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1797106,74.5,1.117423,1.095075,752.584461,55.088959,False,False,False,227.093262,130.358139,...,RD1,BDQ,EC99,1874,"(6, 12)",ND0003,1874.6.12,1874.6.12.ND0003,False,False
1797107,75.0,2.033710,1.095075,616.035357,55.088959,True,False,False,226.622925,130.532120,...,RD1,BDQ,EC99,1874,"(6, 12)",ND0003,1874.6.12,1874.6.12.ND0003,False,False
1797108,75.5,1.430302,1.095075,579.182743,55.088959,False,False,False,225.366608,129.932709,...,RD1,BDQ,EC99,1874,"(6, 12)",ND0003,1874.6.12,1874.6.12.ND0003,False,False
1797109,76.0,1.989013,1.095075,679.147414,55.088959,True,False,False,226.424683,128.489410,...,RD1,BDQ,EC99,1874,"(6, 12)",ND0003,1874.6.12,1874.6.12.ND0003,False,False


# TODO: change the uptake category defined below to include a dMtb size filter

In [135]:
# Define a function to check the condition for each ID
def check_uptake_condition(group):
    """Report whether one group starts uninfected and ends infected.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows belonging to one 'ID' (as handed over by ``groupby('ID').apply``),
        containing 'Initial Infection Status' and 'Final Infection Status'.

    Returns
    -------
    bool
        Truthy only when the first row's 'Initial Infection Status' equals
        False and the last row's 'Final Infection Status' equals True.
    """
    status_at_start = group['Initial Infection Status'].iloc[0]
    status_at_end = group['Final Infection Status'].iloc[-1]

    # Value equality (not identity or truthiness) is used on purpose: it treats
    # numpy bools and 0.0/1.0 floats alike, and a NaN status never matches.
    started_uninfected = status_at_start == False
    if not started_uninfected:
        # Same object the original `and` expression yields when short-circuiting
        return started_uninfected
    return status_at_end == True

# Evaluate the uptake rule once per ID -> boolean Series indexed by ID
uptake_by_id = df.groupby('ID').apply(check_uptake_condition)
# Keep only the IDs for which the rule held
ids_with_uptake = uptake_by_id[uptake_by_id].index

# Broadcast the per-ID verdict back to every row: True for rows whose ID
# satisfied the rule, False for all others
df['Uptake'] = df['ID'].isin(ids_with_uptake)

In [137]:
# Count how many rows carry each 'Uptake' flag value (these are row counts,
# not counts of distinct IDs)
df['Uptake'].value_counts()

False    1134338
True      180671
Name: uptake, dtype: int64

In [141]:
# Persist the frame, now including the 'Uptake' column.
# NOTE: this is the same path as sc_df_fn, so the pickle that was loaded as
# input at the top of the notebook is overwritten in place.
df.to_pickle(os.path.join(base_dir, 'results/dfs/sc_df.pkl'))

# Edit these categories to include others

In [17]:
# Classify every track (all rows sharing an 'ID') as Growth, Reduction or
# Containment from the slope of a straight-line fit to its smoothed Mtb area.

# Slope cut-off: a change of 1.92 in 'Mtb Area (µm)' across 70 hours.
# NOTE(review): the origin of 1.92 and 70 is not recorded in this notebook — confirm.
SLOPE_THRESHOLD = 1.92/70


def _smoothed_mtb_slope(track, window):
    """Return the slope of a linear fit of smoothed Mtb area against time.

    Parameters
    ----------
    track : pandas.DataFrame
        Rows of a single ID with 'Time (hours)' and 'Mtb Area (µm)' columns,
        in the order in which they should be fitted.
    window : int
        Number of consecutive rows in the rolling-median window.

    Returns
    -------
    float
        Regression slope. NaN when the track has fewer than two rows or all of
        its time values are identical, since no line can be fitted then.
    """
    time_points = track['Time (hours)'].values
    bacterial_population = (
        track['Mtb Area (µm)']
        .interpolate(method='linear')   # fill interior gaps linearly
        .bfill()                        # fill any leading NaNs from the first valid value
        .rolling(window=window)
        .median()
        .bfill()                        # the first window-1 medians are NaN; backfill them
        .values
    )
    # linregress cannot fit these degenerate tracks; report "no slope" instead
    if len(time_points) < 2 or np.all(time_points == time_points[0]):
        return np.nan
    return stats.linregress(time_points, bacterial_population).slope


slope_by_id = {}
# groupby splits the frame once instead of re-scanning every row for each ID;
# sort=False keeps the IDs in order of first appearance.
for unique_ID, sc_dt_df in tqdm(df.groupby('ID', sort=False)):
    # median rolling window based on 5 hours (dependent on time frame of expt):
    # 5 rows for 'PS0000' IDs, 10 rows for all others
    window = 5 if 'PS0000' in unique_ID else 10
    ### first non zero value TO DO
    slope_by_id[unique_ID] = _smoothed_mtb_slope(sc_dt_df, window)

# Broadcast each track's slope to all of its rows, then threshold it.
# A NaN slope fails every comparison, so such tracks are False in all three
# columns. The columns are created directly as booleans rather than by writing
# bools into float NaN columns.
track_slope = df['ID'].map(slope_by_id)
df['Growth'] = track_slope >= SLOPE_THRESHOLD
df['Reduction'] = track_slope <= -SLOPE_THRESHOLD
df['Containment'] = (track_slope > -SLOPE_THRESHOLD) & (track_slope < SLOPE_THRESHOLD)

  0%|          | 0/13405 [00:00<?, ?it/s]

In [18]:
# Persist the frame together with the Growth / Reduction / Containment columns.
# NOTE: the following cell reads dt_df.pkl, which is a different file from the
# one written here.
df.to_pickle('/mnt/SYNO/macrohet_syno/results/dfs/dt_df_initial_cats.pkl')

In [5]:
# Replace the working frame with the table stored in dt_df.pkl
df = pd.read_pickle('/mnt/SYNO/macrohet_syno/results/dfs/dt_df.pkl')

# Build a combined label column: start from the compound name ...
df['Strain/Compound'] = df['Compound']
# ... then label every row whose strain is 'RD1' as 'RD1', whatever its compound
rd1_rows = df['Strain'] == 'RD1'
df.loc[rd1_rows, 'Strain/Compound'] = 'RD1'

df

Unnamed: 0,index,Acquisition ID,Biological Replicate,Cell ID,Compound,Concentration,Eccentricity,Experiment ID,Final Infection Status,Frame,...,Unique ID,dMphi Area (µm),dMtb Area (µm),dt,r2,x,y,doubling_times,dmtb_values,Strain/Compound
0,0,"(3, 4)",1,426,CTRL,EC0,0.929721,PS0000,0.0,0.0,...,426.3.4,-92.746118,-0.424621,7.0,0.944356,457.172943,8.791715,"[2.0, 3.0, 4.0, 9.0, 17.0]","[0.15, 0.31, 0.62, 1.23, 2.46]",RD1
1,1,"(3, 4)",1,426,CTRL,EC0,0.977788,PS0000,0.0,1.0,...,426.3.4,-92.746118,-0.424621,7.0,0.944356,459.138947,6.718642,"[2.0, 3.0, 4.0, 9.0, 17.0]","[0.15, 0.31, 0.62, 1.23, 2.46]",RD1
2,2,"(3, 4)",1,426,CTRL,EC0,0.733415,PS0000,0.0,2.0,...,426.3.4,-92.746118,-0.424621,7.0,0.944356,460.555237,10.785886,"[2.0, 3.0, 4.0, 9.0, 17.0]","[0.15, 0.31, 0.62, 1.23, 2.46]",RD1
3,3,"(3, 4)",1,426,CTRL,EC0,0.685354,PS0000,0.0,3.0,...,426.3.4,-92.746118,-0.424621,7.0,0.944356,455.878815,11.143067,"[2.0, 3.0, 4.0, 9.0, 17.0]","[0.15, 0.31, 0.62, 1.23, 2.46]",RD1
4,4,"(3, 4)",1,426,CTRL,EC0,0.727210,PS0000,0.0,4.0,...,426.3.4,-92.746118,-0.424621,7.0,0.944356,462.634186,14.050420,"[2.0, 3.0, 4.0, 9.0, 17.0]","[0.15, 0.31, 0.62, 1.23, 2.46]",RD1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1315004,1797106,"(6, 12)",3,1874,BDQ,EC99,0.571813,ND0003,1.0,,...,1874.6.12,55.088959,1.095075,,,227.093262,130.358139,[],[],RD1
1315005,1797107,"(6, 12)",3,1874,BDQ,EC99,0.373264,ND0003,1.0,,...,1874.6.12,55.088959,1.095075,,,226.622925,130.532120,[],[],RD1
1315006,1797108,"(6, 12)",3,1874,BDQ,EC99,0.490623,ND0003,1.0,,...,1874.6.12,55.088959,1.095075,,,225.366608,129.932709,[],[],RD1
1315007,1797109,"(6, 12)",3,1874,BDQ,EC99,0.693277,ND0003,1.0,,...,1874.6.12,55.088959,1.095075,,,226.424683,128.489410,[],[],RD1
