In [1]:
import pandas as pd
import numpy as np
from scipy import stats

# Load the raw per-well feature table from the Excel workbook into `df`
input_xlsx = '/home/gauss/Desktop/CellProfiler_Features/O8W.xlsx'
df = pd.read_excel(input_xlsx)


In [2]:
# Per-plate robust normalization of the "Mean_" feature columns.
#
# Every plate is scaled against its own DMSO wells:
#     normalized = (value - median_DMSO) / MAD_DMSO
# where MAD is the unscaled median absolute deviation around the median.
# Rows are emitted plate by plate (order of first appearance in `df`) and
# re-indexed from 0, so the output row order can differ from `df`.
# Caveats:
#   * rows whose 'Plate number' is NaN never match a plate and are dropped;
#   * a plate without any DMSO well yields NaN for all of its features.

# Get the list of unique plate numbers
plate_numbers = df['Plate number'].unique()

# Feature columns are identical for every plate, so identify them once
feature_cols = [col for col in df.columns if col.startswith('Mean_')]

# Initialize an empty list to store normalized dataframes
normalized_dfs = []

# Iterate through each plate
for plate in plate_numbers:
    # Filter data for the current plate
    plate_data = df[df['Plate number'] == plate]

    # Identify DMSO wells and keep only their feature values
    dmso_data = plate_data[plate_data['Compound'] == 'DMSO']
    dmso_features = dmso_data[feature_cols]

    # Median and MAD are both computed with pandas so that missing values are
    # skipped consistently. (scipy's median_abs_deviation propagates NaN by
    # default, which would blank a feature for the whole plate as soon as one
    # DMSO well has a missing value, while .median() silently skips it.)
    dmso_median = dmso_features.median()
    dmso_mad = (dmso_features - dmso_median).abs().median()

    # A zero MAD (feature constant across DMSO wells) would produce a mix of
    # +/-inf and NaN after division; mark such features as NaN instead.
    dmso_mad = dmso_mad.mask(dmso_mad == 0)

    # Perform normalization on a copy so `df` itself is left untouched
    normalized_data = plate_data.copy()
    normalized_data[feature_cols] = (normalized_data[feature_cols] - dmso_median) / dmso_mad

    # Append normalized data to the list
    normalized_dfs.append(normalized_data)

# Combine all normalized dataframes
df_normalized = pd.concat(normalized_dfs, ignore_index=True)

# Drop every column whose name begins with "Median_" or "StDev_", or whose
# name ends in "_X" or "_Y"; all remaining columns keep their original order.
unwanted_prefixes = ('Median_', 'StDev_')
unwanted_suffixes = ('_X', '_Y')

columns_to_keep = [
    name
    for name in df_normalized.columns
    if not name.startswith(unwanted_prefixes) and not name.endswith(unwanted_suffixes)
]

df_normalized_filtered = df_normalized[columns_to_keep]

# Write the filtered, normalized table to a new workbook (row index omitted)
output_xlsx = '/home/gauss/Desktop/CellProfiler_Features/O8W_normalized.xlsx'
df_normalized_filtered.to_excel(output_xlsx, index=False)
