# Gene Statistics Analysis
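This notebook computes per-gene summary statistics (min, max, mean, median, standard deviation, and variance) across all columns whose names end in `_raw_ratio`, then saves the augmented table as a tab-separated file.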

In [None]:
# Import necessary libraries
import pandas as pd

# Locations of the input table and of the file the results are written to
input_file = '/path/to/corrected_me_raw_ratios_final.txt'
output_file = '/path/to/final_dataset_with_gene_statistics.txt'

# Read the tab-separated input table into a DataFrame
final_df = pd.read_csv(input_file, sep="\t")

# Collect, in column order, every column whose name ends in "_raw_ratio"
ratio_columns = [
    name for name in final_df.columns if name.endswith("_raw_ratio")
]

In [None]:
# Calculate statistics per gene (each row is treated as one gene) across all
# ratio columns. Every statistic is a single vectorized row-wise reduction:
# DataFrame.agg([...], axis=1) would transpose the frame and run a separate
# Python-level aggregation for every row, which is very slow on large tables.
ratio_values = final_df[ratio_columns]

# Column names carry the "ratio_" prefix so they are unambiguous once merged.
stats_per_gene = pd.DataFrame(
    {
        "ratio_min": ratio_values.min(axis=1),
        "ratio_max": ratio_values.max(axis=1),
        "ratio_mean": ratio_values.mean(axis=1),
        "ratio_median": ratio_values.median(axis=1),
        # std/var use pandas' default ddof=1 (sample statistics); missing
        # values are skipped by default in all six reductions.
        "ratio_std": ratio_values.std(axis=1),
        "ratio_var": ratio_values.var(axis=1),
    }
).astype("float64")  # keep every statistic as float, even for integer-valued inputs

# Merge these statistics back into the original dataframe (aligned on the
# shared row index, so each gene keeps its own statistics)
final_with_gene_stats_df = pd.concat([final_df, stats_per_gene], axis=1)

In [None]:
# Write the table (original columns plus the per-gene statistics) as a
# tab-separated file, omitting the DataFrame index
final_with_gene_stats_df.to_csv(
    output_file,
    index=False,
    sep="\t",
)

# Report where the result was written
print(f"Updated dataset with per-gene statistics saved to {output_file}")