In [7]:
import pandas as pd


In [None]:
# Read the T3 expression table into `df`.
# The file is parsed as comma-separated, which is the pandas default delimiter.
df = pd.read_csv('T3_expression_data.csv')

In [9]:

# Derive per-gene fold-change columns as plain differences of expression columns,
# using T3L as the reference in both contrasts:
#   log2FC_hypothyroid  = CTR - T3L
#   log2FC_hyperthyroid = T3H - T3L
# NOTE(review): a difference is only a log2 fold change if CTR/T3L/T3H are already
# log2-scale expression values -- confirm against the input file.
reference_t3l = df['T3L']
df['log2FC_hypothyroid'] = df['CTR'].sub(reference_t3l)
df['log2FC_hyperthyroid'] = df['T3H'].sub(reference_t3l)

In [10]:

# Build one table per condition: gene identifiers, the condition's fold change,
# and the matching contrast column from the input, with the last two renamed to
# the shared names 'log2FC' and 'Significance'.
# NOTE(review): the hyperthyroid contrast column is named 'T3L-T3H' while its fold
# change is computed as T3H - T3L -- confirm the two use a compatible direction.
hypothyroid_df = (
    df[['GeneSymbol', 'Gene ID (Biomart)', 'log2FC_hypothyroid', 'CTR-T3L']]
    .rename(columns={'log2FC_hypothyroid': 'log2FC', 'CTR-T3L': 'Significance'})
)
hyperthyroid_df = (
    df[['GeneSymbol', 'Gene ID (Biomart)', 'log2FC_hyperthyroid', 'T3L-T3H']]
    .rename(columns={'log2FC_hyperthyroid': 'log2FC', 'T3L-T3H': 'Significance'})
)



In [11]:

# Write each condition table to disk. Every row is written -- no filtering on
# 'Significance' happens here.
# Note: the files are tab-separated even though they carry a .csv extension, so
# they must be read back with sep='\t'.
condition_outputs = {
    'hypothyroid_condition.csv': hypothyroid_df,
    'hyperthyroid_condition.csv': hyperthyroid_df,
}
for out_path, condition_df in condition_outputs.items():
    condition_df.to_csv(out_path, sep='\t', index=False)

print("Files generated: hypothyroid_condition.csv (CTR vs T3L), hyperthyroid_condition.csv (T3H vs T3L)")


Files generated: hypothyroid_condition.csv (CTR vs T3L), hyperthyroid_condition.csv (T3H vs T3L)


In [13]:
# Build a single combined table for visualisation.

# Re-read the two tab-separated per-condition files and give their value columns
# condition-specific names, so they stay distinguishable after the merge.
hypo_df = pd.read_csv('hypothyroid_condition.csv', sep='\t').rename(
    columns={'log2FC': 'log2FC_hypo', 'Significance': 'Significance_hypo'}
)
hyper_df = pd.read_csv('hyperthyroid_condition.csv', sep='\t').rename(
    columns={'log2FC': 'log2FC_hyper', 'Significance': 'Significance_hyper'}
)

# Outer-join on both identifier columns so a gene present in either file is kept.
# NOTE(review): if a (GeneSymbol, Gene ID) pair is repeated or missing (NaN) in
# the inputs, the merge emits one row per matching combination -- check key
# uniqueness if the row count of the result looks inflated.
merge_keys = ['GeneSymbol', 'Gene ID (Biomart)']
combined_df = hypo_df.merge(hyper_df, how='outer', on=merge_keys)

# Persist the result (comma-separated, unlike the per-condition inputs).
combined_df.to_csv('combined_background.csv', index=False)

print("✅ Combined background dataset saved to 'combined_background.csv'")


✅ Combined background dataset saved to 'combined_background.csv'
