In [None]:
%pip install matplotlib-venn

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib_venn import venn2

In [None]:

# Read the processed per-condition tables; both files are parsed as tab-separated text
hypothyroid_df = pd.read_table('../data/processed/hypothyroid_condition.csv')
hyperthyroid_df = pd.read_table('../data/processed/hyperthyroid_condition.csv')


In [None]:

# Venn diagram of the genes flagged significant (Significance == 1) in each condition.
# dropna() keeps rows without a gene symbol out of the sets: NaN values are not
# reliably deduplicated by set(), so they could inflate the region counts.
hypo_genes = set(
    hypothyroid_df.loc[hypothyroid_df['Significance'] == 1, 'GeneSymbol'].dropna()
)
hyper_genes = set(
    hyperthyroid_df.loc[hyperthyroid_df['Significance'] == 1, 'GeneSymbol'].dropna()
)

plt.figure(figsize=(6, 6))
venn2([hypo_genes, hyper_genes], ('No T3', 'High T3'))
plt.title('Gene Overlap Between Conditions')
plt.savefig('../figures/main/venn_diagram.png')
plt.show()

# Report only what this cell produced; no volcano plots are drawn in this notebook.
print("Venn diagram saved to '../figures/main/venn_diagram.png'.")


In [None]:
# Create top DEGs table
def create_top_degs_table(df: pd.DataFrame, condition: str, n: int = 5) -> pd.DataFrame:
    """Build a table of the strongest up- and down-regulated genes.

    Parameters
    ----------
    df : pd.DataFrame
        Table with at least the columns 'GeneSymbol', 'Gene ID (Biomart)'
        and 'log2FC'. It is not modified.
    condition : str
        Label of the condition. Currently unused; kept so existing calls
        keep working.
    n : int, default 5
        Maximum number of genes to report per direction.

    Returns
    -------
    pd.DataFrame
        Up to ``n`` rows with the largest positive log2FC (Direction
        'Upregulated') followed by up to ``n`` rows with the most negative
        log2FC (Direction 'Downregulated'), with a fresh 0..k-1 index.
    """
    columns = ['GeneSymbol', 'Gene ID (Biomart)', 'log2FC']

    # Restrict each side by the sign of log2FC so the Direction label is always
    # true: ranking alone would label a negative fold change 'Upregulated' (or
    # list one gene under both directions) when few genes lie on one side.
    top_high = (
        df[df['log2FC'] > 0]
        .nlargest(n, 'log2FC')[columns]
        .assign(Direction='Upregulated')
    )
    top_low = (
        df[df['log2FC'] < 0]
        .nsmallest(n, 'log2FC')[columns]
        .assign(Direction='Downregulated')
    )

    return pd.concat([top_high, top_low]).reset_index(drop=True)

# Build one top-DEG table per condition
hypothyroid_top_degs, hyperthyroid_top_degs = (
    create_top_degs_table(condition_df, condition_label)
    for condition_df, condition_label in (
        (hypothyroid_df, 'Hypothyroid'),
        (hyperthyroid_df, 'Hyperthyroid'),
    )
)


In [None]:

# Render both top-DEG tables, hypothyroid first
display(hypothyroid_top_degs, hyperthyroid_top_degs)

In [None]:
# Pair up the genes that are significant (Significance == 1) in both conditions.
# Filtering before the merge keeps the join small, and dropping rows without a
# GeneSymbol stops pandas from matching missing keys (NaN) to each other, which
# would add spurious rows to the overlap.
hypo_significant = hypothyroid_df[hypothyroid_df['Significance'] == 1].dropna(subset=['GeneSymbol'])
hyper_significant = hyperthyroid_df[hyperthyroid_df['Significance'] == 1].dropna(subset=['GeneSymbol'])

# Inner join on the gene symbol; columns present in both tables get the _hypo / _hyper suffix
significant_genes = hypo_significant.merge(hyper_significant, on='GeneSymbol', suffixes=('_hypo', '_hyper'))

# Rename log2FC columns for clarity
significant_genes = significant_genes.rename(columns={'log2FC_hypo': 'log2FC_hypothyroid', 'log2FC_hyper': 'log2FC_hyperthyroid'})


In [None]:
# Show the merged table of genes that are significant in both conditions
display(significant_genes)

In [None]:

# Genes whose two fold changes share a sign, i.e. the product of the log2FC values is positive
same_direction = significant_genes.loc[
    lambda genes: genes['log2FC_hypothyroid'].mul(genes['log2FC_hyperthyroid']).gt(0)
]

display(same_direction)


In [None]:
# Scatter the same-direction genes: hypothyroid log2FC on x, hyperthyroid log2FC on y
fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(
    same_direction['log2FC_hypothyroid'],
    same_direction['log2FC_hyperthyroid'],
    color='blue',
    alpha=0.7,
)

# Annotate every point with its gene symbol
for x, y, symbol in zip(
    same_direction['log2FC_hypothyroid'],
    same_direction['log2FC_hyperthyroid'],
    same_direction['GeneSymbol'],
):
    ax.text(x, y, symbol, fontsize=8)

# Dashed zero lines split the plot into quadrants
ax.axhline(0, linestyle='--', color='black', linewidth=1)
ax.axvline(0, linestyle='--', color='black', linewidth=1)

ax.set_xlabel('log2FC Hypothyroid')
ax.set_ylabel('log2FC Hyperthyroid')
ax.set_title('Genes Changing in the Same Direction')
fig.savefig('../figures/main/same_direction_plot.png')
plt.show()

In [None]:
# Genes whose two fold changes have opposite signs, i.e. the product of the log2FC values is negative
opposite_direction = significant_genes.loc[
    lambda genes: genes['log2FC_hypothyroid'].mul(genes['log2FC_hyperthyroid']).lt(0)
]

display(opposite_direction)

In [None]:
# Split the opposing-direction genes by which condition shows the positive fold change
opposite_direction_up = significant_genes.loc[
    lambda genes: genes['log2FC_hypothyroid'].gt(0) & genes['log2FC_hyperthyroid'].lt(0)
]
opposite_direction_down = significant_genes.loc[
    lambda genes: genes['log2FC_hypothyroid'].lt(0) & genes['log2FC_hyperthyroid'].gt(0)
]

In [None]:
# Scatter the two opposing-direction groups on shared axes, one colour per group
opposing_groups = [
    (opposite_direction_up, 'red', 'Hypo Up, Hyper Down'),
    (opposite_direction_down, 'green', 'Hypo Down, Hyper Up'),
]

fig, ax = plt.subplots(figsize=(10, 8))
for group_df, group_colour, group_label in opposing_groups:
    ax.scatter(
        group_df['log2FC_hypothyroid'],
        group_df['log2FC_hyperthyroid'],
        color=group_colour,
        alpha=0.7,
        label=group_label,
    )

# Annotate every point with its gene symbol (after all scatter layers are drawn)
for group_df, _group_colour, _group_label in opposing_groups:
    for x, y, symbol in zip(
        group_df['log2FC_hypothyroid'],
        group_df['log2FC_hyperthyroid'],
        group_df['GeneSymbol'],
    ):
        ax.text(x, y, symbol, fontsize=8)

# Dashed zero lines split the plot into quadrants
ax.axhline(0, linestyle='--', color='black', linewidth=1)
ax.axvline(0, linestyle='--', color='black', linewidth=1)

ax.set_xlabel('log2FC Hypothyroid')
ax.set_ylabel('log2FC Hyperthyroid')
ax.set_title('Genes Changing in Opposing Directions')
ax.legend()
fig.savefig('../figures/main/opposing_direction_plot.png')
plt.show()

In [None]:
# Re-derive the three direction categories from the genes significant in both conditions
same_direction = significant_genes.loc[
    lambda genes: genes['log2FC_hypothyroid'].mul(genes['log2FC_hyperthyroid']).gt(0)
]
opposite_direction_up = significant_genes.loc[
    lambda genes: genes['log2FC_hypothyroid'].gt(0) & genes['log2FC_hyperthyroid'].lt(0)
]
opposite_direction_down = significant_genes.loc[
    lambda genes: genes['log2FC_hypothyroid'].lt(0) & genes['log2FC_hyperthyroid'].gt(0)
]

# One scatter layer per category; the list order sets the legend order
direction_groups = [
    (same_direction, 'blue', 'Same Direction'),
    (opposite_direction_up, 'red', 'No T3 Up, High T3 Down'),
    (opposite_direction_down, 'green', 'No T3 Down, High T3 Up'),
]

# Everything goes on a single set of axes
fig, ax = plt.subplots(figsize=(10, 8))
for group_df, group_colour, group_label in direction_groups:
    ax.scatter(
        group_df['log2FC_hypothyroid'],
        group_df['log2FC_hyperthyroid'],
        color=group_colour,
        alpha=0.7,
        label=group_label,
    )

# Gene symbol next to every point (can get crowded when there are many genes)
for group_df, _group_colour, _group_label in direction_groups:
    for x, y, symbol in zip(
        group_df['log2FC_hypothyroid'],
        group_df['log2FC_hyperthyroid'],
        group_df['GeneSymbol'],
    ):
        ax.text(x, y, symbol, fontsize=12)

# Dashed zero lines split the plot into quadrants
ax.axhline(0, linestyle='--', color='black', linewidth=1)
ax.axvline(0, linestyle='--', color='black', linewidth=1)

# Labels, title and layout
ax.set_xlabel('log2FC No T3', fontsize=14)
ax.set_ylabel('log2FC High T3', fontsize=14)
ax.set_title('Differential Expression: Same vs Opposing Direction Genes', fontsize=18)
# Cap the x-axis at 1.0; the lower limit is left as it is
ax.set_xlim(None, 1.0)
ax.legend(fontsize=12)
fig.tight_layout()
fig.savefig('../figures/main/combined_direction_plot.png')
plt.show()


In [None]:
# Write the DEG tables to CSV with unrounded values and every column kept.
# NOTE: the following cell writes to the same two paths, so these files are
# overwritten when the notebook is run top to bottom.

# 1. Significant genes of each condition, stacked, with a 'Condition' label column
hypo_sig = hypothyroid_df.loc[hypothyroid_df['Significance'].eq(1)].assign(Condition='no T3')
hyper_sig = hyperthyroid_df.loc[hyperthyroid_df['Significance'].eq(1)].assign(Condition='high T3')

all_degs = pd.concat([hypo_sig, hyper_sig], ignore_index=True)
all_degs.to_csv('../data/results/all_degs.csv', index=False)

# 2. Overlapping DEGs: a plain copy of the table of genes significant in both conditions
overlapping_degs = significant_genes.copy()
overlapping_degs.to_csv('../data/results/overlapping_degs.csv', index=False)

# Row counts of what was written
print(f"Exported {len(all_degs)} total DEGs to '../data/results/all_degs.csv'")
print(f"Exported {len(overlapping_degs)} overlapping DEGs to '../data/results/overlapping_degs.csv'")

In [None]:
# Write the tidy DEG tables to CSV: log2FC rounded to 2 decimals, Significance columns removed

# 1. Significant genes of each condition, stacked, with a 'Condition' label column
hypo_sig = hypothyroid_df.loc[hypothyroid_df['Significance'].eq(1)].assign(Condition='no T3')
hyper_sig = hyperthyroid_df.loc[hyperthyroid_df['Significance'].eq(1)].assign(Condition='high T3')

all_degs = (
    pd.concat([hypo_sig, hyper_sig], ignore_index=True)
    .assign(log2FC=lambda degs: degs['log2FC'].round(2))
    .drop(columns=['Significance'])
)
all_degs.to_csv('../data/results/all_degs.csv', index=False)

# 2. Overlapping DEGs (significant in both conditions): round both fold-change
#    columns, keep the hypothyroid-side Gene ID under its plain name and drop
#    the hyperthyroid-side Gene ID together with both Significance columns
overlapping_degs = (
    significant_genes
    .assign(
        log2FC_hypothyroid=lambda degs: degs['log2FC_hypothyroid'].round(2),
        log2FC_hyperthyroid=lambda degs: degs['log2FC_hyperthyroid'].round(2),
    )
    .rename(columns={'Gene ID (Biomart)_hypo': 'Gene ID (Biomart)'})
    .drop(columns=['Gene ID (Biomart)_hyper', 'Significance_hypo', 'Significance_hyper'])
)
overlapping_degs.to_csv('../data/results/overlapping_degs.csv', index=False)

# Row counts of what was written
print(f"Exported {len(all_degs)} total DEGs to '../data/results/all_degs.csv'")
print(f"Exported {len(overlapping_degs)} overlapping DEGs to '../data/results/overlapping_degs.csv'")