# Compare Fold-Change and Chi-Squared Results
### Group 33, Florida Atlantic University
- Compare top-ranked features from fold-change and chi-squared feature selection methods.
- Identify overlapping and unique features.
- Save results for biomarker ranking and downstream analysis.


#### Load and Inspect Data

In [None]:
# Load necessary libraries
import pandas as pd

# Read the two feature-selection result tables from the results folder
fold_change_df = pd.read_csv('../results/fold_change_results.csv')
chi2_ranked_df = pd.read_csv('../results/chi_squared_features.csv')

# Show the column index and the first rows of each table
for title, table in (
    ("Fold-change dataset:", fold_change_df),
    ("\nChi-squared dataset:", chi2_ranked_df),
):
    print(title)
    print("Columns:", table.columns)
    print(table.head())

#### Clean and Align Datasets

In [None]:
# Clean fold-change dataset: when the table still has the raw column names
# ("Unnamed: 0" is presumably the saved index holding feature names - TODO
# confirm), rename them to the shared 'Feature' / 'Score' layout.
if 'Unnamed: 0' in fold_change_df.columns:
    fold_change_df.rename(columns={"Unnamed: 0": "Feature", "0": "Score"}, inplace=True)

# Clean chi-squared dataset: keep only the feature name and its score, with the
# score column named 'Score' like the fold-change table.
if 'Chi2_Score' in chi2_ranked_df.columns:
    chi2_ranked_df = chi2_ranked_df[['Feature', 'Chi2_Score']].rename(columns={'Chi2_Score': 'Score'})
else:
    # This cell was already executed and 'Chi2_Score' is now 'Score'; selecting
    # the cleaned columns keeps the cell re-runnable while still raising
    # KeyError when the table has neither layout.
    chi2_ranked_df = chi2_ranked_df[['Feature', 'Score']].copy()

# Ensure feature names are standardized (string type, no surrounding
# whitespace) so the same feature compares equal across both tables
fold_change_df['Feature'] = fold_change_df['Feature'].astype(str).str.strip()
chi2_ranked_df['Feature'] = chi2_ranked_df['Feature'].astype(str).str.strip()

print("Cleaned datasets:")
print("\nFold-change:")
print(fold_change_df.head())
print("\nChi-squared:")
print(chi2_ranked_df.head())

#### Extract Top Features

In [None]:
# How many leading rows of each table count as that method's "top" features
n_features = 100

# Take the feature names from the first n_features rows of each table.
# NOTE(review): this relies on both CSVs already being ordered by rank;
# nothing in this notebook sorts them - confirm against the producing notebooks.
top_fold_change = fold_change_df['Feature'].head(n_features).tolist()
top_chi2 = chi2_ranked_df['Feature'].head(n_features).tolist()

print(f"Number of features selected from each method: {n_features}")

# Preview the first five features of each method with their scores
for heading, names, table in (
    ("\nExample fold-change features (first 5):", top_fold_change, fold_change_df),
    ("\nExample chi-squared features (first 5):", top_chi2, chi2_ranked_df),
):
    print(heading)
    for rank, name in enumerate(names[:5], start=1):
        # Score of the first row whose feature name matches
        matching_scores = table.loc[table['Feature'] == name, 'Score']
        print(f"{rank}. {name} (score: {matching_scores.iloc[0]:.4f})")

#### Find Overlapping Features

In [None]:
# Find overlapping features (kept as a set so later cells can use it as before)
overlaps = set(top_fold_change).intersection(top_chi2)

# Report overlaps in fold-change rank order. Iterating the set directly gives
# an order that can differ between Python sessions (string hash randomization),
# which makes the printed report non-reproducible.
ordered_overlaps = [f for f in dict.fromkeys(top_fold_change) if f in overlaps]

# Build feature -> score lookups once instead of scanning each full table for
# every feature. drop_duplicates keeps the first row per feature, matching the
# "first matching row" rule used elsewhere in this notebook.
fc_scores = fold_change_df.drop_duplicates('Feature').set_index('Feature')['Score']
chi2_scores = chi2_ranked_df.drop_duplicates('Feature').set_index('Feature')['Score']

print(f"Number of overlapping features: {len(overlaps)}")
if ordered_overlaps:
    print("\nOverlapping features:")
    for feature in ordered_overlaps:
        fc_score = fc_scores.at[feature]
        chi2_score = chi2_scores.at[feature]
        print(f"\n{feature}:")
        print(f"  Fold-change score: {fc_score:.4f}")
        print(f"  Chi-squared score: {chi2_score:.4f}")
else:
    print("\nNo overlapping features found")

#### Save Results

In [None]:
# Create summary DataFrame: how many features each method contributed and how
# many appear in both top lists
summary_data = {
    'Method': ['Fold Change', 'Chi-squared', 'Overlap'],
    'Number of Features': [len(top_fold_change), len(top_chi2), len(overlaps)]
}
summary_df = pd.DataFrame(summary_data)

# Save overlapping features if any exist.
# NOTE(review): when there are no overlaps, an overlapping_features.csv left by
# a previous run is not touched - confirm downstream code tolerates that.
if overlaps:
    # Write rows in fold-change rank order. Iterating the set directly gives an
    # order that can differ between Python sessions (string hash
    # randomization), so the saved CSV would not be reproducible.
    ordered_overlaps = [f for f in dict.fromkeys(top_fold_change) if f in overlaps]

    # Feature -> score lookups built once; drop_duplicates keeps the first row
    # per feature, i.e. the score of the first matching row in each table.
    fc_scores = fold_change_df.drop_duplicates('Feature').set_index('Feature')['Score']
    chi2_scores = chi2_ranked_df.drop_duplicates('Feature').set_index('Feature')['Score']

    overlap_df = pd.DataFrame({
        'Feature': ordered_overlaps,
        'Fold_Change_Score': fc_scores.loc[ordered_overlaps].to_numpy(),
        'Chi_Squared_Score': chi2_scores.loc[ordered_overlaps].to_numpy(),
    })
    overlap_df.to_csv('../results/overlapping_features.csv', index=False)

# Save summary
summary_df.to_csv('../results/feature_comparison_summary.csv', index=False)

print("Results saved to results folder")
print("\nSummary:")
print(summary_df)