In [1]:
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import chi2_contingency

In [2]:
# Load data
# Reads the CSV into the notebook-global DataFrame `X_train`. The path is
# relative, so it resolves against the kernel's current working directory.
# NOTE(review): presumably one column per symptom/feature - confirm against the file.
X_train = pd.read_csv('../../data/processed/enhanced_X_train.csv')

In [3]:
def analyze_symptom_correlations(X, output_path='../../src/visualizations/symptom_correlations.png'):
    """Compute pairwise column correlations and optionally save a heatmap.

    Parameters
    ----------
    X : pandas.DataFrame
        Table whose columns are correlated with one another via ``X.corr()``.
    output_path : str, pathlib.Path or None, optional
        Where the heatmap image is written. The default is the location this
        function has always used. Missing parent directories are created
        before saving. Pass ``None`` to skip plotting entirely and only
        compute the matrix.

    Returns
    -------
    pandas.DataFrame
        The correlation matrix produced by ``X.corr()``.
    """
    corr_matrix = X.corr()

    if output_path is None:
        # Caller only wants the numbers; no figure is created or written.
        return corr_matrix

    output_path = Path(output_path)
    # savefig does not create directories, so make sure the target exists.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    fig, ax = plt.subplots(figsize=(12, 12))
    try:
        # Tick labels are switched off on both axes (xticklabels/yticklabels=False).
        sns.heatmap(
            corr_matrix,
            cmap='coolwarm',
            xticklabels=False,
            yticklabels=False,
            ax=ax,
        )
        ax.set_title('Symptom Correlations (Heatmap)')
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

    return corr_matrix

In [4]:
def create_symptom_network(corr_matrix, threshold=0.3):
    """Build an undirected graph linking strongly correlated symptoms.

    Every unordered pair of distinct rows/columns whose absolute correlation
    is strictly greater than ``threshold`` becomes an edge, and the edge's
    ``weight`` attribute is that absolute correlation. Labels that have no
    qualifying partner are not added to the graph as nodes.

    Parameters
    ----------
    corr_matrix : pandas.DataFrame
        Matrix of pairwise correlations; its index labels are used as node
        names for both endpoints of an edge.
    threshold : float, default 0.3
        Minimum absolute correlation (exclusive) for an edge to be created.

    Returns
    -------
    networkx.Graph
        Graph containing one weighted edge per qualifying pair.
    """
    G = nx.Graph()

    labels = corr_matrix.index
    strength = np.abs(corr_matrix.to_numpy())

    # Visit each unordered pair once: strict upper triangle, row-major order
    # (the same order as a nested ``for i / for j in range(i + 1, n)`` loop).
    rows, cols = np.triu_indices(len(corr_matrix), k=1)
    pair_strength = strength[rows, cols]

    # NaN compares False here, so undefined correlations never create edges.
    keep = pair_strength > threshold

    for i, j, weight in zip(rows[keep], cols[keep], pair_strength[keep]):
        G.add_edge(labels[i], labels[j], weight=weight)

    return G

In [5]:
def analyze_symptom_patterns():
    """Placeholder for the common-symptom-pattern analysis.

    Not implemented yet: the function takes no arguments, performs no work
    and returns ``None``.
    """
    # TODO: add the pattern analysis here.
    return None

In [6]:
# Run analyses
# 1) Correlation matrix of the X_train columns. As a side effect,
#    analyze_symptom_correlations also writes the heatmap image to disk.
corr_matrix = analyze_symptom_correlations(X_train)
# 2) Graph of pairs whose absolute correlation exceeds the function's
#    default threshold (0.3).
symptom_network = create_symptom_network(corr_matrix)

In [7]:
# Persist both artefacts together in a single joblib file
results = dict(
    correlation_matrix=corr_matrix,
    symptom_network=symptom_network,
)
joblib.dump(results, '../../data/processed/symptom_analysis.joblib')


['../../data/processed/symptom_analysis.joblib']