# In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import glob
from sklearn.preprocessing import StandardScaler

# --- Load and clean the raw data files ---------------------------------
path = "./data/processed.*.data"
# Sorted so the row order of the combined frame is reproducible between runs
# (glob itself returns matches in arbitrary order).
files = sorted(glob.glob(path))
print("\nfiles:",files)
if not files:
    # Fail with an actionable message instead of pandas' generic
    # "No objects to concatenate" error.
    raise FileNotFoundError(f"No data files match pattern {path!r}")

columns = ["age", "sex", "cp", "trestbps", "chol", "fbs", "restecg","thalach", "exang", "oldpeak", "slope", "ca", "thal", "target"]

# NOTE(review): header=None assumes the files carry no header row, which is
# implied by the column names being assigned manually below -- confirm against
# the data. Without it, the first record of every file is consumed as a header
# and the per-file frames no longer share column labels when concatenated.
frames = [
    pd.read_csv(f, sep=",", header=None, encoding="latin1") for f in files
]
# ignore_index gives the combined frame unique row labels instead of repeating
# each file's 0..n-1 labels.
df = pd.concat(frames, ignore_index=True)

# Keep the first 14 columns and give them their names.
df = df.iloc[:, :14]
df.columns = columns

# Coerce every cell to a number: text that cannot be parsed becomes NaN, and
# NaN (missing) cells are then replaced with 0. Coercing per cell keeps the
# valid numbers in a column that was read as text because of a few
# non-numeric entries, rather than zeroing the whole column.
df = df.apply(pd.to_numeric, errors="coerce").fillna(0)

# Separate the label column from the predictors: "target" is the label and
# every remaining column of `df` is treated as a feature.
target = df["target"]
features = df.drop(columns=["target"])

# Standardize the feature columns with scikit-learn's StandardScaler and wrap
# the resulting array back into a DataFrame carrying the feature names.
scaler = StandardScaler()
standardized_data = scaler.fit_transform(features)
standardized_df = pd.DataFrame(
    data=standardized_data,
    columns=features.columns,
)
print(standardized_df)

#Exploratory Data Analysis
# `standardized_df` contains feature columns only (the target is dropped
# before scaling), so every column is plotted. Slicing off the last column
# here would silently leave a real feature out of all three plots.
plot_columns = list(standardized_df.columns)
grid_cols = 3
# Ceiling division: enough rows to give every feature its own subplot.
grid_rows = max(1, -(-len(plot_columns) // grid_cols))
# 2.5 inches of height per row (a 4-row grid is 12 x 10 inches).
grid_figsize = (12, 2.5 * grid_rows)

# Histograms (with KDE overlay) of the unscaled values in `df`.
plt.figure(figsize=grid_figsize)
for i, column in enumerate(plot_columns, 1):
    plt.subplot(grid_rows, grid_cols, i)
    sns.histplot(df[column], kde=True)
    plt.title(f'Distribution of {column}')
plt.tight_layout()
plt.show()

# Pairwise correlation between all standardized features.
corr = standardized_df.corr()

plt.figure(figsize=(8, 6))
sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1, center=0)
plt.title('Correlation Heatmap')
plt.show()

# Boxplots of the standardized features, one subplot per feature.
plt.figure(figsize=grid_figsize)
for i, column in enumerate(plot_columns, 1):
    plt.subplot(grid_rows, grid_cols, i)
    sns.boxplot(y=standardized_df[column])
    plt.title(f'Boxplot of {column}')
plt.tight_layout()
plt.show()

def main():
    """Print a one-line confirmation that the script ran; returns None."""
    message = "Heart Disease dataset loaded and standardized."
    print(message)


# Only invoke main() when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()