In [None]:

# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
df = pd.read_csv("heart.csv")

# Display dataset columns
print("Columns in the dataset:", df.columns.tolist())

# Display first and last few rows
print("\nFirst 5 Rows:\n", df.head())
print("\nLast 5 Rows:\n", df.tail())

# Display descriptive statistics
print("\nDescriptive Statistics:\n", df.describe())

# Check for missing values
print("\nMissing Values Before Imputation:\n", df.isnull().sum())

# Handle missing values using mean imputation.
# numeric_only=True restricts the means to numeric columns: on pandas >= 2.0
# a bare df.mean() raises TypeError as soon as the frame holds a non-numeric
# column. Non-numeric columns (if any) are simply left un-imputed.
# Reassigning instead of inplace=True keeps this step re-runnable and chainable.
# NOTE(review): this also mean-fills integer-coded columns (e.g. "cp",
# "restecg", "target" look like category codes) -- confirm a mean is an
# acceptable fill value for those.
column_means = df.mean(numeric_only=True)
df = df.fillna(column_means)

# Display missing values count after imputation
print("\nMissing Values After Imputation:\n", df.isnull().sum())

# Check data shape before outlier removal
print("\nShape before removing outliers:", df.shape)

# Remove outliers in the "chol" column using the IQR method:
# keep rows whose cholesterol lies within [Q1 - 1.5*IQR, Q3 + 1.5*IQR].
Q1 = df["chol"].quantile(0.25)
Q3 = df["chol"].quantile(0.75)
IQR = Q3 - Q1
chol_lower = Q1 - 1.5 * IQR
chol_upper = Q3 + 1.5 * IQR
df = df[(df["chol"] >= chol_lower) & (df["chol"] <= chol_upper)]

# Check data shape after outlier removal
print("\nShape after removing outliers:", df.shape)

In [None]:
# --- Cholesterol summary statistics ---
# np.std uses its default ddof=0 here (population standard deviation).
chol = df["chol"]
chol_mean, chol_median, chol_std = np.mean(chol), np.median(chol), np.std(chol)

print("\nCholesterol - Mean:", chol_mean)
print("Cholesterol - Median:", chol_median)
print("Cholesterol - Std Dev:", chol_std)

# --- Mean resting blood pressure, split by the "target" flag (1 vs 0) ---
has_disease = df["target"] == 1
no_disease = df["target"] == 0
trestbps_mean_hd = df.loc[has_disease, "trestbps"].mean()
trestbps_mean_nohd = df.loc[no_disease, "trestbps"].mean()

print("\nMean Blood Pressure (Heart Disease):", trestbps_mean_hd)
print("Mean Blood Pressure (No Heart Disease):", trestbps_mean_nohd)

# --- Extremes of the "thalach" (heart rate) column ---
heart_rate = df["thalach"]
thalach_max, thalach_min = heart_rate.max(), heart_rate.min()

print("\nMax Heart Rate:", thalach_max)
print("Min Heart Rate:", thalach_min)

# --- Rows ordered from highest to lowest cholesterol ---
df_sorted = df.sort_values("chol", ascending=False)
print("\nTop 5 Patients with Highest Cholesterol:\n", df_sorted.head())

# --- Rows with cholesterol strictly above 300 ---
high_chol_patients = df.loc[chol > 300]
print("\nPatients with Cholesterol > 300 mg/dL:\n", high_chol_patients)

# --- Rows with age above 60 AND a non-zero restecg code ---
over_60 = df["age"] > 60
ecg_abnormal = df["restecg"] > 0
abnormal_ecg_patients = df.loc[over_60 & ecg_abnormal]
print("\nPatients Older than 60 with Abnormal ECG:\n", abnormal_ecg_patients)

In [None]:
# Convert the frame to a 2-D NumPy array (one row per record, one column per
# feature), then cut it row-wise into two near-equal parts.
n_features = df.shape[1]
reshaped_data = df.to_numpy().reshape(-1, n_features)
split_data = np.array_split(reshaped_data, 2, axis=0)

print("Data reshaped and split into two parts.")

In [None]:
# Histogram (30 bins) of the "chol" column with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(data=df, x="chol", bins=30, kde=True, ax=ax)
ax.set_title("Cholesterol Distribution")
ax.set_xlabel("Cholesterol")
ax.set_ylabel("Frequency")
plt.show()

In [None]:
# Scatter plot of "age" (x) against "thalach" (y).
fig, ax = plt.subplots(figsize=(10, 5))
sns.scatterplot(data=df, x="age", y="thalach", ax=ax)
ax.set_title("Age vs Maximum Heart Rate")
ax.set_xlabel("Age")
ax.set_ylabel("Max Heart Rate (thalach)")
plt.show()

In [None]:
# Bar chart of how many rows fall into each "target" value.
plt.figure(figsize=(10, 5))
ax = sns.countplot(x=df["target"])
ax.set(
    title="Heart Disease vs No Heart Disease",
    xlabel="Heart Disease (0 = No, 1 = Yes)",
    ylabel="Count",
)
plt.show()

In [None]:
from mpl_toolkits.mplot3d import Axes3D

# 3-D scatter: chol on x, age on y, target on z; points are coloured by target.
fig, ax = plt.subplots(figsize=(8, 6), subplot_kw={"projection": "3d"})
ax.scatter(
    df["chol"],
    df["age"],
    df["target"],
    c=df["target"],
    cmap="coolwarm",
)
ax.set_xlabel("Cholesterol")
ax.set_ylabel("Age")
ax.set_zlabel("Heart Disease")
ax.set_title("3D Plot: Cholesterol vs Age vs Heart Disease")
plt.show()

In [None]:
# Pie chart of the share of rows per "cp" value, labelled with percentages.
cp_counts = df["cp"].value_counts()
fig, ax = plt.subplots(figsize=(8, 8))
cp_counts.plot.pie(ax=ax, autopct="%1.1f%%", cmap="coolwarm")
ax.set_title("Chest Pain Types Distribution")
ax.set_ylabel("")  # drop the default y-label pandas puts on pie charts
plt.show()