In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from six import StringIO
from IPython.display import Image
import pydotplus

In [None]:
# Columns kept for the analysis; 'Heart Attack Risk' is the last entry.
selected_columns = ['Age', 'Sex', 'Cholesterol', 'Blood Pressure', 'Diabetes', 'Smoking', 'Obesity',
                    'Exercise Hours Per Week', 'Diet', 'Previous Heart Problems', 'Medication Use',
                    'Stress Level', 'BMI', 'Physical Activity Days Per Week', 'Heart Attack Risk']

# Read the CSV and keep only the selected columns, in the order listed above.
data = pd.read_csv('heart_attack_prediction_dataset.csv')[selected_columns]

# Split the column names by dtype: numeric columns vs. object (string) columns.
numeric_columns = data.select_dtypes(include=['number']).columns
non_numeric_columns = data.select_dtypes(include=['object']).columns

# Report both groups, each followed by blank lines, then preview the first rows.
for heading, names in (("Numeric Columns:\n", numeric_columns),
                       ("Non-Numeric Columns:\n", non_numeric_columns)):
    print(heading + ', '.join(names))
    print('\n')
print(data.head())

In [None]:
# Line plots: every numeric column drawn against the row index on a two-column grid.
num_cols = 2  # grid width
num_plots = len(numeric_columns)
num_rows = -(-num_plots // num_cols)  # ceiling division: rows needed to hold every plot

fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 5*num_rows))

for i, col in enumerate(numeric_columns):
    ax = axes[divmod(i, num_cols)]  # (row, column) grid position of the i-th plot
    sns.lineplot(x=data.index, y=data[col], ax=ax)
    ax.set_title(f'Line Plot of {col}')

# Delete the grid cells that did not receive a plot.
for spare_ax in axes.flatten()[num_plots:]:
    fig.delaxes(spare_ax)

plt.tight_layout()
plt.show()


In [None]:
# Histograms: a 20-bin histogram for each numeric column, laid out two per row.
num_cols_hist = 2  # grid width
n_hist = len(numeric_columns)
num_rows_hist = n_hist // num_cols_hist + (1 if n_hist % num_cols_hist else 0)  # round up

fig_hist, axes_hist = plt.subplots(num_rows_hist, num_cols_hist, figsize=(15, 5*num_rows_hist))

for i, col in enumerate(numeric_columns):
    hist_ax = axes_hist[divmod(i, num_cols_hist)]  # (row, column) grid position
    sns.histplot(data[col], bins=20, ax=hist_ax)
    hist_ax.set_title(f'Histogram of {col}')

# Delete the grid cells that did not receive a histogram.
for unused_ax in axes_hist.flatten()[n_hist:]:
    fig_hist.delaxes(unused_ax)

plt.tight_layout()
plt.show()


In [None]:
# Density plots: a filled KDE curve for each numeric column, laid out two per row.
num_cols_density = 2  # grid width
n_density = len(numeric_columns)
full_rows, leftover = divmod(n_density, num_cols_density)
num_rows_density = full_rows + (leftover > 0)  # one extra row for a partial last row

fig_density, axes_density = plt.subplots(num_rows_density, num_cols_density, figsize=(15, 5*num_rows_density))

for i, col in enumerate(numeric_columns):
    density_ax = axes_density[divmod(i, num_cols_density)]  # (row, column) grid position
    sns.kdeplot(data[col], fill=True, ax=density_ax)
    density_ax.set_title(f'Density Plot of {col}')

# Delete the grid cells that did not receive a density plot.
for unused_ax in axes_density.flatten()[n_density:]:
    fig_density.delaxes(unused_ax)

plt.tight_layout()
plt.show()


In [None]:
# Vertical Bar Plot
# One bar per row for every numeric column, laid out two plots per grid row.
# The bars are drawn with Matplotlib's Axes.bar instead of sns.barplot: passing
# x=data.index to seaborn makes every row its own category, which was slow
# enough that the original run had to be stopped.
num_cols_bar = 2  # Set the number of columns for the bar plot
num_rows_bar = (len(numeric_columns) // num_cols_bar) + (len(numeric_columns) % num_cols_bar > 0)

# squeeze=False keeps axes_bar two-dimensional even when a single row is enough,
# so the [row, column] indexing below always works.
fig_bar, axes_bar = plt.subplots(num_rows_bar, num_cols_bar, figsize=(15, 5*num_rows_bar), squeeze=False)

for i, col in enumerate(numeric_columns):
    row_idx = i // num_cols_bar
    col_idx = i % num_cols_bar

    bar_ax = axes_bar[row_idx, col_idx]
    bar_ax.bar(data.index, data[col], width=1.0)  # width=1.0: adjacent bars touch
    bar_ax.set_xlabel('Row index')
    bar_ax.set_ylabel(col)
    bar_ax.set_title(f'Vertical Bar of {col}')

# Remove empty subplots
for i in range(len(numeric_columns), num_rows_bar * num_cols_bar):
    fig_bar.delaxes(axes_bar.flatten()[i])

plt.tight_layout()
plt.show()


In [None]:
# preprocessing
print("\n************ preprocessing ************")
print("check missing values:\n", data.isnull().sum(), '\n')
# Missing values are only reported above; no rows are dropped.


def _leading_reading(value):
    """Return the text before the first '/' in `value` as a float, or float(value) if there is no '/'."""
    if '/' in str(value):
        return float(value.split('/')[0])
    return float(value)


# Integer-encode the two categorical columns. The same encoder object is refit
# for each column, so after the loop `le` holds the fit for 'Diet'.
le = LabelEncoder()
for column in ('Sex', 'Diet'):
    data[column] = le.fit_transform(data[column])

# Keep only the first number of each "a/b" blood-pressure reading.
data['Blood Pressure'] = data['Blood Pressure'].apply(_leading_reading)

print("After preprocessing:\n")
for column in ('Diet', 'Sex', 'Blood Pressure'):
    print(data[column], "\n")

In [None]:
# Line Plot (after preprocessing)
# Re-select the numeric columns from the current frame. A column list built
# before preprocessing leaves out 'Sex', 'Diet' and 'Blood Pressure', which only
# become numeric once they are encoded/parsed, so the figures would simply
# repeat the pre-processing ones.
processed_numeric_columns = data.select_dtypes(include=['number']).columns

num_plots = len(processed_numeric_columns)
num_cols = 2  # Set the number of columns in the grid
num_rows = (num_plots // num_cols) + (num_plots % num_cols > 0)  # Calculate the number of rows needed

# squeeze=False keeps `axes` two-dimensional even when a single row is enough.
fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 5*num_rows), squeeze=False)

for i, col in enumerate(processed_numeric_columns):
    row_idx = i // num_cols
    col_idx = i % num_cols

    sns.lineplot(x=data.index, y=data[col], ax=axes[row_idx, col_idx])
    axes[row_idx, col_idx].set_title(f'Line Plot of {col}')

# Remove empty subplots
for i in range(num_plots, num_rows * num_cols):
    fig.delaxes(axes.flatten()[i])

plt.tight_layout()
plt.show()


In [None]:
# Histogram plot (after preprocessing)
# Re-select the numeric columns from the current frame so that columns which
# only became numeric during preprocessing ('Sex', 'Diet', 'Blood Pressure')
# are included instead of repeating the pre-processing column list.
processed_numeric_columns = data.select_dtypes(include=['number']).columns

num_cols_hist = 2  # Set the number of columns for the histogram plot
num_rows_hist = (len(processed_numeric_columns) // num_cols_hist) + (len(processed_numeric_columns) % num_cols_hist > 0)

# squeeze=False keeps axes_hist two-dimensional even when a single row is enough.
fig_hist, axes_hist = plt.subplots(num_rows_hist, num_cols_hist, figsize=(15, 5*num_rows_hist), squeeze=False)

for i, col in enumerate(processed_numeric_columns):
    row_idx = i // num_cols_hist
    col_idx = i % num_cols_hist

    sns.histplot(data[col], bins=20, ax=axes_hist[row_idx, col_idx])
    axes_hist[row_idx, col_idx].set_title(f'Histogram of {col}')

# Remove empty subplots
for i in range(len(processed_numeric_columns), num_rows_hist * num_cols_hist):
    fig_hist.delaxes(axes_hist.flatten()[i])

plt.tight_layout()
plt.show()


In [None]:
# Density Plot (after preprocessing)
# Re-select the numeric columns from the current frame so that columns which
# only became numeric during preprocessing ('Sex', 'Diet', 'Blood Pressure')
# are included instead of repeating the pre-processing column list.
processed_numeric_columns = data.select_dtypes(include=['number']).columns

num_cols_density = 2  # Set the number of columns for the density plot
num_rows_density = (len(processed_numeric_columns) // num_cols_density) + (len(processed_numeric_columns) % num_cols_density > 0)

# squeeze=False keeps axes_density two-dimensional even when a single row is enough.
fig_density, axes_density = plt.subplots(num_rows_density, num_cols_density, figsize=(15, 5*num_rows_density), squeeze=False)

for i, col in enumerate(processed_numeric_columns):
    row_idx = i // num_cols_density
    col_idx = i % num_cols_density

    sns.kdeplot(data[col], fill=True, ax=axes_density[row_idx, col_idx])
    axes_density[row_idx, col_idx].set_title(f'Density Plot of {col}')

# Remove empty subplots
for i in range(len(processed_numeric_columns), num_rows_density * num_cols_density):
    fig_density.delaxes(axes_density.flatten()[i])

plt.tight_layout()
plt.show()


In [None]:
# Vertical Bar Plot (after preprocessing)
# Re-select the numeric columns from the current frame so that columns which
# only became numeric during preprocessing ('Sex', 'Diet', 'Blood Pressure')
# are included instead of repeating the pre-processing column list.
processed_numeric_columns = data.select_dtypes(include=['number']).columns

num_cols_bar = 2  # Set the number of columns for the bar plot
num_rows_bar = (len(processed_numeric_columns) // num_cols_bar) + (len(processed_numeric_columns) % num_cols_bar > 0)

# squeeze=False keeps axes_bar two-dimensional even when a single row is enough.
fig_bar, axes_bar = plt.subplots(num_rows_bar, num_cols_bar, figsize=(15, 5*num_rows_bar), squeeze=False)

# The bars are drawn with Matplotlib's Axes.bar instead of sns.barplot: passing
# x=data.index to seaborn makes every row its own category, which was slow
# enough that the original run had to be stopped.
for i, col in enumerate(processed_numeric_columns):
    row_idx = i // num_cols_bar
    col_idx = i % num_cols_bar

    bar_ax = axes_bar[row_idx, col_idx]
    bar_ax.bar(data.index, data[col], width=1.0)  # width=1.0: adjacent bars touch
    bar_ax.set_xlabel('Row index')
    bar_ax.set_ylabel(col)
    bar_ax.set_title(f'Vertical Bar of {col}')

# Remove empty subplots
for i in range(len(processed_numeric_columns), num_rows_bar * num_cols_bar):
    fig_bar.delaxes(axes_bar.flatten()[i])

plt.tight_layout()
plt.show()


In [None]:
# features (x) and target (y)
x = data.drop('Heart Attack Risk', axis=1)
y = data['Heart Attack Risk']

# decision tree classification
print("\n**************** Decision tree classification ******************")
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
# random_state makes the fitted tree (and therefore the printed accuracy)
# reproducible from run to run; without it the result can vary between runs.
dtree = DecisionTreeClassifier(criterion='entropy', random_state=42)  # gini or entropy
dtree.fit(x_train, y_train)
pred = dtree.predict(x_test)
accuracy = accuracy_score(y_test, pred)
print("Accuracy: ", accuracy)

In [None]:
# decision tree Visualization
# Export the fitted tree to Graphviz DOT text in memory and render it as a PNG.
dot_data = StringIO()
export_graphviz(
    dtree,
    out_file=dot_data,
    filled=True,
    rounded=True,
    # A plain list: some scikit-learn releases reject a pandas Index here.
    feature_names=list(x.columns),
    # Label nodes with the classes the tree was actually fitted on instead of
    # a hard-coded list that may not match the target.
    class_names=[str(label) for label in dtree.classes_],
)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
# Optional: also save the image to disk.
# graph.write_png('images/decisionTree/tree.png')
Image(graph.create_png())

In [None]:
# KNN classification
print("\n************ KNN classification *************")
# fit() returns the estimator itself, so construction and training chain together.
model = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)
pred = model.predict(x_test)
# Confusion matrix of true test labels against the KNN predictions.
matrix = confusion_matrix(y_true=y_test, y_pred=pred)
print("confusion_matrix:\n", matrix)

In [None]:
# Scores for the predictions currently held in `pred`, measured against y_test.
# accuracy score
acc = accuracy_score(y_test, pred)
print("accuracy: ", acc)
# precision (previously computed but never displayed)
pre = precision_score(y_test, pred)
print("precision: ", pre)
# recall
rec = recall_score(y_test, pred)
print("recall: ", rec)
# f1-measure
f1 = f1_score(y_test, pred)
print("f1-measure: ", f1)

In [None]:
# SVM classification
print("\n*********** SVM Classification *************")
SVM_Model = SVC(gamma='auto')
# Train and evaluate the SVM itself. Fitting `model` here would refit the
# KNN classifier and report its scores under the SVM heading.
SVM_Model.fit(x_train, y_train)
pred = SVM_Model.predict(x_test)
matrix = confusion_matrix(y_test, pred)
print("confusion_matrix:\n", matrix)
# SVM accuracy
acc = accuracy_score(y_test, pred)
print("SVM Accuracy: ", acc)