In [2]:
#Naive bayes:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Read the patient records from disk
data = pd.read_csv('Heart_Attack_Dataset.csv')

# The label is 'Heart Attack Risk'; every remaining column except the
# patient identifier is used as a feature
y = data['Heart Attack Risk']
X = data.drop(columns=['Patient ID', 'Heart Attack Risk'])

# Integer-code each object-dtype column with a dedicated LabelEncoder and
# keep the fitted encoder under the column's name in label_encoders
label_encoders = {}
for column in X.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    codes = le.fit_transform(X[column])
    X[column] = codes
    label_encoders[column] = le

# Standardize all features. Note that the scaler is fitted on every row
# (before any train/test split) and that X is rebound to the scaler's
# output rather than the original DataFrame.
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

# Train/test splits to compare: label -> fraction of rows held out for testing
split_ratios = {"70:30": 0.3, "50:50": 0.5, "60:40": 0.4, "80:20": 0.2}

# Accuracy in percent, recorded per split label
accuracy_results = {}

# Fit and score a fresh Gaussian Naive Bayes model for every split
for ratio_name, test_size in split_ratios.items():
    # random_state is pinned so each split is reproducible across runs
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=42
    )

    # fit() returns the estimator itself, so construction and training chain
    nb_model = GaussianNB().fit(X_train, y_train)

    # Score on the held-out rows and express the accuracy as a percentage
    y_pred = nb_model.predict(X_test)
    accuracy = 100 * accuracy_score(y_test, y_pred)

    accuracy_results[ratio_name] = accuracy

# One row per split, in the order the splits were evaluated
accuracy_df = pd.DataFrame({
    "Split Ratio": list(accuracy_results.keys()),
    "Accuracy (%)": list(accuracy_results.values()),
})

# Report the table
print("Naive Bayes has been applied to the dataset, and these are the accuracy results for different split ratios:")
print(accuracy_df)

# Persist the table next to the notebook (overwrites any existing file)
file_name = "naive_bayes_accuracy_splits.csv"
accuracy_df.to_csv(file_name, index=False)

print(f'Results saved in {file_name}')









Naive Bayes has been applied to the dataset, and these are the accuracy results for different split ratios:
  Split Ratio  Accuracy (%)
0       70:30     64.036667
1       50:50     64.228000
2       60:40     64.090000
3       80:20     64.205000
Results saved in naive_bayes_accuracy_splits.csv


In [6]:
#Logistic Regression:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score

# Read the patient records from disk
file_path = 'Heart_Attack_Dataset.csv'
heart_data = pd.read_csv(file_path)

# Step 1: Discard the identifier and geography columns
columns_to_drop = ["Patient ID", "Country", "Continent", "Hemisphere"]
heart_data_cleaned = heart_data.drop(columns_to_drop, axis=1)

# Step 2: Integer-code the columns listed as categorical.
# A single LabelEncoder is refitted for each column, so once the loop ends it
# only holds the mapping of the last column processed.
categorical_columns = ["Sex", "Diet", "Blood Pressure"]
encoder = LabelEncoder()
for col in categorical_columns:
    encoded_values = encoder.fit_transform(heart_data_cleaned[col])
    heart_data_cleaned[col] = encoded_values

# Step 3: Split the frame into the label and the feature matrix
y = heart_data_cleaned["Heart Attack Risk"]
X = heart_data_cleaned.drop("Heart Attack Risk", axis=1)

# Step 4: Standardize every float64/int64 feature column.
# The scaler is fitted on all rows, i.e. before any train/test split happens.
numerical_columns = X.select_dtypes(include=["float64", "int64"]).columns
scaler = StandardScaler()
scaler.fit(X[numerical_columns])
X[numerical_columns] = scaler.transform(X[numerical_columns])

# Step 5: Function to split, train, and evaluate logistic regression
def train_evaluate_logistic_regression(X, y, train_ratio):
    """Train logistic regression on one train/test split and score it.

    Args:
        X: Feature matrix (all columns numeric).
        y: Target labels aligned with the rows of ``X``.
        train_ratio: Fraction of rows used for training, strictly between
            0 and 1; the remainder forms the test set.

    Returns:
        dict: ``{"Accuracy": <test accuracy in percent>}``.
    """
    # 1 - 0.7 evaluates to 0.30000000000000004 and 1 - 0.8 to
    # 0.19999999999999996. scikit-learn sizes the test set by rounding
    # test_size * n_samples upwards, so that excess can add a stray row to
    # the test set. Rounding the complement removes the float noise.
    test_fraction = round(1 - train_ratio, 10)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_fraction, random_state=42
    )

    # Fit the scaler on the training rows only so that test-set statistics
    # never influence preprocessing. Standardization is affine, so this gives
    # the same features as scaling the raw data on the training rows alone,
    # even when X was already standardized on the full dataset.
    split_scaler = StandardScaler()
    X_train_scaled = split_scaler.fit_transform(X_train)
    X_test_scaled = split_scaler.transform(X_test)

    # max_iter is raised above the library default to give the solver room
    # to converge; random_state keeps the fit reproducible.
    model = LogisticRegression(max_iter=1000, random_state=42)
    model.fit(X_train_scaled, y_train)

    y_pred = model.predict(X_test_scaled)

    return {"Accuracy": accuracy_score(y_test, y_pred) * 100}

# Step 6: Evaluate the model at several train fractions.
# Labels are built with round() instead of int(): (1 - 0.8) * 100 evaluates to
# 19.999999999999996, which int() truncated into the misleading label "80:19".
# Deriving the test share as 100 minus the train share keeps every label
# summing to 100.
ratios = [0.5, 0.6, 0.7, 0.8]
results = {
    f"{round(ratio * 100)}:{100 - round(ratio * 100)}": train_evaluate_logistic_regression(X, y, ratio)
    for ratio in ratios
}

# Step 7: Print one line per split with its accuracy
for split, metrics in results.items():
    accuracy = metrics['Accuracy']  # each entry is a dict of metric name -> value
    print(f"Split Ratio {split} - Accuracy: {accuracy:.2f}%")

Split Ratio 50:50 - Accuracy: 64.23%
Split Ratio 60:40 - Accuracy: 64.09%
Split Ratio 70:30 - Accuracy: 64.03%
Split Ratio 80:19 - Accuracy: 64.20%


In [5]:
#Linear Regression:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score

# Read the patient records from disk
file_path = 'Heart_Attack_Dataset.csv'
heart_data = pd.read_csv(file_path)

# Step 1: Discard the identifier and geography columns
columns_to_drop = ["Patient ID", "Country", "Continent", "Hemisphere"]
heart_data_cleaned = heart_data.drop(columns_to_drop, axis=1)

# Step 2: Integer-code the columns listed as categorical.
# The same LabelEncoder instance is refitted per column, so after the loop it
# only remembers the classes of the final column.
categorical_columns = ["Sex", "Diet", "Blood Pressure"]
encoder = LabelEncoder()
for col in categorical_columns:
    encoded_values = encoder.fit_transform(heart_data_cleaned[col])
    heart_data_cleaned[col] = encoded_values

# Step 3: Split the frame into the label and the feature matrix
y = heart_data_cleaned["Heart Attack Risk"]
X = heart_data_cleaned.drop("Heart Attack Risk", axis=1)

# Step 4: Standardize every float64/int64 feature column.
# The scaler is fitted on all rows, i.e. before any train/test split happens.
numerical_columns = X.select_dtypes(include=["float64", "int64"]).columns
scaler = StandardScaler()
scaler.fit(X[numerical_columns])
X[numerical_columns] = scaler.transform(X[numerical_columns])

# Step 5: Function to split, train, and evaluate linear regression
def train_evaluate_linear_regression_accuracy(X, y, train_ratio):
    """Fit linear regression on one split and score it as a 0/1 classifier.

    The continuous regression output is turned into a class label with a 0.5
    threshold, and the labels are compared against the held-out targets.

    Args:
        X: Feature matrix (all columns numeric).
        y: Binary target (0/1) aligned with the rows of ``X``.
        train_ratio: Fraction of rows used for training, strictly between
            0 and 1; the remainder forms the test set.

    Returns:
        float: Test-set accuracy in percent.
    """
    # 1 - 0.7 evaluates to 0.30000000000000004 and 1 - 0.8 to
    # 0.19999999999999996. scikit-learn sizes the test set by rounding
    # test_size * n_samples upwards, so that excess can add a stray row to
    # the test set. Rounding the complement removes the float noise.
    test_fraction = round(1 - train_ratio, 10)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_fraction, random_state=42
    )

    model = LinearRegression()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    # A least-squares fit is not bounded to [0, 1], so round() could emit
    # labels such as -1 or 2, and its banker's rounding sends exactly 0.5 to 0.
    # An explicit threshold always yields a valid 0/1 label.
    y_pred_labels = (y_pred >= 0.5).astype(int)

    accuracy = accuracy_score(y_test, y_pred_labels) * 100  # as a percentage
    return accuracy

# Step 6: Evaluate linear regression at several train fractions.
# Labels are built with round() instead of int(): (1 - 0.8) * 100 evaluates to
# 19.999999999999996, which int() truncated into the misleading label "80:19".
# Deriving the test share as 100 minus the train share keeps every label
# summing to 100.
ratios = [0.5, 0.6, 0.7, 0.8]
linear_regression_accuracies = {
    f"{round(ratio * 100)}:{100 - round(ratio * 100)}": train_evaluate_linear_regression_accuracy(X, y, ratio)
    for ratio in ratios
}

# Step 7: Print one line per split with its accuracy
for split, accuracy in linear_regression_accuracies.items():
    print(f"Split Ratio {split} - Accuracy: {accuracy:.2f}%")

Split Ratio 50:50 - Accuracy: 64.23%
Split Ratio 60:40 - Accuracy: 64.09%
Split Ratio 70:30 - Accuracy: 64.03%
Split Ratio 80:19 - Accuracy: 64.20%


In [4]:
#KNN:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Read the patient records from disk
data = pd.read_csv('Heart_Attack_Dataset.csv')

# The label is 'Heart Attack Risk'; every remaining column except the
# patient identifier is used as a feature
y = data['Heart Attack Risk']
X = data.drop(columns=['Patient ID', 'Heart Attack Risk'])

# Integer-code each object-dtype column with a dedicated LabelEncoder and
# keep the fitted encoder under the column's name in label_encoders
label_encoders = {}
for column in X.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    codes = le.fit_transform(X[column])
    X[column] = codes
    label_encoders[column] = le

# Standardize all features. Note that the scaler is fitted on every row
# (before any train/test split) and that X is rebound to the scaler's
# output rather than the original DataFrame.
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

# Train/test splits to compare: label -> fraction of rows held out for testing
split_ratios = {
    "70:30": 0.3,
    "50:50": 0.5,
    "60:40": 0.4,
    "80:20": 0.2
}

# Accuracy in percent, recorded per split label
accuracy_results = {}

# NOTE(review): the recorded accuracies for this cell are 96-99%, against
# roughly 64% for the other three models on the same data. That gap is a red
# flag for duplicated rows landing on both sides of the split -- check
# data.duplicated().sum() before trusting these numbers.

# Fit and score a fresh KNN model for every split
for ratio_name, test_size in split_ratios.items():
    # random_state is pinned so each split is reproducible across runs
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

    # KNN is distance-based, so feature scaling directly shapes its
    # predictions. Fit the scaler on the training rows only so that test-set
    # statistics never leak into the distances. Standardization is affine, so
    # this gives the same features as scaling the raw data on the training
    # rows alone, even though X was already standardized on the full dataset.
    split_scaler = StandardScaler()
    X_train = split_scaler.fit_transform(X_train)
    X_test = split_scaler.transform(X_test)

    # Train KNN model
    knn_model = KNeighborsClassifier(n_neighbors=5)  # You can adjust n_neighbors as needed
    knn_model.fit(X_train, y_train)

    # Score on the held-out rows and express the accuracy as a percentage
    y_pred = knn_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred) * 100

    accuracy_results[ratio_name] = accuracy

# One row per split, in the order the splits were evaluated
accuracy_df = pd.DataFrame(list(accuracy_results.items()), columns=["Split Ratio", "Accuracy (%)"])

# Report the table
print("K-Nearest Neighbors has been applied to the dataset, and these are the accuracy results for different split ratios:")
print(accuracy_df)

# Persist the table next to the notebook (overwrites any existing file)
file_name = "knn_accuracy_splits.csv"
accuracy_df.to_csv(file_name, index=False)

print(f'Results saved in {file_name}')

K-Nearest Neighbors has been applied to the dataset, and these are the accuracy results for different split ratios:
  Split Ratio  Accuracy (%)
0       70:30     99.266667
1       50:50     96.656000
2       60:40     98.542500
3       80:20     99.775000
Results saved in knn_accuracy_splits.csv
