In [3]:
import numpy as np
import pandas as pd
import lightgbm as lgb
import seaborn as sns
import matplotlib.pyplot as plt
import shap
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.svm import SVC  # Import the Support Vector Classifier

# Assuming X_train and y_train are your training data
# For SVM, you might want to scale your data
# Here's an example using StandardScaler from sklearn
from sklearn.preprocessing import StandardScaler


In [None]:
# Names of the feature columns that are later cast to pandas' "category" dtype.
cat_columns = ['sex', 'race', 'dnr', 'primary', 'disability', 'income', 'extraprimary', 'cancer']

# Load the dataset and discard its first column.
# NOTE(review): the first column is presumably a saved row index -- confirm.
data = pd.read_csv('cleandata.csv')
data = data.drop(columns=data.columns[0])

# Separate the target ('death') from the remaining feature columns.
y = data['death']
X = data.drop(columns='death')

In [None]:
# Mount Google Drive, but only when the notebook actually runs inside Google
# Colab. `google.colab` does not exist in other environments, so an
# unconditional import would abort "Restart & Run All" there.
# NOTE(review): this cell runs after the cell that reads 'cleandata.csv'; if
# the CSV lives on Drive, the mount has to happen before that read -- confirm.
try:
    from google.colab import drive
except ImportError:
    # Not in Colab: nothing to mount, carry on with the local filesystem.
    IN_COLAB = False
    print('google.colab is not available; skipping Google Drive mount.')
else:
    IN_COLAB = True
    drive.mount('/content/drive')

In [None]:
# Cast every column listed in cat_columns to pandas' "category" dtype,
# one column at a time.
for col in cat_columns:
    X[col] = X[col].astype("category")

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split reproducible. No `stratify` argument is passed, so class proportions
# are not forced to match between the two partitions.
splits = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = splits

In [None]:
# Display the dtype of every training feature column (the columns named in
# cat_columns were cast to "category" in an earlier cell).
X_train.dtypes

In [None]:
# Train a logistic-regression classifier with scikit-learn's default
# hyper-parameters on the training partition. `fit` returns the estimator
# itself, so construction and fitting are chained.
# NOTE(review): the features are passed in unscaled and without explicit
# encoding of the "category" columns -- confirm this is intended.
model = LogisticRegression().fit(X_train, y_train)

# Show the fitted estimator as the cell output.
model

In [None]:
# Predict a hard class label for every row of the held-out test features.
y_pred = model.predict(X_test)

In [None]:
# Display the predicted test-set labels.
y_pred

In [None]:
# All three metrics compare the same ground truth against the same predictions.
eval_args = dict(y_true=y_test, y_pred=y_pred)

# Fraction of test rows whose predicted label equals the true label.
accuracy = accuracy_score(**eval_args)

# Raw-count confusion matrix (rows: actual class, columns: predicted class).
conf_matrix = confusion_matrix(**eval_args)

# Same matrix normalized over the true labels, so every row sums to 1.
norm_conf_matrix = confusion_matrix(**eval_args, normalize='true')

In [None]:
# Display the test-set accuracy computed by accuracy_score.
accuracy

In [None]:
# Display the raw-count confusion matrix.
conf_matrix

In [None]:
sns.set(font_scale=1.2)

# Draw the raw-count confusion matrix on an explicit Axes; integer annotations.
fig, ax = plt.subplots()
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    cbar=False,
    square=True,
    xticklabels=["Predicted 0", "Predicted 1"],
    yticklabels=["Actual 0", "Actual 1"],
    ax=ax,
)

# Axis labels and title.
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix Heatmap')

# Render the figure.
plt.show()

In [None]:
sns.set(font_scale=1.2)

# Heatmap of the confusion matrix normalized over the true labels
# (norm_conf_matrix is built with normalize='true', so each row sums to 1).
# Annotations use a fixed two-decimal format so every cell reads as a
# proportion with the same precision.
fig, ax = plt.subplots()
sns.heatmap(
    norm_conf_matrix,
    annot=True,
    fmt=".2f",
    cmap="Blues",
    cbar=False,
    square=True,
    xticklabels=["Predicted 0", "Predicted 1"],
    yticklabels=["Actual 0", "Actual 1"],
    ax=ax,
)

# The title names the normalization so this figure cannot be mistaken for the
# raw-count heatmap, which previously carried the identical title.
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Normalized Confusion Matrix Heatmap')

# Display the plot
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Coefficients of the fitted logistic-regression model (first row of coef_),
# paired positionally with the training-frame column names.
# NOTE(review): no one-hot encoding step is visible in this notebook, so the
# names are the original X_train columns -- confirm they line up with coef_.
feature_importance = model.coef_[0]
feature_names = X_train.columns

# One row per feature: signed coefficient plus its absolute value, ordered
# from largest to smallest magnitude.
feature_importance_df = (
    pd.DataFrame({'Feature': feature_names, 'Importance': feature_importance})
    .assign(AbsImportance=lambda frame: frame['Importance'].abs())
    .sort_values(by='AbsImportance', ascending=False)
)

# Horizontal bar chart of the ten largest-magnitude coefficients; change the
# argument of head() to show more or fewer features.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x='Importance', y='Feature', data=feature_importance_df.head(10), ax=ax)
ax.set_title('Feature Importance (Logistic Regression)')
ax.set_xlabel('Coefficient')
ax.set_ylabel('Feature')
plt.show()

# Caveat: coefficient magnitudes are only comparable between features that are
# measured on the same scale, and no scaling step is applied in the visible
# cells, so read this ranking with care.







In [None]:
# `model` is a LogisticRegression (a linear model). shap.TreeExplainer only
# supports tree-based models and rejects it, so use the linear explainer.
# X_train is supplied as the background data for the explainer.
explainer = shap.LinearExplainer(model, X_train)

# SHAP values for every row of the full feature matrix X.
shap_values = explainer.shap_values(X)

In [None]:
import shap
import matplotlib.pyplot as plt

# Build a SHAP explainer for the fitted logistic-regression `model`; X_train
# is passed as the second argument (the masker / background data in the SHAP
# API).
shap_explainer = shap.Explainer(model, X_train)
# Calling the explainer on the test features yields their SHAP values.
# Note: this rebinds `shap_values`, replacing the value assigned in the
# previous cell.
shap_values = shap_explainer(X_test)

# Bar-type summary plot of the SHAP values for the test set.
shap.summary_plot(shap_values, X_test, plot_type='bar')

In [None]:
# A ROC curve must be built from continuous scores. Feeding it the hard 0/1
# labels in y_pred collapses the curve to a single operating point and
# understates the AUC. Column 1 of predict_proba is the probability of
# model.classes_[1].
# NOTE(review): assumes a binary target whose positive class is the larger
# label (e.g. 1) -- confirm.
y_score = model.predict_proba(X_test)[:, 1]

# False/true positive rates at every score threshold, and the area under the curve.
fpr, tpr, thresholds = roc_curve(y_test, y_score)
roc_auc = auc(fpr, tpr)

In [None]:
# Plot the ROC curve from the fpr / tpr / roc_auc values computed in the
# previous cell. This cell used to be a copy of the precision-recall plot and
# referenced `precision`, `recall` and `average_precision` before any cell
# defined them, which raises NameError on a fresh kernel.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, color='b', lw=2, label='ROC curve')
# Diagonal reference line: the expected curve of a random classifier.
ax.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--', label='Chance')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_ylim([0.0, 1.05])
ax.set_xlim([0.0, 1.0])
ax.set_title('ROC Curve (AUC = {:.2f})'.format(roc_auc))
ax.legend(loc='lower right')
plt.show()

In [None]:
# Precision-recall analysis needs continuous scores. With the hard 0/1 labels
# in y_pred the curve has a single operating point and the average precision
# is not meaningful. Column 1 of predict_proba is the probability of
# model.classes_[1].
# NOTE(review): assumes a binary target whose positive class is the larger
# label (e.g. 1) -- confirm.
y_score = model.predict_proba(X_test)[:, 1]

# Precision/recall pairs at every score threshold, and the average precision.
# Note: this rebinds `thresholds`, which the ROC cell also assigns.
precision, recall, thresholds = precision_recall_curve(y_test, y_score)
average_precision = average_precision_score(y_test, y_score)

In [None]:
# Precision-recall curve drawn as a step line with a shaded area beneath it,
# using the precision / recall / average_precision values from the previous cell.
fig, ax = plt.subplots(figsize=(8, 6))
ax.step(recall, precision, color='b', where='post', lw=2)
ax.fill_between(recall, precision, alpha=0.2, color='b')

# Axis labels and fixed axis ranges (a little headroom above precision = 1).
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.set_ylim([0.0, 1.05])
ax.set_xlim([0.0, 1.0])

# The title reports the average precision to two decimals.
ax.set_title(f'Precision-Recall Curve (AP = {average_precision:.2f})')
plt.show()

In [None]:
# n_splits = 5
# skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

In [None]:
# for fold, (train_idx, valid_idx) in enumerate(skf.split(X, y)):
#     X_train, X_valid = X.iloc[train_idx], X.iloc[valid_idx]
#     y_train, y_valid = y.iloc[train_idx], y.iloc[valid_idx]

#     train_data = lgb.Dataset(X_train, label=y_train)
#     valid_data = lgb.Dataset(X_valid, label=y_valid, reference=train_data)

#     params['verbose'] = 100  # Adjust verbosity here

#     model = lgb.train(params, train_data, valid_sets=[train_data, valid_data])

#     y_pred = model.predict(X_valid, num_iteration=model.best_iteration)
#     y_pred_binary = np.round(y_pred)  # Convert to binary predictions

#     accuracy = accuracy_score(y_valid, y_pred_binary)
#     print(f"Fold {fold + 1}, Accuracy: {accuracy}")

#     # You can save or use the models for further analysis
#     # model.save_model(f"model_fold_{fold + 1}.txt")