## Importing Necessary Modules

In [1]:
# Import the dependencies
import os
from pathlib import Path
import pandas as pd

In [2]:
# Location of the cleaned, encoded skin metadata CSV, relative to this notebook
data = Path("../Resources/cleaned_encoded_skin_metadata.csv")

# Read the CSV into a DataFrame and preview its first five rows
df = pd.read_csv(filepath_or_buffer=data)
df.head(n=5)

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,diagnosis,result,result_label
0,118,3113,2,3,80.0,1,11,2,0,Benign
1,118,724,2,3,80.0,1,11,2,0,Benign
2,2710,2463,2,3,80.0,1,11,2,0,Benign
3,2710,1355,2,3,80.0,1,11,2,0,Benign
4,1460,7327,2,3,75.0,1,4,2,0,Benign


In [3]:
# Separate the Features (X) from the Target (y)

# Target: the numeric `result` column (0 is shown alongside the "Benign" label in the preview).
y = df["result"]

# Columns that must not be fed to the model: the lesion/image id columns,
# the `dx` and `diagnosis` columns, and both forms of the target itself.
# NOTE(review): `dx` and `diagnosis` are presumably encodings of the diagnosis
# that the target is derived from - confirm against the cleaning notebook.
non_feature_columns = ["lesion_id", "image_id", "dx", "diagnosis", "result", "result_label"]

# drop() without inplace returns a new DataFrame, so `df` is left untouched and
# no intermediate full copy of the frame is needed.
X = df.drop(columns=non_feature_columns)
X.head()

Unnamed: 0,dx_type,age,sex,localization
0,3,80.0,1,11
1,3,80.0,1,11
2,3,80.0,1,11
3,3,80.0,1,11
4,3,75.0,1,4


In [4]:
# Split the features and target into training and testing subsets.
from sklearn.model_selection import train_test_split

# stratify=y keeps the class proportions of the target the same in both subsets;
# random_state pins the shuffle so the split is reproducible.
# NOTE(review): the df preview shows several rows sharing one lesion_id with
# identical feature values; a row-level split may place such rows on both sides
# of the split. Consider a group-aware split on lesion_id - TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
    stratify=y,
)

# Rows x columns of the training features
X_train.shape

(7511, 4)

In [5]:
# Build the (not yet fitted) logistic regression estimator.
from sklearn.linear_model import LogisticRegression

# lbfgs solver, capped at 200 iterations; random_state fixed for reproducibility.
classifier = LogisticRegression(
    solver='lbfgs',
    max_iter=200,
    random_state=1,
)

# Display the configured estimator
classifier

In [6]:
# Fit (train) our model using the training data.
# fit() returns the estimator it was called on, so `lr_model` and `classifier`
# refer to the same fitted object from here on.
lr_model = classifier.fit(X=X_train, y=y_train)

In [7]:
# Score the fitted model on both the training and the testing data
# (score() reports mean accuracy for a classifier).
training_score = classifier.score(X_train, y_train)
testing_score = classifier.score(X_test, y_test)

print(f"Training Data Score: {training_score}")
print(f"Testing Data Score: {testing_score}")

Training Data Score: 0.8066835308214618
Testing Data Score: 0.8210862619808307


In [8]:
# Predict the class of every row in the test set
predictions = classifier.predict(X_test)

# Put predicted and actual labels side by side; y_test carries the original
# row labels from df, so reset to a clean 0..n-1 index for display.
results = pd.DataFrame({"Prediction": predictions, "Actual": y_test})
results = results.reset_index(drop=True)

# Show the first ten comparisons
results.head(10)

Unnamed: 0,Prediction,Actual
0,0,0
1,0,0
2,0,0
3,0,1
4,0,1
5,0,1
6,0,0
7,1,0
8,1,0
9,1,1


In [9]:
# Accuracy on the test set: fraction of test rows whose prediction matches the actual label.
from sklearn.metrics import accuracy_score

# Display the accuracy score for the test dataset.
accuracy_score(y_true=y_test, y_pred=predictions)

0.8210862619808307

In [10]:
# Generate predictions for both splits; these feed the confusion matrices
# and classification reports below.
# `lr_model` and `classifier` are the same fitted estimator (lr_model was
# assigned from classifier.fit(...)), so either name gives the same predictions.
training_predictions, testing_predictions = (
    lr_model.predict(X_train),
    classifier.predict(X_test),
)


In [11]:
# Confusion matrix for the training data.
from sklearn.metrics import confusion_matrix

# Rows are the actual classes and columns the predicted classes.
training_matrix = confusion_matrix(y_true=y_train, y_pred=training_predictions)

# Print the confusion matrix for the training data
print(training_matrix)

[[5426  513]
 [ 939  633]]


In [12]:
# Confusion matrix for the testing data
# (rows are the actual classes, columns the predicted classes).
test_matrix = confusion_matrix(y_true=y_test, y_pred=testing_predictions)

# Print the confusion matrix for the testing data
print(test_matrix)

[[1805  175]
 [ 273  251]]


In [13]:
# Per-class precision, recall and f1-score on the training data.
from sklearn.metrics import classification_report

# Build the text report from the actual and predicted training labels
training_report = classification_report(y_true=y_train, y_pred=training_predictions)

# Print the training classification report
print(training_report)

              precision    recall  f1-score   support

           0       0.85      0.91      0.88      5939
           1       0.55      0.40      0.47      1572

    accuracy                           0.81      7511
   macro avg       0.70      0.66      0.67      7511
weighted avg       0.79      0.81      0.79      7511



In [14]:
# Per-class precision, recall and f1-score on the testing data.
# Build the text report from the actual and predicted test labels
testing_report = classification_report(y_true=y_test, y_pred=testing_predictions)

# Print the testing classification report
print(testing_report)

              precision    recall  f1-score   support

           0       0.87      0.91      0.89      1980
           1       0.59      0.48      0.53       524

    accuracy                           0.82      2504
   macro avg       0.73      0.70      0.71      2504
weighted avg       0.81      0.82      0.81      2504

