## Importing Necessary Modules

In [None]:
# Import the dependencies
import os
from pathlib import Path
import pandas as pd
from sklearn.preprocessing import LabelEncoder,MinMaxScaler

In [None]:
# Read the cleaned skin metadata CSV (path is relative to the working
# directory) into a DataFrame and preview its first rows.
data = Path("../Resources/cleaned_skin_metadata.csv")
df = pd.read_csv(filepath_or_buffer=data)
df.head(n=5)

In [None]:
# Drop columns that are not needed for modeling
df = df.drop(
    columns=['lesion_id', 'image_id', 'dx', 'dx_type', 'diagnosis', 'result']
)

# Encode categorical variables using LabelEncoder.
# Each column gets its own encoder: refitting one shared encoder replaces its
# `classes_`, so the mapping learned for 'sex' would be lost as soon as
# 'localization' is fitted and could no longer be inverted.
label_encoders = {}
for column in ('sex', 'localization'):
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])

# Backward-compatible alias: `label_encoder` previously ended up fitted on
# 'localization', the last column it was applied to.
label_encoder = label_encoders['localization']

In [None]:
# Separate the target from the features
# (assumes 'result_label' is the target variable).
y = df['result_label']
X = df.drop(columns='result_label')

In [None]:
# Split our data into training and testing sets
from sklearn.model_selection import train_test_split

# Stratify on y so both splits keep the same class proportions; the fixed
# random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=1
)
X_train.shape

In [None]:
# Normalize only numeric columns using MinMaxScaler.
# Select on the generic 'number' dtype rather than a hard-coded
# ['float64', 'int64'] list: label-encoded columns can come out as int32 on
# some platforms and would otherwise be silently left unscaled.
numeric_columns = X.select_dtypes(include='number').columns
scaler = MinMaxScaler()

# Fit the scaler on the training split only, then reuse the learned min/max
# on the test split so no test-set information leaks into preprocessing.
X_train[numeric_columns] = scaler.fit_transform(X_train[numeric_columns])
X_test[numeric_columns] = scaler.transform(X_test[numeric_columns])

In [None]:
# Build the (still unfitted) logistic regression estimator
from sklearn.linear_model import LogisticRegression

# lbfgs solver, at most 200 iterations, fixed seed for reproducibility
classifier = LogisticRegression(solver='lbfgs', max_iter=200, random_state=1)
classifier

In [None]:
# Fit (train) our model using the training data.
# `fit` returns the estimator itself, so `lr_model` and `classifier`
# refer to the same fitted object.
lr_model = classifier.fit(X=X_train, y=y_train)

In [None]:
# Report mean accuracy on the training and testing splits
training_score = classifier.score(X_train, y_train)
print(f"Training Data Score: {training_score}")
testing_score = classifier.score(X_test, y_test)
print(f"Testing Data Score: {testing_score}")

In [None]:
# Predict labels for the test split
predictions = classifier.predict(X_test)

# Put predicted and actual labels side by side; the frame takes y_test's
# index, which is then replaced with a fresh 0..n-1 index.
results = pd.DataFrame({"Prediction": predictions, "Actual": y_test})
results = results.reset_index(drop=True)
results.head(n=10)

In [None]:
# Accuracy of the test-set predictions
from sklearn.metrics import accuracy_score

# Display the share of test rows whose predicted label matches the actual one.
accuracy_score(y_true=y_test, y_pred=predictions)

In [None]:
# Predictions for the held-out test split
testing_predictions = classifier.predict(X_test)

# Predictions for the split the model was trained on
training_predictions = lr_model.predict(X_train)


In [None]:
# Confusion matrix helper from scikit-learn
from sklearn.metrics import confusion_matrix

# Build the training confusion matrix (rows: actual labels,
# columns: predicted labels) and print it.
training_matrix = confusion_matrix(y_true=y_train, y_pred=training_predictions)
print(training_matrix)

In [None]:
# Build the testing confusion matrix (rows: actual labels,
# columns: predicted labels) and print it.
test_matrix = confusion_matrix(y_true=y_test, y_pred=testing_predictions)
print(test_matrix)

In [None]:
from sklearn.metrics import classification_report

# Summarize per-class precision, recall, F1 and support on the training
# split, then print the text report.
training_report = classification_report(
    y_true=y_train, y_pred=training_predictions
)
print(training_report)

In [None]:
# Summarize per-class precision, recall, F1 and support on the testing
# split, then print the text report.
testing_report = classification_report(
    y_true=y_test, y_pred=testing_predictions
)
print(testing_report)