# Evaluating a Machine Learning Model in SageMaker Studio

In [None]:
# Initial imports
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

## Load the Data into Pandas

In [None]:
# Build the path to the credit data CSV, relative to the working directory
file_path = Path("Resources") / "german_credit_data.csv"

# Read the CSV file into a DataFrame
df = pd.read_csv(file_path)

# Preview the first rows of the DataFrame
df.head()

## Data Preprocessing

### Encode categorical variables using one-hot encoding

In [None]:
# Create a OneHotEncoder instance that returns a dense array.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and
# scikit-learn 1.4 removed the old name, so try the new spelling first and
# fall back to the old one on releases that reject it with a TypeError.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)

In [None]:
# Names of the DataFrame columns that hold categorical values
categorical_variables = [
    "Housing",
    "Saving accounts",
    "Checking account",
    "Purpose",
    "Risk",
]

In [None]:
# Select the categorical columns, then fit the encoder on them and
# transform them in a single call
categorical_df = df[categorical_variables]
encoded_data = enc.fit_transform(categorical_df)

In [None]:
# Look up the names of the generated one-hot columns.
# scikit-learn 1.0 introduced get_feature_names_out() and scikit-learn 1.2
# removed get_feature_names(), so call whichever method this release provides.
if hasattr(enc, "get_feature_names_out"):
    encoded_columns = enc.get_feature_names_out(categorical_variables)
else:
    encoded_columns = enc.get_feature_names(categorical_variables)

# Create a DataFrame with the encoded variables
encoded_df = pd.DataFrame(
    encoded_data,
    columns=encoded_columns
)

# Display sample data
encoded_df.head()

In [None]:
# Numerical columns taken unchanged from the original DataFrame
numerical_df = df[["Age", "Job", "Credit amount", "Duration"]]

# Join the numerical columns and the one-hot encoded columns side by side,
# with the numerical columns first
encoded_df = pd.concat([numerical_df, encoded_df], axis="columns")

# Preview the combined DataFrame
encoded_df.head()

### Creating the Features and Target Sets

In [None]:
# The features set X keeps every column except the two one-hot "Risk" columns
X = encoded_df.drop(["Risk_bad", "Risk_good"], axis=1)

# Preview the features
X.head()

In [None]:
# The target set y is the one-hot "Risk_bad" column
y = encoded_df.loc[:, "Risk_bad"]

# Preview the target
y.head()

### Split the features and target sets into training and testing datasets

In [None]:
# Divide the features and the target into training and testing subsets.
# random_state is fixed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

### Use Scikit-Learn’s StandardScaler to scale the features data

In [None]:
# Create the scaler and fit it on the training features only
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to the training and the testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

## Creating a Machine Learning Model in SageMaker Studio

### Importing the Required Libraries

In [None]:
# Import Amazon SageMaker libraries and modules
import sagemaker
import sagemaker.amazon.common as smac
from sagemaker import get_execution_role
from sagemaker.predictor import csv_serializer, json_deserializer

# Import AWS Python SDK
import boto3

# Import support libraries
import io
import os
import json
import numpy as np

### Configuring General Settings for the SageMaker Model

In [None]:
# Set the S3 bucket name


In [None]:
# Set a prefix for the data files


In [None]:
# Set the IAM execution role


### Uploading the Training and Testing Data to Amazon S3

#### Encode and upload the training data

In [None]:
# Encode the training data as Protocol Buffer
buf = io.BytesIO()
vectors = np.array(X_train_scaled).astype("float32")
labels = np.array(y_train).astype("float32")
smac.write_numpy_to_dense_tensor(buf, vectors, labels)
# Rewind the buffer so the upload reads it from the beginning
buf.seek(0)

# Upload encoded training data to Amazon S3.
# The object key is built once with "/" separators (os.path.join would use
# the local OS separator) and the URI is derived from that same key, so the
# reported location always matches the uploaded object.
key = "linear_train.data"
train_object_key = "{}/train/{}".format(prefix, key)
boto3.resource("s3").Bucket(bucket).Object(train_object_key).upload_fileobj(buf)
s3_train_data = "s3://{}/{}".format(bucket, train_object_key)
print("Training data uploaded to: {}".format(s3_train_data))

#### Encode and upload the testing data

In [None]:
# Encode the testing data as Protocol Buffer
buf = io.BytesIO()
vectors = np.array(X_test_scaled).astype("float32")
labels = np.array(y_test).astype("float32")
smac.write_numpy_to_dense_tensor(buf, vectors, labels)
# Rewind the buffer so the upload reads it from the beginning
buf.seek(0)

# Upload encoded testing data to Amazon S3.
# The object key is built once with "/" separators (os.path.join would use
# the local OS separator) and the URI is derived from that same key, so the
# reported location always matches the uploaded object.
key = "linear_test.data"
test_object_key = "{}/test/{}".format(prefix, key)
boto3.resource("s3").Bucket(bucket).Object(test_object_key).upload_fileobj(buf)
s3_test_data = "s3://{}/{}".format(bucket, test_object_key)
print("Testing data uploaded to: {}".format(s3_test_data))

### Specify the Amazon SageMaker Session to Use

In [None]:
# Save the current session in a variable


### Create an Instance of the Machine Learning Model

In [None]:
# Import the get_image_uri function from the sagemaker library
from sagemaker.amazon.amazon_estimator import get_image_uri

In [None]:
# Import the container image


In [None]:
# Create an instance of the machine learning model


### Define Linear Learner Hyperparameters

In [None]:
# Get the dimension of the feature-input vector


In [None]:
# Define linear learner hyperparameters
# Note how in this case we use: predictor_type='binary_classifier' # (credit risk: good or bad)


## Fitting a Machine Learning Model in SageMaker Studio

In [None]:
# Fitting the linear learner model


## Making Predictions With the Model in SageMaker Studio

### Deploying the Model

In [None]:
# Deploy an instance of the linear learner model to create a predictor


### Setting Configurations for the Predictor

In [None]:
# Linear predictor configurations


### Making Predictions Using Testing Data

In [None]:
# Making some predictions using the test data


In [None]:
# Display sample predictions


In [None]:
# Create a list with the predicted values

# Transforming the list into an array

# Display sample data


## Evaluating a Machine Learning Model in SageMaker Studio

In [None]:
# Import the classification report from Scikit-learn
from sklearn.metrics import classification_report

In [None]:
# Compare the true test labels with the predicted labels, then print the
# resulting report under a heading
report = classification_report(y_test, y_predictions)
print("Classification report")
print(report)

## Delete the Endpoint to Avoid Additional AWS Resource Usage and Billing

Make sure that you delete all the Amazon SageMaker endpoints to prevent unwanted charges.

In [None]:
# Open a SageMaker session and use it to delete the endpoint that backs
# the linear predictor
cleanup_session = sagemaker.Session()
cleanup_session.delete_endpoint(linear_predictor.endpoint)