# Register Pytorch Binary Classification Model in SAS Model Manager

#introduction
This notebook provides an example of how to build and train a simple Pytorch Binary Classification Model and then import the model into SAS Model Manager on SAS Viya 4 using a randomly generated dataset. Lines of code that must be modified by the user, such as directory paths or the host server are noted with comments in CAPITAL LETTERS.

Note: All the files necessary for this process will be created in the current directory this notebook is being run in.

In [2]:
import torch
import sasctl.pzmm as pzmm
import sasctl
from sasctl import Session
import torch.nn as nn
import torch.optim as optim
from sasctl.services import model_management as mm
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import getpass
import os

In [4]:
# Generate a sample pandas DataFrame
data = {'feature1': np.random.rand(100),
        'feature2': np.random.rand(100),
        'target': np.random.choice([1, 2], size=100)}

In [5]:
df = pd.DataFrame(data)

# Split the data into features and target
X = df[['feature1', 'feature2']].values
y = (df['target'] == 1).astype(int).values  # Convert to binary classification

In [6]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

In [8]:
# Define a simple neural network model
class BinaryClassificationModel(nn.Module):
    def __init__(self, input_size):
        super(BinaryClassificationModel, self).__init__()
        self.fc = nn.Linear(input_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc(x)
        x = self.sigmoid(x)
        return x

In [9]:
# Instantiate the model and define loss function and optimizer
input_size = X_train.shape[1]
model = BinaryClassificationModel(input_size)
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [10]:
# Training loop
epochs = 1000
for epoch in range(epochs):
    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor.view(-1, 1))

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')


Epoch [100/1000], Loss: 0.7118
Epoch [200/1000], Loss: 0.7022
Epoch [300/1000], Loss: 0.6974
Epoch [400/1000], Loss: 0.6950
Epoch [500/1000], Loss: 0.6937
Epoch [600/1000], Loss: 0.6930
Epoch [700/1000], Loss: 0.6926
Epoch [800/1000], Loss: 0.6923
Epoch [900/1000], Loss: 0.6920
Epoch [1000/1000], Loss: 0.6918


In [11]:
# Save the trained model
torch.save(model.state_dict(), 'binaryclassification.pt')
print("Model saved successfully!")

Model saved successfully!


In [33]:
#Define the model file name and model name
prefixModelFile = 'pytorch_'
model_name = prefixModelFile + 'NotebookModel'

In [13]:
#Connect to specified server (EDM LATEST) ##CHANGE THIS BEFORE PUBLISH
server = 'edmlatest.ingress-nginx.edmtest-m1.edm.sashq-d.openstack.sas.com'
user = 'edmdev'

print('Password for the Server:')
p = getpass.getpass()

Password for the Server:


In [22]:
current_directory = os.getcwd()
# Input variables
inputVar = pd.DataFrame({'feature1': ['msg' * 33333], 'feature2': ['msg' * 33333]})
sasctl.pzmm.write_json_files.JSONFiles().write_var_json(input_data=inputVar, is_input=True, json_path=current_directory)
inputVarJSON = pd.read_json("inputVar.json")
print(inputVarJSON)

# Output variables
outputVar = pd.DataFrame({'prediction': ['msg' * 90], 'msg': ['msg' * 90]})
sasctl.pzmm.write_json_files.JSONFiles().write_var_json(input_data=outputVar, is_input=False, json_path=current_directory)
outputVarJSON = pd.read_json("outputVar.json")
print(outputVarJSON)


inputVar.json was successfully written and saved to C:\pytorchtest\inputVar.json
       name    level    type  length
0  feature1  nominal  string   99999
1  feature2  nominal  string   99999
outputVar.json was successfully written and saved to C:\pytorchtest\outputVar.json
         name    level    type  length
0  prediction  nominal  string     270
1         msg  nominal  string     270


In [23]:
# Define Input Output Variables and types for SAS Model Manager
project_input_variables = list(pd.DataFrame.to_dict(inputVarJSON.transpose()).values())
for var in project_input_variables:
    var["role"] = "input"
project_output_variables = list(pd.DataFrame.to_dict(outputVarJSON.transpose()).values())
for var in project_output_variables:
    var["role"] = "output"
project_variables = project_input_variables + project_output_variables

In [24]:
score_code = """
import torch
import torch.nn as nn
import numpy as np

# Define the BinaryClassificationModel class
class BinaryClassificationModel(nn.Module):
    def __init__(self, input_size):
        super(BinaryClassificationModel, self).__init__()
        self.fc = nn.Linear(input_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc(x)
        x = self.sigmoid(x)
        return x

# Load the saved model
loaded_model = BinaryClassificationModel(input_size=2)
loaded_model.load_state_dict(torch.load('binaryclassification.pt'))
loaded_model.eval()

# Function to score (make predictions) on new data
def score_model(new_data):
    new_data_tensor = torch.tensor(new_data, dtype=torch.float32)
    with torch.no_grad():
        predictions = loaded_model(new_data_tensor)
        predicted_labels = (predictions >= 0.5).float().view(-1).numpy()
    return predicted_labels

# Generate random new data
new_data = np.random.rand(10, 2)

# Score the model on the new data
predictions = score_model(new_data)

# Print the predictions
print("Predictions:", predictions)"""


# Specify the file path
file_path = os.path.join(current_directory, f'{model_name}.py')

# Open the file in write mode and write the score_code
with open(file_path, 'w') as file:
    file.write(score_code)

print(f"Python file created at: {file_path}")

Python file created at: C:\pytorchtest\myProject_myModel.py


In [None]:
requirements = """[
     {
        "step":"install pandas ",
        "command":"pip3 install pandas"
     },
     {
        "step":"install base64 ",
        "command":"pip3 install pybase64"
     },
     {
        "step":"install Pillow ",
        "command":"pip3 install Pillow"
     },
     {
        "step":"install sasctl ",
        "command":"pip3 install sasctl"
     },
     {
        "step":"install pytorch ",
        "command":"pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu"
     }
]"""
# Specify the file path
file_path = os.path.join(current_directory, 'requirements.json')

# Open the file in write mode and write the requirements
with open(file_path, 'w') as file:
    file.write(requirements)

print(f"Requirements file created at: {file_path}")

In [26]:
model_attr = dict()
model_attr['name'] = model_name


model_attr['toolVersion'] = '3.8.12'
model_attr['eventProbVar'] = 'target'
model_attr['targetLevel'] = 'BINARY'
model_attr['trainCodeType'] = 'Python'


project_name = 'TESTINGPYTORCHPROJECT'

In [None]:
from sasctl import Session
from sasctl.services import model_repository as modelRepo

try:
    # Establish a session with SAS Viya
    with Session(server, user, p, verify_ssl=False, protocol='http') as s:
        modelRepo.create_project(project=project_name, repository = 'Public', variables = project_variables,targetLevel = model_attr['targetLevel'],function='classification')
except Exception as e:
    print(e)

In [28]:
with Session(server, user, p, verify_ssl=False, protocol = 'http'):

    project = modelRepo.get_project(project_name)
    project['eventProbabilityVariable'] = model_attr['eventProbVar']
    project = modelRepo.update_project(project)

In [None]:
with Session(server, user, p, verify_ssl=False, protocol = 'http'):

    modelRepo.create_model(model=model_attr,project=project_name, modeler= 'User1', function='classification', 
                    algorithm='Pytorch efficientnet', tool='Python 3', target_variable=model_attr['eventProbVar'],
                    score_code_type= "python",
                    input_variables=list(pd.DataFrame.to_dict(inputVarJSON.transpose()).values()),
                    output_variables=list(pd.DataFrame.to_dict(outputVarJSON.transpose()).values()),
                    is_champion=True)

In [None]:
# Establish a session with SAS Viya
with Session(server, user, p, verify_ssl=False, protocol='http'):
    modelRepo.add_model_content(model=model_name, file=score_code,
                        name=model_name + '.py', role='score')
   
    # requirements for SCR environment
    with open('requirements.json', 'rb') as file:
        modelRepo.add_model_content(model=model_name, file=file,
                             name='requirements.json', role='python pickle')
    #input variables
    with open('inputVar.json', 'rb') as file:
        modelRepo.add_model_content(model=model_name, file=file,
                             name='inputVar.json')
    #output variables
    with open('outputVar.json', 'rb') as file:
        modelRepo.add_model_content(model=model_name, file=file,
                             name='outputVar.json')
    #saved model in current directory
    with open('binaryclassification.pt', 'rb') as file:
        modelRepo.add_model_content(model=model_name, file=file,
                             name='simple_food_model.pt', role='Python pickle')
    

In [None]:
from sasctl.services import model_management as mm
# Establish a session with SAS Viya
with Session(server, user, p, verify_ssl=False, protocol = 'http') as s:
    # Publish the model to the azure registry with git repo
    module = mm.publish_model(model_name, destination='scrInternal', force=True)

In [10]:
# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor)
    predicted_labels = (predictions >= 0.5).float().view(-1).numpy()

In [11]:
accuracy = np.mean(predicted_labels == y_test)
print(f'Test Accuracy: {accuracy:.4f}')

Test Accuracy: 0.4500
