# Term Deposit Prediction - Unit Testing
Present an overview of tests to be conducted before a containerized model is released for deployment.

# Setup

In [None]:
# Install the gradio_client package
!pip install -q gradio_client

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m313.6/313.6 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.9/129.9 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Import necessary libraries
from gradio_client import Client  # Import the Gradio client for interacting with Gradio interfaces
from sklearn.model_selection import train_test_split  # Import train_test_split for splitting datasets
from sklearn.metrics import classification_report  # Import classification_report for model evaluation
from tqdm import tqdm  # Import tqdm for progress bars

In [None]:
# Address of the deployed Gradio app; the placeholder must be replaced with the
# real API link before this cell is run.
GRADIO_APP_URL = "---PASTE THE Gradio APP API Link--------"

# Open a client session against the app's API.
client = Client(GRADIO_APP_URL)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loaded as API: https://praneeth232-tele-test.hf.space ✔


# Baseline Checks

Test Data

In [None]:
import pandas as pd  # Import pandas for data manipulation
from sklearn.model_selection import train_test_split  # Import train_test_split for splitting datasets

# Number of held-out rows sent to the deployed model, and the seed that makes
# the draw repeatable after a kernel restart.
SAMPLE_SIZE = 100
SAMPLE_SEED = 42

# Column order used when rows are submitted to the endpoint positionally:
# numerical features first, then categorical features.
MODEL_INPUT_COLUMNS = [
    'Age', 'Duration(Sec)', 'CC Contact Freq', 'Days Since PC', 'PC Contact Freq',
    'Job', 'Marital Status', 'Education', 'Defaulter', 'Home Loan',
    'Personal Loan', 'Communication Type', 'Last Contacted', 'Day of Week',
    'PC Outcome',
]

# Read the dataset from the CSV file into a DataFrame
data_df = pd.read_csv("Bank_Telemarketing.csv")

# Drop the identifier / personal-detail columns that are not model inputs
data_df = data_df.drop(columns=['customer_id', 'email_id', 'first_name', 'last_name'])

# Define numerical and categorical feature columns
numerical_features = data_df[['Age', 'Duration(Sec)', 'CC Contact Freq', 'Days Since PC', 'PC Contact Freq']].columns
categorical_features = data_df.select_dtypes(include=['object']).columns

# Separate features (X) and target variable (y)
X = data_df.drop('subscribed', axis=1)
y = data_df['subscribed']

# Split the dataset into training and testing sets
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42
)

# Rearrange the columns into numerical and then categorical features
Xtest = Xtest[MODEL_INPUT_COLUMNS]

# Draw a reproducible random sample from the test features. The sample is
# seeded so the baseline report is the same on every run, and capped at the
# size of the test split so a small dataset does not raise.
Xtest_sample = Xtest.sample(
    n=min(SAMPLE_SIZE, len(Xtest)),
    random_state=SAMPLE_SEED
)

# Ground-truth labels for exactly the sampled rows (matched by index label)
ytest_sample = ytest.loc[Xtest_sample.index]

# Convert the sampled rows from the DataFrame to a list of plain tuples
Xtest_sample_rows = list(Xtest_sample.itertuples(index=False, name=None))

Predictions on the test data

In [None]:
# Keyword names of the "/predict" endpoint, listed in the same order as the
# values inside each tuple of Xtest_sample_rows.
api_fields = [
    "age", "duration", "cc_contact_freq", "days_since_pc", "pc_contact_freq",
    "job", "marital_status", "education", "defaulter", "home_loan",
    "personal_loan", "communication_type", "last_contacted", "day_of_week",
    "pc_outcome",
]

# Predictions for the rows the endpoint answered successfully
baseline_test_predictions = []
# Index labels (into ytest_sample) of the rows behind those predictions
baseline_test_indices = []
# (index label, exception) for every row whose request failed
baseline_test_failures = []

# Iterate through the sampled rows and submit prediction requests to the Gradio interface
for row_index, row in tqdm(
    zip(Xtest_sample.index, Xtest_sample_rows),
    total=len(Xtest_sample_rows)
):
    try:
        # Submit a prediction request for the current row
        job = client.submit(**dict(zip(api_fields, row)), api_name="/predict")

        # Wait for the result and convert the returned label to an integer
        prediction = job.result()['label']
        predicted_label = int(prediction)

    except Exception as e:
        # Keep going (best effort), but remember which row failed so the
        # predictions can still be matched to their ground-truth labels.
        print(e)
        baseline_test_failures.append((row_index, e))

    else:
        baseline_test_predictions.append(predicted_label)
        baseline_test_indices.append(row_index)

# A failed request leaves baseline_test_predictions shorter than ytest_sample;
# say so explicitly instead of letting the evaluation fail later.
if baseline_test_failures:
    print(
        f"WARNING: {len(baseline_test_failures)} of {len(Xtest_sample_rows)} "
        "prediction requests failed; baseline_test_predictions no longer lines up "
        "with ytest_sample. Compare against ytest_sample.loc[baseline_test_indices] instead."
    )

100%|██████████| 100/100 [00:38<00:00,  2.62it/s]


Estimation of accuracy on the test sample.

In [None]:
# Build the per-class precision / recall / F1 summary that compares the sampled
# ground-truth labels with the endpoint's predictions, then show it.
baseline_report = classification_report(ytest_sample, baseline_test_predictions)
print(baseline_report)

              precision    recall  f1-score   support

           0       0.90      0.98      0.94        87
           1       0.67      0.31      0.42        13

    accuracy                           0.89       100
   macro avg       0.79      0.64      0.68       100
weighted avg       0.87      0.89      0.87       100



If the F1-score is more than the existing baseline (human or a previous model version), we move on to unit tests.

## Test your understanding

What are the consequences of deploying a machine learning model to production without undergoing unit testing?

- The model will perform optimally and achieve high accuracy
- The model may produce incorrect predictions due to undetected bugs
- The model will automatically fix any issues during deployment
- Unit testing is not necessary for machine learning models.

# Unit Tests

## Perturbation tests

*Baseline*

In [None]:
# Reference input for the perturbation test; the perturbed request in the next
# step is compared against the prediction for these values.
baseline_inputs = dict(
    age=3,
    duration=3,
    cc_contact_freq=3,
    days_since_pc=3,
    pc_contact_freq=3,
    job="admin.",
    marital_status="married",
    education="experience",
    defaulter="no",
    home_loan="yes",
    personal_loan="yes",
    communication_type="cellular",
    last_contacted="may",
    day_of_week="thu",
    pc_outcome="nonexistent",
)

# Send the reference input to the Gradio interface
job = client.submit(**baseline_inputs, api_name="/predict")

In [None]:
# Wait for the baseline request to finish and show the predicted label
baseline_label = job.result()['label']
print(baseline_label)

0


*Test (perturbed baseline)*

In [None]:
# Perturbed input: age and duration are nudged from 3 to 4, every other field
# is kept at its baseline value.
perturbed_inputs = dict(
    age=4,
    duration=4,
    cc_contact_freq=3,
    days_since_pc=3,
    pc_contact_freq=3,
    job="admin.",
    marital_status="married",
    education="experience",
    defaulter="no",
    home_loan="yes",
    personal_loan="yes",
    communication_type="cellular",
    last_contacted="may",
    day_of_week="thu",
    pc_outcome="nonexistent",
)

# Send the perturbed input to the Gradio interface
job = client.submit(**perturbed_inputs, api_name="/predict")

In [None]:
# Wait for the perturbed request to finish and show the predicted label
perturbed_label = job.result()['label']
print(perturbed_label)

0


The output in the above cell indicates that the model is robust to minor variations in both duration and age: the prediction is unchanged from the baseline.

# Known edge-cases (critical subgroups)

*Critical customer state (known success test case)*

In this scenario, a known edge case is a long call: when the conversation with the customer lasts 300 seconds or more (390 seconds in the test below), the customer is expected to purchase the subscription. Let us see if the model can recognize this success state.

In [None]:
# Known edge case: a long call (390 seconds) combined with pc_outcome="success".
# The endpoint is named explicitly, like every other request in this notebook,
# so the call does not depend on the client inferring which endpoint to use.
job = client.submit(
    age=3,
    duration=390,
    cc_contact_freq=3,
    days_since_pc=3,
    pc_contact_freq=3,
    job="admin.",
    marital_status="married",
    education="experience",
    defaulter="no",
    home_loan="yes",
    personal_loan="yes",
    communication_type="cellular",
    last_contacted="may",
    day_of_week="thu",
    pc_outcome="success",
    api_name="/predict"
)

In [None]:
# Wait for the edge-case request to finish and show the predicted label
edge_case_label = job.result()['label']
print(edge_case_label)

1


The output in the above cell indicates that the model correctly predicts success for this known edge case.

More instances of such unit tests could be facilitated by presenting a simple interface to the tester like so:

In [None]:
# @title Unit Test Interface
age=3 # @param
duration=39 # @param
cc_contact_freq=3 # @param
days_since_pc=3 # @param
pc_contact_freq=3 # @param
job="admin." # @param ['admin.', 'blue-collar', 'technician', 'services', 'management','retired', 'entrepreneur', 'self-employed', 'housemaid', 'unemployed','student', 'unknown']
marital_status="married" # @param ['married', 'single', 'divorced', 'unknown']
education="experience" # @param ['experience', 'university degree', 'high school', 'professional.course','Others', 'illiterate']
defaulter="no" # @param ['no', 'unknown', 'yes']
home_loan="yes" # @param ['yes', 'no', 'unknown']
personal_loan="yes" # @param ['yes', 'no', 'unknown']
communication_type="cellular" # @param ['cellular', 'telephone']
last_contacted="may" # @param ['may', 'jul', 'aug', 'jun', 'nov', 'apr', 'oct', 'mar', 'sep', 'dec']
day_of_week="thu" # @param ['thu', 'mon', 'wed', 'tue', 'fri']
pc_outcome="success" # @param ['nonexistent', 'failure', 'success']

# Send the form values to the "/predict" endpoint. The request handle gets its
# own name so the `job` form field (a string) is not overwritten by the Gradio
# job object once the request has been submitted.
prediction_job = client.submit(
    age=age,
    duration=duration,
    cc_contact_freq=cc_contact_freq,
    days_since_pc=days_since_pc,
    pc_contact_freq=pc_contact_freq,
    job=job,
    marital_status=marital_status,
    education=education,
    defaulter=defaulter,
    home_loan=home_loan,
    personal_loan=personal_loan,
    communication_type=communication_type,
    last_contacted=last_contacted,
    day_of_week=day_of_week,
    pc_outcome=pc_outcome,
    api_name="/predict"
)

# The returned label is compared as a string; '1' is reported as a purchase.
predicted_label = prediction_job.result()['label']
customer_purchase = 'Yes' if predicted_label == '1' else 'No'
print(f"customer purchase?: {customer_purchase}")

customer purchase?: No


If the unit tests pass, the model is ready to be tagged for release to staging and production.