### Personalized Telco Plans Using Amazon SageMaker and Anthropic's Claude 3.5 Sonnet on Amazon Bedrock

In [3]:
# Import necessary libraries
import io
import csv
import boto3
import json
import time
import pandas as pd
import random
from datetime import datetime, timedelta
import sqlalchemy as sa
from botocore.exceptions import ClientError
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.datasets import dump_svmlight_file
import sagemaker
from sagemaker import get_execution_role
from sagemaker import image_uris
from sagemaker.session import Session
from sagemaker.inputs import TrainingInput
from sagemaker.xgboost.estimator import XGBoost
from sqlalchemy import create_engine

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [5]:
# Step 1: Initialize AWS Services and Set Up Data Paths

# In this step, we initialize the necessary AWS services and set up the paths for our data files. This includes creating a SageMaker session, defining an S3 bucket, and initializing the Bedrock client.

# Why this step is important:
#- SageMaker session allows us to interact with SageMaker services
#- S3 bucket is used for storing our training data and model artifacts
#- Bedrock client is used for generating personalized recommendations later in the process

# Create a SageMaker session (reused by later steps for S3 uploads and region lookup)
sagemaker_session = sagemaker.Session()

# Define the S3 bucket: the session's default bucket holds training data and model artifacts
s3_bucket = sagemaker_session.default_bucket()

# Initialize the Bedrock runtime client used to generate recommendations in Step 9
bedrock = boto3.client(
    service_name='bedrock-runtime',
    region_name='us-east-1'  # Replace with your preferred region
)

# File paths for output CSV files
customer_data_csv = 'customer_data.csv'
plans_and_addons_csv = 'plans_and_addons.csv'

In [None]:
## Step 2: Generate Synthetic Customer Data

# In this step, we create a function to generate synthetic customer profiles and use it to create a dataset of 1000 customers. This data is then saved to a CSV file.

# Why this step is important:
#- Synthetic data allows us to test and develop our model without using real customer data
#- It helps in creating a diverse dataset with various customer profiles and usage patterns
#- Saving to CSV allows easy storage and retrieval of the data for future steps

def generate_customer_profile():
    # NOTE(review): the body of this function is elided in this listing; only the
    # placeholder below is present, so the cell cannot run as written. Per the step
    # description above it is meant to produce one synthetic customer profile —
    # confirm the returned fields against the columns read in Step 3.
    # Function implementation here...

# Generate and write customer data to CSV
# The file is opened for writing with newline='' (presumably so a csv writer
# controls line endings itself — confirm once the writing logic is restored).
with open(customer_data_csv, 'w', newline='') as csvfile:
    # NOTE(review): the writing logic is elided here as well; nothing in the
    # visible code writes rows to csvfile.
    # CSV writing logic here...

print("Customer data generated and saved to CSV.")

In [None]:
## Step 3: Data Processing and Feature Engineering

# In this step, we load the synthetic customer data, process it, and engineer features for our machine learning model. We also split the data into training and test sets.

# Why this step is important:
# Data processing ensures our data is in the correct format for model training
# Feature engineering helps in creating more informative inputs for our model
# Splitting the data allows us to train our model and then test its performance on unseen data

# Load the synthetic customer data for modeling
customer_data = pd.read_csv('customer_data.csv')

# Data processing and feature engineering
# ... (data processing steps) ...

# Splitting the data into training and test sets
# Features are the numeric usage/billing columns; the label is the upgrade flag.
X = customer_data[['avg_data_usage', 'avg_call_minutes', 'avg_sms_count', 'monthly_bill', 'loyalty_years']]
y = customer_data['upgrade_plan']

# Fixed random_state keeps the 80/20 split reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Recombine label and features per split so each CSV is self-contained.
# The label is placed in the first column; rows stay aligned because both
# parts of a split share the same index.
train_data = pd.concat([y_train, X_train], axis=1)
test_data = pd.concat([y_test, X_test], axis=1)

# Save the train and test data as CSV
train_data.to_csv('train.csv', index=False)
test_data.to_csv('test.csv', index=False)

print("Data split into training and validation sets and saved to CSV.")

In [None]:
## Step 4: Prepare Data for SageMaker XGBoost

# In this step, we convert our data to the LIBSVM format, one of the input formats supported by SageMaker's built-in XGBoost algorithm. We then upload this data to S3 for use in training.

### Why this step is important:
#- LIBSVM format is efficient for storing sparse data and is one of the formats accepted by SageMaker's XGBoost implementation
#- Uploading to S3 makes the data accessible to SageMaker for training

# Serialize each split to a LIBSVM text file.
# zero_based=False writes feature indices starting at 1.
train_libsvm_file = 'train.libsvm'
test_libsvm_file = 'test.libsvm'
dump_svmlight_file(X=X_train, y=y_train, f=train_libsvm_file, zero_based=False)
dump_svmlight_file(X=X_test, y=y_test, f=test_libsvm_file, zero_based=False)

# Push both files to the session bucket; each call returns the uploaded object's S3 URI
train_input = sagemaker_session.upload_data(
    path=train_libsvm_file,
    bucket=s3_bucket,
    key_prefix='telco-recommendation/train',
)
test_input = sagemaker_session.upload_data(
    path=test_libsvm_file,
    bucket=s3_bucket,
    key_prefix='telco-recommendation/test',
)

# Report where the data landed so the paths can be checked before training
print("Data uploaded to S3 for SageMaker.")
print(f"Train data S3 path: {train_input}")
print(f"Test data S3 path: {test_input}")

In [None]:
## Step 5: Configure XGBoost Hyperparameters

#In this step, we set up the hyperparameters for our XGBoost model. These parameters control various aspects of the model's behavior during training.

### Why this step is important:
#- Hyperparameters significantly influence the model's performance and generalization ability
#- Properly tuned hyperparameters can lead to better predictions and reduced overfitting

# Initialize hyperparameters
# Values are passed as strings, which is how the SageMaker training job receives them.
hyperparameters = {
    "max_depth": "5",                  # maximum depth of each tree
    "eta": "0.2",                      # learning rate (step-size shrinkage)
    "gamma": "4",                      # minimum loss reduction required to split a node
    "min_child_weight": "6",           # minimum sum of instance weight needed in a child
    "subsample": "0.7",                # fraction of training rows sampled per tree
    # The target (upgrade_plan) is a binary label evaluated with accuracy,
    # precision, recall and ROC AUC, so train a classifier that outputs
    # probabilities rather than a squared-error regressor.
    "objective": "binary:logistic",
    "num_round": "50",                 # number of boosting rounds
}

In [None]:
## Step 6: Set Up SageMaker XGBoost Estimator

#Here, we create a SageMaker estimator for the XGBoost algorithm. This estimator will be used to train our model on the data we prepared earlier.

### Why this step is important:
#- The estimator encapsulates the training process and model artifacts
#- It allows us to specify the compute resources to be used for training
#- It provides an interface to interact with the trained model later

# Set an output path for the trained model
# Reuse the session created in Step 1 instead of constructing a second one.
bucket = sagemaker_session.default_bucket()
prefix = 'DEMO-xgboost-as-a-built-in-algo'
output_path = f's3://{bucket}/{prefix}/telco-xgb-built-in-algo/output'

# Resolve the built-in XGBoost container for the session's region rather than
# hard-coding the us-east-1 registry, so the notebook also works in other regions.
xgboost_image_uri = image_uris.retrieve(
    framework='xgboost',
    region=sagemaker_session.boto_region_name,
    version='1.7-1',
)

# Construct a SageMaker estimator
estimator = sagemaker.estimator.Estimator(
    image_uri=xgboost_image_uri,
    hyperparameters=hyperparameters,
    role=sagemaker.get_execution_role(),
    instance_count=1,
    instance_type='ml.m5.2xlarge',
    volume_size=5,  # 5 GB
    output_path=output_path,
    sagemaker_session=sagemaker_session,
)

In [None]:
## Step 7: Train the XGBoost Model

#In this step, we use the estimator to train our XGBoost model on the prepared data.

### Why this step is important:
#- This is where the actual learning happens - the model learns to predict customer plan upgrades based on their usage data
#- The trained model can then be used to make predictions on new, unseen data

# Launch the XGBoost training job.
# The 'train' channel receives the training split; the held-out test split is
# supplied on the 'validation' channel.
estimator.fit(
    inputs={
        'train': train_input,
        'validation': test_input,
    },
)

In [None]:
## Step 8: Deploy and Evaluate the Model

#After training, we deploy the model to a SageMaker endpoint and evaluate its performance using various metrics.

### Why this step is important:
#- Deploying the model makes it available for real-time predictions
#- Evaluating the model helps us understand its performance and whether it meets our requirements
#- Multiple metrics provide a comprehensive view of the model's strengths and weaknesses

# Deploy the trained model to an endpoint
predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')

# Everything after deployment runs inside try/finally so the endpoint is always
# deleted, even if prediction or evaluation fails, and does not keep accruing charges.
try:
    # Load the LIBSVM test file written in Step 4 as the request payload
    with open(test_libsvm_file, 'rb') as libsvm_file:
        test_libsvm_data = io.BytesIO(libsvm_file.read())

    # Generate predictions
    predictions = predictor.predict(test_libsvm_data.getvalue(), initial_args={'ContentType': 'text/x-libsvm'})

    # The response is expected to be text with one score per input row; accept
    # either comma- or newline-separated values.
    raw_predictions = predictions.decode('utf-8') if isinstance(predictions, bytes) else str(predictions)
    deserialized_predictions = [float(score) for score in raw_predictions.replace(',', ' ').split()]

    # Threshold the scores at 0.5 to obtain class labels for the label-based metrics
    binary_predictions = [1 if score >= 0.5 else 0 for score in deserialized_predictions]

    # Evaluate the model
    accuracy = accuracy_score(y_test, binary_predictions)
    precision = precision_score(y_test, binary_predictions)
    recall = recall_score(y_test, binary_predictions)
    # ROC AUC is computed on the raw scores, not the thresholded labels
    roc_auc = roc_auc_score(y_test, deserialized_predictions)

    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"ROC AUC: {roc_auc}")
finally:
    # Delete the endpoint to avoid unnecessary charges
    predictor.delete_endpoint()

In [None]:
## Step 9: Generate Personalized Recommendations

#In this step, we use Amazon Bedrock to generate personalized recommendations for customers based on their usage data and available plans.

### Why this step is important:
#- Personalized recommendations can significantly improve customer satisfaction and retention
#- It demonstrates how machine learning models can be combined with natural language processing for practical applications

def get_personalized_recommendation(customer_data, plan_addons):
    # NOTE(review): the body of this function is elided in this listing; only the
    # placeholder below is present. The caller below passes a dict built from one
    # CSV row as customer_data and treats a falsy return value as "no
    # recommendation". Presumably this calls the Bedrock client created in
    # Step 1 — confirm once the implementation is restored.
    # Function implementation here...

# Generate recommendations for a sample of customers
# NOTE(review): plans_and_addons_dict is not defined in the visible part of this
# notebook; it must be loaded (presumably from plans_and_addons_csv) before this runs.
recommendation_count = 0  # number of recommendations produced so far
with open('customer_data.csv', 'r', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        # Copy the row into a plain dict keyed by CSV column name
        customer_data = dict(row)
        recommendation = get_personalized_recommendation(customer_data, plans_and_addons_dict)
        if recommendation:
            print(f"Recommendation for {customer_data['name']}:")
            print(recommendation)
            print("\n")
            # Count only successful recommendations so the limit below is meaningful
            recommendation_count += 1

        # Break after 2 recommendations for this example
        if recommendation_count >= 2:
            break

# Persist the recommendations in three formats.
# NOTE(review): recommendations_df, engine, dtype and recommendations_dynamodb are
# not defined in the visible part of this notebook — confirm where they are built.

# SQL: write to the 'recommendations' table, replacing it if it already exists
recommendations_df.to_sql(
    'recommendations',
    engine,
    if_exists='replace',
    index=False,
    dtype=dtype,
)

# CSV: same DataFrame, without the index column
recommendations_df.to_csv('customer_recommendations.csv', index=False)

# JSON (DynamoDB format)
with open('recommendations_dynamo.json', 'w') as dynamo_file:
    json.dump(recommendations_dynamodb, dynamo_file)

print("Recommendations saved in multiple formats.")