In [None]:
import boto3
import pandas as pd
from sagemaker import Session
from sagemaker import image_uris
from sagemaker.estimator import Estimator
from sklearn.model_selection import train_test_split

In [None]:
# AWS resource configuration shared by the cells below.
# NOTE: the role ARN still contains the YOUR_ACCOUNT_ID placeholder.
sagemaker_role = "arn:aws:iam::YOUR_ACCOUNT_ID:role/service-role/AmazonSageMaker-ExecutionRole"
s3_bucket, s3_prefix = "recommender-system-demo", "training-data"

# Single boto3 S3 client reused for uploads
s3_client = boto3.client("s3")


In [None]:
# Step 1: Load and preprocess data
def load_and_preprocess_data():
    """Build train/test rating tables from the processed CSVs and upload them to S3.

    Reads ``processed/movies.csv`` and ``processed/ratings.csv`` from
    ``s3_bucket``, inner-joins them on ``movie_id``, keeps only the
    ``user_id``, ``movie_id`` and ``rating`` columns, replaces both id columns
    with their integer category codes, and splits the rows 80/20 with a fixed
    seed. Each split is written to ``train.csv`` / ``test.csv`` in the current
    working directory and uploaded to ``{s3_prefix}/train/train.csv`` and
    ``{s3_prefix}/test/test.csv``.

    Returns:
        tuple: ``(train, test)`` DataFrames exactly as uploaded.

    Raises:
        ValueError: If the merge leaves no rows to split.
    """
    movies_df = pd.read_csv(f"s3://{s3_bucket}/processed/movies.csv")
    ratings_df = pd.read_csv(f"s3://{s3_bucket}/processed/ratings.csv")

    # pd.merge defaults to an inner join, so ratings without a matching
    # movie row are dropped here.
    merged = pd.merge(ratings_df, movies_df, on="movie_id")

    # assign() returns a new frame, so the id columns are re-encoded without
    # writing into a column-subset slice (which triggers
    # SettingWithCopyWarning on pandas versions before 3.0).
    data = merged[["user_id", "movie_id", "rating"]].assign(
        user_id=lambda df: df["user_id"].astype("category").cat.codes,
        movie_id=lambda df: df["movie_id"].astype("category").cat.codes,
    )

    # Fail with a pipeline-specific message instead of letting the splitter
    # complain about an empty array.
    if data.empty:
        raise ValueError(
            "No ratings left after merging with movies; nothing to split or upload."
        )

    train, test = train_test_split(data, test_size=0.2, random_state=42)

    # Write both local files first, then upload both, so nothing reaches S3
    # unless every split was written successfully.
    splits = {"train": train, "test": test}
    for split_name, split_df in splits.items():
        split_df.to_csv(f"{split_name}.csv", index=False)
    for split_name in splits:
        s3_client.upload_file(
            f"{split_name}.csv",
            s3_bucket,
            f"{s3_prefix}/{split_name}/{split_name}.csv",
        )

    print("Data preprocessing completed and uploaded to S3.")
    return train, test

In [None]:

# Step 2: Train model
def train_model(hyperparameters=None):
    """Train the Factorization Machines estimator on the uploaded splits.

    Creates a SageMaker session, resolves the built-in
    ``factorization-machines`` training image for that session's region,
    and fits an ``Estimator`` on the ``train`` and ``test`` channels located
    under ``s3://{s3_bucket}/{s3_prefix}/``. Model artifacts are written to
    ``s3://{s3_bucket}/output``.

    Args:
        hyperparameters: Optional dict of algorithm hyperparameters applied
            with ``Estimator.set_hyperparameters`` before fitting.

    Returns:
        The fitted ``Estimator``. It is also bound to the module-level name
        ``estimator``, which is the name ``deploy_model`` looks up.

    NOTE(review): the AWS documentation for the built-in Factorization
    Machines algorithm lists recordIO-protobuf as the training input format
    and ``feature_dim``, ``num_factors`` and ``predictor_type`` as required
    hyperparameters. The CSV objects under the train/test prefixes and a call
    without ``hyperparameters`` are therefore likely to be rejected by the
    training job -- confirm against the algorithm documentation.
    """
    global estimator

    session = Session()

    # Resolve the image for the session's region instead of hard-coding a
    # single-region registry URI with a floating ":latest" tag.
    training_image = image_uris.retrieve(
        framework="factorization-machines",
        region=session.boto_region_name,
    )

    fm_estimator = Estimator(
        image_uri=training_image,
        role=sagemaker_role,
        instance_count=1,
        instance_type="ml.m5.large",
        output_path=f"s3://{s3_bucket}/output",
        sagemaker_session=session,
    )

    if hyperparameters:
        fm_estimator.set_hyperparameters(**hyperparameters)

    # Channel names map to the S3 prefixes the preprocessing step uploads to.
    fm_estimator.fit({
        "train": f"s3://{s3_bucket}/{s3_prefix}/train",
        "test": f"s3://{s3_bucket}/{s3_prefix}/test",
    })

    print("Model training completed.")

    # Publish only after fit() succeeded so a failed run never leaves an
    # unfitted estimator behind for the deploy step.
    estimator = fm_estimator
    return fm_estimator


In [None]:
# Step 3: Deploy model
def deploy_model(estimator=None, endpoint_name="recommender-endpoint"):
    """Deploy a fitted estimator to a real-time endpoint.

    Args:
        estimator: Object exposing ``deploy(...)``. When omitted, the
            module-level name ``estimator`` is used if it exists, which keeps
            the original no-argument call form working.
        endpoint_name: Name passed to ``deploy`` and echoed in the status
            message. Defaults to ``"recommender-endpoint"``.

    Returns:
        Whatever ``estimator.deploy`` returns (the predictor).

    Raises:
        RuntimeError: If no estimator was passed and no module-level
            ``estimator`` is available.
    """
    if estimator is None:
        # The parameter shadows the global of the same name, so look the
        # global up explicitly.
        estimator = globals().get("estimator")
    if estimator is None:
        raise RuntimeError(
            "No trained estimator available: pass one to deploy_model() "
            "or run the training step first."
        )

    predictor = estimator.deploy(
        initial_instance_count=1,
        instance_type="ml.m5.large",
        endpoint_name=endpoint_name,
    )

    print(f"Model deployed at endpoint: {endpoint_name}")
    return predictor

# Execute the pipeline
if __name__ == "__main__":
    load_and_preprocess_data()
    # Pass the training result on explicitly; if train_model() returns
    # nothing, deploy_model() falls back to a module-level `estimator`.
    trained_estimator = train_model()
    deploy_model(trained_estimator)