In [None]:
# sagemaker_demo.ipynb

import pandas as pd
import boto3
import sagemaker
from sagemaker import get_execution_role
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import joblib
import os

# Demo pipeline: load a CSV, fit a logistic-regression classifier, report its
# accuracy on a held-out split, save the model locally, then upload the saved
# file to the SageMaker session's default S3 bucket.

# Load dataset: three feature columns and one target column.
data = pd.read_csv('../data/sample_dataset.csv')
X = data[['feature1', 'feature2', 'feature3']]
y = data['label']

# Split data: hold out 20% for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a simple model
model = LogisticRegression()
model.fit(X_train, y_train)

# Evaluate on the held-out rows only.
accuracy = model.score(X_test, y_test)
print(f"Model accuracy: {accuracy:.2f}")

# Save the model locally. The path is kept in one variable so the save and
# the upload below cannot drift apart.
model_path = 'model/model.joblib'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(model, model_path)

# Upload to S3 for SageMaker (optional)
# NOTE(review): if this artifact is meant to back a SageMaker endpoint, the
# hosting containers may expect a model.tar.gz archive rather than a bare
# .joblib file -- confirm before deploying.
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
prefix = 'sagemaker-demo-model'

# upload_data returns the S3 URI of the uploaded object; report that value
# instead of rebuilding the URI by hand so the message always matches the
# actual upload location.
s3_model_path = sagemaker_session.upload_data(model_path, bucket=bucket, key_prefix=prefix)
print(f"Model uploaded to S3: {s3_model_path}")
