# 🚢 Titanic Survival Predictor - SageMaker Project
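This notebook walks through the ML lifecycle with Amazon SageMaker. The data upload, exploratory analysis, and training-job setup cells are runnable; the training call, deployment, monitoring, and pipeline steps are provided as commented-out templates to fill in.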

## 📥 Step 1: Download Titanic Dataset and Upload to S3

In [None]:
import pandas as pd, sagemaker

# Load dataset
# Read the CSV straight from GitHub (`?raw=true` serves the raw file) and keep
# a local copy, because upload_data() below takes a local file path.
df = pd.read_csv("https://github.com/sogah2023/AI-ML/blob/main/Titanic%20Dataset.csv?raw=true")
df.to_csv("titanic.csv", index=False)

# Upload to S3
# Build the SageMaker session once and reuse it, instead of constructing a new
# Session for every call. The bucket is passed explicitly so the upload target
# and the `bucket` variable used by later cells are guaranteed to be the same.
session = sagemaker.Session()
bucket = session.default_bucket()
s3_path = session.upload_data("titanic.csv", bucket=bucket, key_prefix="titanic/raw")
print("Uploaded to:", s3_path)

## 🔎 Step 2: Exploratory Data Analysis

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Correlation is only defined for numeric data. Since pandas 2.0,
# DataFrame.corr() raises on non-numeric columns instead of silently dropping
# them, so restrict the frame to numeric/bool columns explicitly (this form
# works on older pandas versions as well).
numeric_df = df.select_dtypes(include=["number", "bool"])
sns.heatmap(numeric_df.corr(), annot=True)
plt.title("Feature Correlation Heatmap")
plt.show()

## ⚙️ Step 3: Train a Model using XGBoost Estimator

In [None]:
from sagemaker.estimator import Estimator
# IAM role passed to the estimator for the training job.
role = sagemaker.get_execution_role()

# Look up the "xgboost" container image URI (version 1.3-1) for the region
# reported by the current SageMaker session.
region = sagemaker.Session().boto_region_name
xgb_image_uri = sagemaker.image_uris.retrieve("xgboost", region=region, version="1.3-1")

# Configure the estimator only; nothing is trained until fit() is called.
# Output is directed to the "output" prefix of the bucket chosen in Step 1.
xgb = Estimator(
    image_uri=xgb_image_uri,
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    output_path=f"s3://{bucket}/output",
)

# You'll need to prepare train/validation data in S3 and pass them as the input channels below
# xgb.fit({'train': 's3://.../train.csv', 'validation': 's3://.../validation.csv'})

## 🚀 Step 4: Deploy the Model and Invoke Endpoint

In [None]:
# Template only: uncomment after the estimator has been trained with xgb.fit(...).
# NOTE(review): the sample row passes the raw string 'male'; presumably it must
# be encoded the same way as the training features before calling predict —
# confirm against the preprocessing used for the train channel.
# NOTE(review): a request serializer (e.g. CSV) may also need to be configured
# on the predictor — verify against the SageMaker SDK docs.
# predictor = xgb.deploy(initial_instance_count=1, instance_type='ml.m5.large')
# result = predictor.predict([[3, 'male', 22, 1, 0, 7.25]])
# print(result)

## 📈 Step 5: Monitor with Model Monitor and Clarify (Optional Steps)

In [None]:
# Template only: builds a data-capture configuration that records 100% of
# endpoint traffic under the bucket's "monitor/" prefix.
# NOTE(review): capture_config is only defined here and never used; presumably
# it has to be passed to xgb.deploy(..., data_capture_config=capture_config)
# to take effect — confirm.
# from sagemaker.model_monitor import DataCaptureConfig
# capture_config = DataCaptureConfig(
#     enable_capture=True,
#     sampling_percentage=100,
#     destination_s3_uri=f"s3://{bucket}/monitor/"
# )

## 🔄 Step 6: Automate with SageMaker Pipelines (Structure Only)

In [None]:
# from sagemaker.workflow.pipeline import Pipeline
# pipeline = Pipeline(name='TitanicPipeline', steps=[...])
# pipeline.upsert(role_arn=role)
# pipeline.start()