<a href="https://colab.research.google.com/github/mridulhalder/Springboard/blob/main/ChurnPredictionTrainModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sagemaker.feature_store.feature_group import FeatureGroup
from sagemaker.session import Session
import boto3
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# --- Configuration ---
# AWS region every client below is bound to, and the name of the Feature
# Store feature group that holds the churn data.
region = "ap-southeast-2"
feature_group_name = "FG-ChurnPrediction-3f9a7b2c"

# --- Initialize SageMaker and Boto3 Sessions ---
# One boto3 session pinned to the region is the root of all AWS clients.
boto_session = boto3.Session(region_name=region)

# Low-level Feature Store runtime client, created from the same session.
featurestore_runtime = boto_session.client(
    "sagemaker-featurestore-runtime",
    region_name=region,
)

# SageMaker SDK session wrapping the boto3 session, and the feature group
# handle that uses it.
sagemaker_session = Session(boto_session=boto_session)
feature_group = FeatureGroup(
    name=feature_group_name,
    sagemaker_session=sagemaker_session,
)

# --- Load Data from Feature Store Offline Store (S3) ---
# Locate this feature group's offline data. "S3Uri" is only the base prefix
# configured for the offline store, which other feature groups may share;
# "ResolvedOutputS3Uri" is the path this group's records are written to, so
# prefer it and fall back to the base prefix only if it is not reported.
description = feature_group.describe()
s3_storage_config = description["OfflineStoreConfig"]["S3StorageConfig"]
offline_store_uri = (
    s3_storage_config.get("ResolvedOutputS3Uri") or s3_storage_config["S3Uri"]
)
# Trim any trailing slash so the glob pattern has no empty "//" segment.
offline_store_uri = offline_store_uri.rstrip("/")

# List all Parquet files under the offline store S3 location
import s3fs
fs = s3fs.S3FileSystem()
files = fs.glob(f"{offline_store_uri}/**/*.parquet")

# Fail with an actionable message instead of pandas' "No objects to
# concatenate" when nothing has been written to the offline store yet.
if not files:
    raise FileNotFoundError(
        f"No Parquet files found under {offline_store_uri!r}. Check that "
        f"records were ingested into feature group {feature_group_name!r}; "
        "they may take several minutes to appear in the offline store."
    )

# Read all Parquet files into a DataFrame. fs.glob returns keys without the
# scheme, so "s3://" is added back for pandas.
df = pd.concat([pd.read_parquet(f"s3://{file}") for file in files], ignore_index=True)

# --- Prepare Data ---
# The offline store is an append-only log: each ingestion or deletion of a
# customer adds a new row, and Feature Store appends the bookkeeping columns
# below to every row. Reduce the log to the current state before training.
metadata_cols = ["write_time", "api_invocation_time", "is_deleted"]
event_time_col = description.get("EventTimeFeatureName")

# Order rows oldest -> newest on whichever recency columns are present, then
# keep only the newest row per customer (assumes "custid" is the record
# identifier -- confirm against the feature group definition).
recency_cols = [
    col
    for col in (event_time_col, "api_invocation_time", "write_time")
    if col in df.columns
]
latest_records = df.sort_values(recency_cols) if recency_cols else df
latest_records = latest_records.drop_duplicates(subset="custid", keep="last")

# A deletion is recorded as a newer row flagged is_deleted, so filter only
# after de-duplicating; otherwise a deleted customer's older row survives.
if "is_deleted" in latest_records.columns:
    latest_records = latest_records[~latest_records["is_deleted"].eq(True)]
latest_records = latest_records.reset_index(drop=True)

# Drop columns not needed for training: identifiers/dates, the label, the
# event-time column and the offline-store bookkeeping columns.
drop_cols = ["custid", "created", "firstorder", "lastorder"]  # keep only features
bookkeeping_cols = [
    col for col in (event_time_col, *metadata_cols) if col in latest_records.columns
]
# dict.fromkeys removes repeats (e.g. event time already listed in drop_cols)
# while keeping order.
non_feature_cols = list(dict.fromkeys(drop_cols + ["retained"] + bookkeeping_cols))
X = latest_records.drop(columns=non_feature_cols)
y = latest_records["retained"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Train Model ---
# fit() returns the fitted estimator, so construction and training chain.
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# --- Evaluate Model ---
# Score the held-out rows and print the per-class metrics.
y_pred = clf.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)