In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import metrics
import pickle
import boto3
import os

In [None]:
# Initializing S3 access
# Credentials and endpoint are read from the environment. When any of them is
# unset, None is handed to boto3 unchanged (no value is hardcoded here).
key_id = os.environ.get("AWS_ACCESS_KEY_ID")
secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY")
endpoint = os.environ.get("AWS_S3_ENDPOINT")

# The bucket name is required by the S3 download and upload calls in this
# notebook. Fail here with an explicit message instead of letting a None
# bucket name surface later as an unrelated-looking boto3 error.
bucket_name = os.environ.get("AWS_S3_BUCKET")
if not bucket_name:
    raise ValueError("AWS_S3_BUCKET environment variable is not set")

# Object keys inside the bucket: input data set and output model artifact.
data_bucket_key = "processed_customer_churn_data"
model_bucket_key = "gradient_boost_model"

session = boto3.session.Session(aws_access_key_id=key_id, aws_secret_access_key=secret_key)
s3 = session.resource(service_name='s3', endpoint_url=endpoint)

In [None]:
# Load the processed data
# NOTE(review): pickle.loads can execute arbitrary code while deserializing,
# so this object must only come from a bucket whose contents are trusted.
data_object = s3.Bucket(bucket_name).Object(data_bucket_key)
pickled_data = data_object.get()['Body'].read()

# The stored payload is unpacked into the four train/test pieces.
X_train, X_test, y_train, y_test = pickle.loads(pickled_data)

In [None]:
# Create and train the model
# A fixed seed keeps the fitted model (and the accuracy reported from it)
# reproducible across Restart & Run All.
RANDOM_SEED = 42

# Despite the variable name, this is scikit-learn's GradientBoostingClassifier;
# the name is kept because other cells reference it.
xgb_model = GradientBoostingClassifier(random_state=RANDOM_SEED)
xgb_model.fit(X_train, y_train)

In [None]:
# Test accuracy
# Predict on the held-out features, then score against the held-out labels.
preds = xgb_model.predict(X_test)
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=preds)

print(accuracy)

In [None]:
# Upload the model and performance to S3
# The fitted model and its accuracy are bundled into one tuple and stored as
# a single pickled object.
model_and_performance = (xgb_model, accuracy)

# pickled_data is rebound here: it now holds the serialized model payload.
pickled_data = pickle.dumps(model_and_performance, pickle.HIGHEST_PROTOCOL)

target_bucket = s3.Bucket(bucket_name)
target_bucket.put_object(Body=pickled_data, Key=model_bucket_key)