### 準備

In [None]:
import boto3
import pandas as pd
import sagemaker
from sagemaker import get_execution_role
from sagemaker.inputs import TrainingInput

# Execution role obtained from the SageMaker SDK; it is handed to the Estimator
# in the training cell further down.
role = get_execution_role()

### データの準備

In [None]:
# S3-to-S3 copy (IPython shell escape): bring the sample customer_activity.csv from
# the redshift-downloads bucket into the 20211117-redshiftml working bucket.
!aws s3 cp s3://redshift-downloads/redshift-ml/customer_activity/customer_activity.csv s3://20211117-redshiftml/redshift-ml/customer_churn/customer_activity.csv

In [None]:
# Load the customer-activity CSV from the working bucket.
data = pd.read_csv('s3://20211117-redshiftml/redshift-ml/customer_churn/customer_activity.csv')

# Keep the label column plus the four charge columns as features.
model_data = data[['Churn?', 'Day Charge', 'Eve Charge', 'Night Charge', 'Intl Charge']]
# Alternative feature set (disabled): every column except the charges and 'Record Date'.
# model_data = data.drop(['Day Charge', 'Eve Charge', 'Night Charge', 'Intl Charge', 'Record Date'], axis=1)

# One-hot encode the string label into 'Churn?_False.' / 'Churn?_True.'.
# dtype=int pins the indicator columns to 0/1: pandas >= 2.0 defaults to bool,
# which would be written to the training CSV as "True"/"False" rather than numbers.
model_data = pd.get_dummies(model_data, dtype=int)

# Put the positive-class indicator first and drop both one-hot label columns from
# the features; SageMaker's built-in XGBoost reads the first CSV column as the target.
model_data = pd.concat(
    [
        model_data['Churn?_True.'],
        model_data.drop(['Churn?_False.', 'Churn?_True.'], axis=1),
    ],
    axis=1,
)
model_data.head()

In [None]:
# Shuffle reproducibly (fixed seed), then cut the rows 70% / 20% / 10% into
# train / validation / test.
# Positional slicing replaces the original np.split call: `np` is not imported in
# the visible setup cell (NameError), and np.split on a DataFrame goes through a
# deprecated pandas code path. The slice boundaries are the same as before.
shuffled = model_data.sample(frac=1, random_state=1729)
train_end = int(0.7 * len(shuffled))
validation_end = int(0.9 * len(shuffled))

train_data = shuffled.iloc[:train_end]
validation_data = shuffled.iloc[train_end:validation_end]
test_data = shuffled.iloc[validation_end:]  # held out; not written to disk in this cell

# Write the training files without a header row or index column.
train_data.to_csv('train.csv', header=False, index=False)
validation_data.to_csv('validation.csv', header=False, index=False)

sagemaker_session = sagemaker.Session()
content_type = 'text/csv'

# Training channel: upload train.csv, then wrap the returned S3 location.
input_train = sagemaker_session.upload_data(
    path='train.csv',
    bucket='20211117-redshiftml',
    key_prefix='redshift-ml/customer_churn_train',
)
s3_input_train = TrainingInput(input_train, content_type=content_type)

# Validation channel: same steps for validation.csv under its own prefix.
input_validation = sagemaker_session.upload_data(
    path='validation.csv',
    bucket='20211117-redshiftml',
    key_prefix='redshift-ml/customer_churn_validation',
)
s3_input_validation = TrainingInput(input_validation, content_type=content_type)

In [None]:
# Show where the uploaded training data was stored.
print('学習用データの格納先: ', input_train)

### XGBoost の学習

In [None]:
# Resolve the XGBoost 1.2-1 container image for the current boto3 region.
container = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=boto3.Session().region_name,
    version="1.2-1",
)
sess = sagemaker.Session()

# Hyperparameters for a binary:logistic model trained for 100 rounds; values are
# passed as strings.
hyperparameters = dict(
    max_depth="5",
    eta="0.2",
    gamma="4",
    min_child_weight="6",
    subsample="0.8",
    objective="binary:logistic",
    num_round="100",
)

# Estimator running on one ml.m4.xlarge instance with the role obtained earlier.
xgb = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    hyperparameters=hyperparameters,
    instance_count=1,
    instance_type='ml.m4.xlarge',
    sagemaker_session=sess,
)

# Start training with the uploaded train and validation channels.
channels = {'train': s3_input_train, 'validation': s3_input_validation}
xgb.fit(channels)

### 推論用エンドポイントの準備

In [None]:
# Deploy the trained estimator to an inference endpoint backed by one
# ml.m4.xlarge instance.
xgb_predictor = xgb.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
)