In [1]:
import os

import boto3
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [2]:
# Record the pandas version this notebook was executed with.
print(pd.__version__)

0.20.3


In [3]:
# Download the raw churn dataset from the S3 bucket to a local working copy.
s3 = boto3.resource('s3')

# Nothing else in this notebook creates the local 'datasets' folder, and the
# download needs an existing directory to write into, so create it on a fresh
# machine before fetching the file.
if not os.path.isdir('datasets'):
    os.makedirs('datasets')

s3.Bucket('loonyclassification').download_file('datasets/Churn_Modelling.csv', 'datasets/Churn_Modelling.csv')

In [4]:
# Local path of the raw CSV file.
DATASET_NAME = "datasets/Churn_Modelling.csv"

# Column labels applied to the data, in order. The first line of the file is
# skipped on load (presumably its own header row) and these names are used
# instead.
CSV_COLUMNS = [
    'CustomerId', 'Surname',
    'CreditScore', 'Geography', 'Gender', 'Age', 'Tenure', 'Balance',
    'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary',
    'Exited',
]

# NOTE(review): the frame shown by df.head() is indexed from 1 rather than 0,
# which suggests the file has one more leading column than CSV_COLUMNS names
# and pandas is using that column as the index -- confirm against the raw file.
df = pd.read_csv(DATASET_NAME, names=CSV_COLUMNS, skiprows=1, skipinitialspace=True)

In [5]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [6]:
# Number of rows that were loaded.
print(df.shape[0])

10000


In [7]:
def preprocess(members_in):
    """Return a cleaned copy of the customer frame ready for numeric modelling.

    The input frame is deep-copied and left untouched. On the copy, the
    'CustomerId' and 'Surname' columns are removed, and the 'Geography' and
    'Gender' columns are each replaced by the output of a newly fitted
    LabelEncoder.

    The encoders are local to this call and are not returned, so the mapping
    from original labels to encoded values is not retained.

    Args:
        members_in: DataFrame containing at least the 'CustomerId', 'Surname',
            'Geography' and 'Gender' columns.

    Returns:
        The modified copy of ``members_in``.
    """
    members = members_in.copy(deep=True)

    # Identifier columns are removed from the copy.
    for dropped_column in ('CustomerId', 'Surname'):
        del members[dropped_column]

    # Each text column is encoded with its own freshly fitted encoder.
    for encoded_column in ('Geography', 'Gender'):
        members[encoded_column] = LabelEncoder().fit_transform(members[encoded_column])

    return members

# Build the model-ready frame from the raw data; df itself is not modified
# because preprocess works on a deep copy.
membership = preprocess(df)
# Summary statistics for every column of the preprocessed frame.
membership.describe()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,650.5288,0.7463,0.5457,38.9218,5.0128,76485.889288,1.5302,0.7055,0.5151,100090.239881,0.2037
std,96.653299,0.827529,0.497932,10.487806,2.892174,62397.405202,0.581654,0.45584,0.499797,57510.492818,0.402769
min,350.0,0.0,0.0,18.0,0.0,0.0,1.0,0.0,0.0,11.58,0.0
25%,584.0,0.0,0.0,32.0,3.0,0.0,1.0,0.0,0.0,51002.11,0.0
50%,652.0,0.0,1.0,37.0,5.0,97198.54,1.0,1.0,1.0,100193.915,0.0
75%,718.0,1.0,1.0,44.0,7.0,127644.24,2.0,1.0,1.0,149388.2475,0.0
max,850.0,2.0,1.0,92.0,10.0,250898.09,4.0,1.0,1.0,199992.48,1.0


In [8]:
# Summary statistics of the EstimatedSalary column in the raw frame.
df['EstimatedSalary'].describe()

count     10000.000000
mean     100090.239881
std       57510.492818
min          11.580000
25%       51002.110000
50%      100193.915000
75%      149388.247500
max      199992.480000
Name: EstimatedSalary, dtype: float64

In [9]:
# Positional 70% / 15% / remainder split into train, validation and test.
# Rows are taken in their existing order; no shuffling is applied here.
n_rows = len(membership)
trainsize = int(n_rows * 0.7)
validsize = int(n_rows * 0.15)
valid_end = trainsize + validsize

df_train = membership.iloc[:trainsize]
df_valid = membership.iloc[trainsize:valid_end]
df_test = membership.iloc[valid_end:]

In [10]:
# Summary statistics of the Exited column in the training split.
df_train['Exited'].describe()

count    7000.000000
mean        0.206143
std         0.404563
min         0.000000
25%         0.000000
50%         0.000000
75%         0.000000
max         1.000000
Name: Exited, dtype: float64

In [11]:
# Preview the first five rows of the test split.
df_test.head(n=5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
8501,660,0,1,37,2,97324.91,1,1,0,23291.83,0
8502,678,2,1,29,6,0.0,2,1,0,64443.75,0
8503,530,0,1,37,8,0.0,2,1,1,287.99,0
8504,559,0,0,48,2,0.0,2,0,1,137961.41,0
8505,624,0,1,42,3,145155.37,1,1,0,72169.95,1


In [12]:
# Write each split to its own CSV file, without the index and without a header row.
for split_frame, split_path in (
        (df_train, 'datasets/churn-train.csv'),
        (df_valid, 'datasets/churn-valid.csv'),
        (df_test, 'datasets/churn-test.csv')):
    split_frame.to_csv(split_path, index=False, header=False)

In [13]:
# Upload the three split files to the bucket; each object key is the same as
# the file's local relative path.
churn_bucket = s3.Bucket('loonyclassification')
for split_path in ('datasets/churn-train.csv',
                   'datasets/churn-valid.csv',
                   'datasets/churn-test.csv'):
    churn_bucket.upload_file(split_path, split_path)

In [14]:
from sagemaker import get_execution_role

# IAM execution role that gives SageMaker access to resources in the AWS account.
role = get_execution_role()

# S3 location where the results of model training are saved.
model_artifacts_location = 's3://loonyclassification/artifacts'

# S3 location where the custom training code is saved in tar.gz format.
custom_code_upload_location = 's3://loonyclassification/customcode/tensorflow_churn'

In [15]:
from sagemaker.tensorflow import TensorFlow

# Settings for the SageMaker TensorFlow estimator, collected in one place:
# the entry-point script, the IAM role, where artifacts and code are stored in
# S3, the training instance, and the step counts passed for training and
# evaluation.
estimator_settings = dict(
    entry_point='churn_dnn_classifier_helper.py',
    role=role,
    output_path=model_artifacts_location,
    code_location=custom_code_upload_location,
    train_instance_count=1,
    train_instance_type='ml.c4.xlarge',
    training_steps=100,
    evaluation_steps=10,
)

churn_estimator = TensorFlow(**estimator_settings)

  return f(*args, **kwds)


In [16]:
# S3 prefix handed to the estimator as its training input. The train/valid/test
# CSVs uploaded above live under this prefix, as does the raw
# Churn_Modelling.csv that was downloaded from it.
# NOTE(review): which of these files are actually read is decided by the
# entry-point script -- confirm there.
train_data_location = 's3://loonyclassification/datasets/'

In [17]:
# Launch the SageMaker training job against the data under train_data_location.
churn_estimator.fit(train_data_location)

INFO:sagemaker:Creating training-job with name: sagemaker-tensorflow-py2-cpu-2018-03-27-03-19-53-739


..................................................................
[31mexecuting startup script (first run)[0m
[31m2018-03-27 03:25:18,825 INFO - root - running container entrypoint[0m
[31m2018-03-27 03:25:18,826 INFO - root - starting train task[0m
[31m2018-03-27 03:25:20,791 INFO - botocore.vendored.requests.packages.urllib3.connectionpool - Starting new HTTP connection (1): 169.254.170.2[0m
[31m2018-03-27 03:25:21,858 INFO - botocore.vendored.requests.packages.urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com[0m
[31m2018-03-27 03:25:21,980 INFO - botocore.vendored.requests.packages.urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com[0m
[31mINFO:tensorflow:----------------------TF_CONFIG--------------------------[0m
[31mINFO:tensorflow:{"environment": "cloud", "cluster": {"master": ["algo-1:2222"]}, "task": {"index": 0, "type": "master"}}[0m
[31mINFO:tensorflow:-------------------------------------------------------

In [18]:
# Deploy the trained model behind an endpoint served by a single ml.m4.xlarge
# instance, and keep the returned predictor for making requests.
churn_predictor = churn_estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
)

INFO:sagemaker:Creating model with name: sagemaker-tensorflow-py2-cpu-2018-03-27-03-19-53-739
INFO:sagemaker:Creating endpoint with name sagemaker-tensorflow-py2-cpu-2018-03-27-03-19-53-739


---------------------------------------------------------------------------------------------------------------!

In [19]:
# A single hand-written customer to score against the deployed endpoint.
# Ten values; presumably in the column order of the preprocessed frame without
# the Exited label (CreditScore, Geography, Gender, Age, Tenure, Balance,
# NumOfProducts, HasCrCard, IsActiveMember, EstimatedSalary) -- confirm against
# the entry-point script.
sample_customer = [588., 1., 1., 41., 2., 131341.46, 2., 0., 1., 7034.94]
churn_predictor.predict(sample_customer)

{'result': {'classifications': [{'classes': [{'label': '0',
      'score': 0.6063506007194519},
     {'label': '1', 'score': 0.39364945888519287}]}]}}