# In this Notebook we shall create a Machine Learning Model using Scikit Learn 0.18

There are several different approaches and frameworks for predicting the likelihood of a customer to churn. In this notebook, we illustrate how scikit learn can be used for this purpose.

In [110]:
import sklearn
print(sklearn.__version__)

0.19.1


In [3]:
!pip install --user scikit-learn==0.19.1

Collecting scikit-learn==0.19.1
[?25l  Downloading https://files.pythonhosted.org/packages/3d/2d/9fbc7baa5f44bc9e88ffb7ed32721b879bfa416573e85031e16f52569bc9/scikit_learn-0.19.1-cp36-cp36m-manylinux1_x86_64.whl (12.4MB)
[K     |################################| 12.4MB 10.7MB/s eta 0:00:01
[?25hInstalling collected packages: scikit-learn
Successfully installed scikit-learn-0.19.1


In [4]:
!pip install /project_data/data_asset/MyLabelEncoder-1.2.0.401.post201912301918.tar.gz

Processing /project_data/data_asset/MyLabelEncoder-1.2.0.401.post201912301918.tar.gz
Building wheels for collected packages: MyLabelEncoder
  Building wheel for MyLabelEncoder (setup.py) ... [?25ldone
[?25h  Created wheel for MyLabelEncoder: filename=MyLabelEncoder-1.2.0.401.post202001102058-cp36-none-any.whl size=1905 sha256=d6c65943acb3e1fb7417c46fc8531c544b18e381cb940cb66f0de2d243dce2a6
  Stored in directory: /home/wsuser/.cache/pip/wheels/2b/69/0b/0d1a542a512bea2988b3dcba44df82c09221a2c26ab00cba7c
Successfully built MyLabelEncoder
Installing collected packages: MyLabelEncoder
Successfully installed MyLabelEncoder-1.2.0.401.post202001102058


In [111]:
import os
import numpy as np

#### Next, we read in a dataset that we will use to develop a Machine Learning model. 
#### We can read the data here in various ways. We are showing here how to read data from a CSV file.

In [112]:
import pandas as pd
df_data_1 = pd.read_csv('/project_data/data_asset/customers.csv')
df_data_1.head()



Unnamed: 0,ID,LONGDISTANCE,INTERNATIONAL,LOCAL,DROPPED,PAYMETHOD,LOCALBILLTYPE,LONGDISTANCEBILLTYPE,USAGE,RATEPLAN,CHURN,GENDER,STATUS,CHILDREN,ESTINCOME,CAROWNER,AGE
0,1,23,0,206,0,CC,Budget,Intnl_discount,229,3,T,F,S,1,38000.0,N,24.393333
1,6,29,0,45,0,CH,FreeLocal,Standard,75,2,F,M,M,2,29616.0,N,49.426667
2,8,24,0,22,0,CC,FreeLocal,Standard,47,3,F,M,M,0,19732.8,N,50.673333
3,11,26,0,32,1,CC,Budget,Standard,59,1,F,M,S,2,96.33,N,56.473333
4,17,12,0,46,4,CC,FreeLocal,Standard,58,1,F,M,M,2,53010.8,N,18.84


In [113]:
cmergedDf = df_data_1.copy()

In [114]:
# drop the ID field
cmergedDf.drop(['ID'], axis=1, inplace=True)

In [115]:
cmergedDf.head()

Unnamed: 0,LONGDISTANCE,INTERNATIONAL,LOCAL,DROPPED,PAYMETHOD,LOCALBILLTYPE,LONGDISTANCEBILLTYPE,USAGE,RATEPLAN,CHURN,GENDER,STATUS,CHILDREN,ESTINCOME,CAROWNER,AGE
0,23,0,206,0,CC,Budget,Intnl_discount,229,3,T,F,S,1,38000.0,N,24.393333
1,29,0,45,0,CH,FreeLocal,Standard,75,2,F,M,M,2,29616.0,N,49.426667
2,24,0,22,0,CC,FreeLocal,Standard,47,3,F,M,M,0,19732.8,N,50.673333
3,26,0,32,1,CC,Budget,Standard,59,1,F,M,S,2,96.33,N,56.473333
4,12,0,46,4,CC,FreeLocal,Standard,58,1,F,M,M,2,53010.8,N,18.84


In [116]:
cmergedDf.columns.tolist()

['LONGDISTANCE',
 'INTERNATIONAL',
 'LOCAL',
 'DROPPED',
 'PAYMETHOD',
 'LOCALBILLTYPE',
 'LONGDISTANCEBILLTYPE',
 'USAGE',
 'RATEPLAN',
 'CHURN',
 'GENDER',
 'STATUS',
 'CHILDREN',
 'ESTINCOME',
 'CAROWNER',
 'AGE']

In [117]:
from sklearn.model_selection import train_test_split
y = cmergedDf['CHURN']
X = cmergedDf.drop('CHURN', axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [118]:
from sklearn.pipeline import Pipeline
#from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.preprocessing import LabelEncoder

#categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])
categorical_features = cmergedDf.select_dtypes(include=['object']).drop(['CHURN'], axis=1).columns

In [119]:
cat_indices = [cmergedDf.columns.get_loc(c) for c in categorical_features]

In [120]:
df1 = cmergedDf.copy()

In [121]:
df1.head()

Unnamed: 0,LONGDISTANCE,INTERNATIONAL,LOCAL,DROPPED,PAYMETHOD,LOCALBILLTYPE,LONGDISTANCEBILLTYPE,USAGE,RATEPLAN,CHURN,GENDER,STATUS,CHILDREN,ESTINCOME,CAROWNER,AGE
0,23,0,206,0,CC,Budget,Intnl_discount,229,3,T,F,S,1,38000.0,N,24.393333
1,29,0,45,0,CH,FreeLocal,Standard,75,2,F,M,M,2,29616.0,N,49.426667
2,24,0,22,0,CC,FreeLocal,Standard,47,3,F,M,M,0,19732.8,N,50.673333
3,26,0,32,1,CC,Budget,Standard,59,1,F,M,S,2,96.33,N,56.473333
4,12,0,46,4,CC,FreeLocal,Standard,58,1,F,M,M,2,53010.8,N,18.84


In [122]:
from MyLabelEncoder import MyLabelEncoder
mm = MyLabelEncoder(columns = ['PAYMETHOD','LONGDISTANCEBILLTYPE']).fit_transform(df1)
mm.head()

Unnamed: 0,LONGDISTANCE,INTERNATIONAL,LOCAL,DROPPED,PAYMETHOD,LOCALBILLTYPE,LONGDISTANCEBILLTYPE,USAGE,RATEPLAN,CHURN,GENDER,STATUS,CHILDREN,ESTINCOME,CAROWNER,AGE
0,23,0,206,0,1,Budget,0,229,3,T,F,S,1,38000.0,N,24.393333
1,29,0,45,0,2,FreeLocal,1,75,2,F,M,M,2,29616.0,N,49.426667
2,24,0,22,0,1,FreeLocal,1,47,3,F,M,M,0,19732.8,N,50.673333
3,26,0,32,1,1,Budget,1,59,1,F,M,S,2,96.33,N,56.473333
4,12,0,46,4,1,FreeLocal,1,58,1,F,M,M,2,53010.8,N,18.84


In [123]:
categorical_transformer = Pipeline(steps=[('encoder', MyLabelEncoder())])


In [124]:
from sklearn.ensemble import RandomForestClassifier
rf = Pipeline(steps=[ ('cat',categorical_transformer),
                      ('classifier', RandomForestClassifier())])

In [125]:
print(rf._final_estimator)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)


In [126]:
rf.fit(X_train, y_train)

Pipeline(memory=None,
     steps=[('cat', Pipeline(memory=None,
     steps=[('encoder', <MyLabelEncoder.MyLabelEncoder object at 0x7ff6ba428f28>)])), ('classifier', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_...n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False))])

In [127]:
y_pred = rf.predict(X_test)

In [128]:
print(y_pred)

['F' 'F' 'F' 'T' 'F' 'F' 'F' 'F' 'F' 'F' 'F' 'F' 'F' 'F' 'F' 'T' 'T' 'T'
 'F' 'F' 'T' 'F' 'F' 'F' 'F' 'F' 'F' 'F' 'F' 'T' 'F' 'F' 'T' 'F' 'F' 'T'
 'T' 'T' 'F' 'T' 'F' 'F' 'F' 'T' 'F' 'F' 'T' 'F' 'F' 'T' 'F' 'T' 'F' 'F'
 'F' 'F' 'F' 'F' 'T' 'F' 'F' 'F' 'F' 'T' 'F' 'T' 'F' 'T' 'F' 'T' 'F' 'F'
 'T' 'T' 'T' 'F' 'T' 'F' 'F' 'T' 'T' 'F' 'F' 'F' 'T' 'F' 'T' 'T' 'T' 'F'
 'T' 'F' 'T' 'T' 'F' 'F' 'T' 'T' 'F' 'F' 'T' 'F' 'F' 'T' 'F' 'F' 'T' 'F'
 'F' 'F' 'F' 'T' 'F' 'F' 'F' 'F' 'T' 'F' 'F' 'T' 'F' 'F' 'F' 'F' 'T' 'F'
 'F' 'F' 'F' 'F' 'T' 'T' 'F' 'F' 'T' 'T' 'F' 'F' 'T' 'F' 'F' 'F' 'T' 'F'
 'T' 'T' 'F' 'F' 'F' 'T' 'F' 'T' 'F' 'T' 'T' 'T' 'F' 'F' 'F' 'T' 'F' 'F'
 'T' 'F' 'T' 'F' 'F' 'F' 'F' 'F' 'F' 'F' 'T' 'F' 'F' 'T' 'F' 'F' 'F' 'T'
 'F' 'F' 'F' 'F' 'T' 'F' 'T' 'F' 'T' 'F' 'F' 'T' 'F' 'T' 'F' 'F' 'T' 'F'
 'F' 'T' 'F' 'F' 'F' 'F' 'F' 'F' 'T' 'F' 'T' 'T' 'F' 'T' 'F' 'F' 'F' 'F'
 'T' 'F' 'T' 'T' 'T' 'F' 'T' 'T' 'F' 'F' 'F' 'T' 'F' 'F' 'T' 'F' 'T' 'F'
 'F' 'F' 'F' 'F' 'T' 'F' 'F' 'T' 'F' 'F' 'F' 'F' 'T

In [129]:
print("model score: %.3f" % rf.score(X_test, y_test))

model score: 0.876


In [22]:
!pip install watson-machine-learning-client-V4



In [23]:
!pip install xgboost

Collecting xgboost
[?25l  Downloading https://files.pythonhosted.org/packages/c1/24/5fe7237b2eca13ee0cfb100bec8c23f4e69ce9df852a64b0493d49dae4e0/xgboost-0.90-py2.py3-none-manylinux1_x86_64.whl (142.8MB)
[K     |################################| 142.8MB 66.3MB/s eta 0:00:01   |###########                     | 49.4MB 6.4MB/s eta 0:00:15
Installing collected packages: xgboost
Successfully installed xgboost-0.90


In [130]:
from watson_machine_learning_client import WatsonMachineLearningAPIClient

In [131]:
#wml_credentials = {
#    "username": "ctp",
#    "password": "ctpctp",
#    "instance_id": "wml_local",
#     'url': 'https://mlpattern.184.170.232.151.nip.io',
#    ##"url": os.environ['RUNTIME_ENV_APSX_URL'],
#    "version": "2.5.0"
#}


token = os.environ['USER_ACCESS_TOKEN']
wml_credentials = {
    "token": token,
    "instance_id": "wml_local",
    "url": os.environ['RUNTIME_ENV_APSX_URL'],
    "version": "2.5.0"
}

In [132]:
client = WatsonMachineLearningAPIClient(wml_credentials)

In [44]:
# Obtain the UId of your space
def guid_from_space_name(client, space_name):
    instance_details = client.service_instance.get_details()
    space = client.spaces.get_details()
    return(next(item for item in space['resources'] if item['entity']["name"] == space_name)['metadata']['guid'])

In [133]:
space_id = client.spaces.store(meta_props={client.spaces.ConfigurationMetaNames.NAME: "MaggieChurn"})["metadata"]["guid"]

In [105]:
##Enter the name of your deployment space here:
space_uid = guid_from_space_name(client, 'maggie')
print("Space ID = " + space_id)
client.set.default_space(space_id)

StopIteration: 

In [134]:
client.set.default_space(space_id)

'SUCCESS'

In [135]:
print(space_id)

4e7f1a70-5cbe-4498-bd34-c32da82297fc


In [139]:
metadata = {
    client.repository.ModelMetaNames.NAME:"churn scikit",
    client.repository.ModelMetaNames.TYPE: "scikit-learn_0.19",
    client.repository.ModelMetaNames.RUNTIME_UID: "scikit-learn_0.19-py3.6",
    client.repository.ModelMetaNames.SPACE_UID: space_id
}


In [140]:
model_artifact = client.repository.store_model(rf, meta_props=metadata)

In [141]:
model_artifact = client.repository.store_model(rf, meta_props=metadata,training_data=X_train, training_target=y_train)