# In this Notebook we shall create a Machine Learning Model using Scikit Learn 0.19

There are several different approaches and frameworks for predicting the likelihood of a customer to churn. In this notebook, we illustrate how scikit learn can be used for this purpose.

In [1]:
import sklearn
print(sklearn.__version__)

0.19.1


In [57]:
!pip install --user scikit-learn==0.19.1



In [2]:
!pip install /project_data/data_asset/MyLabelEncoder-1.2.0.401.post202002061325.tar.gz

Processing /project_data/data_asset/MyLabelEncoder-1.2.0.401.post202002061325.tar.gz
Building wheels for collected packages: MyLabelEncoder
  Building wheel for MyLabelEncoder (setup.py) ... [?25ldone
[?25h  Created wheel for MyLabelEncoder: filename=MyLabelEncoder-1.2.0.401.post202002072042-cp36-none-any.whl size=1679 sha256=c8b959944a64d18de3cf8870568242babed9084a54f510440c0b7549e56d7631
  Stored in directory: /home/wsuser/.cache/pip/wheels/d3/bc/f1/1f23e77e8996fe556b3c6414a4087bf6b4ee9f12814b5e8580
Successfully built MyLabelEncoder
Installing collected packages: MyLabelEncoder
  Found existing installation: MyLabelEncoder 1.2.0.401.post202002072009
    Uninstalling MyLabelEncoder-1.2.0.401.post202002072009:
      Successfully uninstalled MyLabelEncoder-1.2.0.401.post202002072009
Successfully installed MyLabelEncoder-1.2.0.401.post202002072042


In [3]:
import os
import numpy as np

#### Next, we read in a dataset that we will use to develop a Machine Learning model. 
#### We can read the data here in various ways. We are showing here how to read data from a CSV file.

In [4]:
import pandas as pd
df_data_1 = pd.read_csv('/project_data/data_asset/customers.csv')
df_data_1.head()



Unnamed: 0,ID,LONGDISTANCE,INTERNATIONAL,LOCAL,DROPPED,PAYMETHOD,LOCALBILLTYPE,LONGDISTANCEBILLTYPE,USAGE,RATEPLAN,CHURN,GENDER,STATUS,CHILDREN,ESTINCOME,CAROWNER,AGE
0,1,23,0,206,0,CC,Budget,Intnl_discount,229,3,T,F,S,1,38000.0,N,24.393333
1,6,29,0,45,0,CH,FreeLocal,Standard,75,2,F,M,M,2,29616.0,N,49.426667
2,8,24,0,22,0,CC,FreeLocal,Standard,47,3,F,M,M,0,19732.8,N,50.673333
3,11,26,0,32,1,CC,Budget,Standard,59,1,F,M,S,2,96.33,N,56.473333
4,17,12,0,46,4,CC,FreeLocal,Standard,58,1,F,M,M,2,53010.8,N,18.84


In [5]:
cmergedDf = df_data_1.copy()

In [6]:
# drop the ID field
cmergedDf.drop(['ID'], axis=1, inplace=True)

In [7]:
cmergedDf.head()

Unnamed: 0,LONGDISTANCE,INTERNATIONAL,LOCAL,DROPPED,PAYMETHOD,LOCALBILLTYPE,LONGDISTANCEBILLTYPE,USAGE,RATEPLAN,CHURN,GENDER,STATUS,CHILDREN,ESTINCOME,CAROWNER,AGE
0,23,0,206,0,CC,Budget,Intnl_discount,229,3,T,F,S,1,38000.0,N,24.393333
1,29,0,45,0,CH,FreeLocal,Standard,75,2,F,M,M,2,29616.0,N,49.426667
2,24,0,22,0,CC,FreeLocal,Standard,47,3,F,M,M,0,19732.8,N,50.673333
3,26,0,32,1,CC,Budget,Standard,59,1,F,M,S,2,96.33,N,56.473333
4,12,0,46,4,CC,FreeLocal,Standard,58,1,F,M,M,2,53010.8,N,18.84


In [8]:
cmergedDf.columns.tolist()

['LONGDISTANCE',
 'INTERNATIONAL',
 'LOCAL',
 'DROPPED',
 'PAYMETHOD',
 'LOCALBILLTYPE',
 'LONGDISTANCEBILLTYPE',
 'USAGE',
 'RATEPLAN',
 'CHURN',
 'GENDER',
 'STATUS',
 'CHILDREN',
 'ESTINCOME',
 'CAROWNER',
 'AGE']

In [9]:
from sklearn.model_selection import train_test_split
y = cmergedDf['CHURN']
X = cmergedDf.drop('CHURN', axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [10]:
from sklearn.pipeline import Pipeline
#from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.preprocessing import LabelEncoder

#categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])
categorical_features = cmergedDf.select_dtypes(include=['object']).drop(['CHURN'], axis=1).columns

In [11]:
cat_indices = [cmergedDf.columns.get_loc(c) for c in categorical_features]

In [12]:
df1 = cmergedDf.copy()

In [13]:
df1.head()

Unnamed: 0,LONGDISTANCE,INTERNATIONAL,LOCAL,DROPPED,PAYMETHOD,LOCALBILLTYPE,LONGDISTANCEBILLTYPE,USAGE,RATEPLAN,CHURN,GENDER,STATUS,CHILDREN,ESTINCOME,CAROWNER,AGE
0,23,0,206,0,CC,Budget,Intnl_discount,229,3,T,F,S,1,38000.0,N,24.393333
1,29,0,45,0,CH,FreeLocal,Standard,75,2,F,M,M,2,29616.0,N,49.426667
2,24,0,22,0,CC,FreeLocal,Standard,47,3,F,M,M,0,19732.8,N,50.673333
3,26,0,32,1,CC,Budget,Standard,59,1,F,M,S,2,96.33,N,56.473333
4,12,0,46,4,CC,FreeLocal,Standard,58,1,F,M,M,2,53010.8,N,18.84


In [15]:
import mylabelencoder
categorical_transformer = Pipeline(steps=[('encoder', mylabelencoder.MyLabelEncoder(cols=['PAYMETHOD', 'LOCALBILLTYPE', 'LONGDISTANCEBILLTYPE', 'GENDER', 'STATUS', 'CAROWNER']))])


In [16]:
from sklearn.ensemble import RandomForestClassifier
rf = Pipeline(steps=[ ('cat',categorical_transformer),
                      ('classifier', RandomForestClassifier())])

  from numpy.core.umath_tests import inner1d


In [17]:
print(rf._final_estimator)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)


In [18]:
rf.fit(X_train, y_train)

Pipeline(memory=None,
     steps=[('cat', Pipeline(memory=None,
     steps=[('encoder', <mylabelencoder.MyLabelEncoder object at 0x7f991857a128>)])), ('classifier', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_...n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False))])

In [19]:
y_pred = rf.predict(X_test)

In [20]:
print(y_pred)

['F' 'F' 'T' 'F' 'F' 'F' 'F' 'F' 'T' 'F' 'T' 'F' 'F' 'T' 'F' 'F' 'F' 'T'
 'F' 'T' 'T' 'F' 'T' 'T' 'T' 'T' 'F' 'T' 'F' 'F' 'T' 'T' 'F' 'T' 'T' 'F'
 'F' 'T' 'T' 'F' 'F' 'F' 'T' 'T' 'T' 'T' 'F' 'F' 'F' 'F' 'T' 'F' 'T' 'F'
 'F' 'T' 'F' 'T' 'F' 'F' 'T' 'F' 'F' 'T' 'F' 'F' 'T' 'T' 'F' 'F' 'T' 'F'
 'T' 'F' 'F' 'F' 'F' 'T' 'T' 'T' 'T' 'F' 'F' 'F' 'T' 'F' 'F' 'T' 'T' 'F'
 'F' 'F' 'F' 'T' 'F' 'F' 'F' 'T' 'T' 'T' 'F' 'F' 'F' 'T' 'T' 'T' 'F' 'F'
 'F' 'T' 'F' 'F' 'F' 'T' 'F' 'F' 'T' 'F' 'T' 'T' 'F' 'T' 'F' 'F' 'T' 'F'
 'T' 'F' 'F' 'T' 'T' 'F' 'F' 'F' 'F' 'T' 'F' 'F' 'F' 'T' 'F' 'T' 'T' 'F'
 'T' 'F' 'F' 'T' 'T' 'F' 'F' 'F' 'F' 'T' 'F' 'F' 'F' 'T' 'T' 'T' 'T' 'T'
 'T' 'F' 'T' 'F' 'T' 'T' 'F' 'T' 'F' 'F' 'T' 'F' 'T' 'F' 'T' 'F' 'F' 'F'
 'F' 'T' 'F' 'T' 'T' 'T' 'F' 'T' 'T' 'F' 'F' 'F' 'F' 'F' 'F' 'T' 'F' 'F'
 'T' 'T' 'T' 'T' 'F' 'T' 'F' 'F' 'F' 'T' 'F' 'F' 'F' 'F' 'F' 'F' 'F' 'F'
 'F' 'F' 'T' 'F' 'F' 'T' 'F' 'T' 'F' 'F' 'F' 'T' 'T' 'T' 'T' 'F' 'F' 'F'
 'F' 'F' 'T' 'F' 'F' 'F' 'T' 'F' 'T' 'F' 'T' 'T' 'F

In [21]:
print("model score: %.3f" % rf.score(X_test, y_test))

model score: 0.961


In [22]:
!pip install watson-machine-learning-client-V4



In [23]:
from watson_machine_learning_client import WatsonMachineLearningAPIClient

In [24]:
token = os.environ['USER_ACCESS_TOKEN']
wml_credentials = {
    "token": token,
    "instance_id": "wml_local",
    "url": os.environ['RUNTIME_ENV_APSX_URL'],
    "version": "2.5.0"
}

In [25]:
client = WatsonMachineLearningAPIClient(wml_credentials)

In [26]:
# Obtain the UId of your space
def guid_from_space_name(client, space_name):
    instance_details = client.service_instance.get_details()
    space = client.spaces.get_details()
    return(next(item for item in space['resources'] if item['entity']["name"] == space_name)['metadata']['guid'])

In [27]:
your_HOL_USERID ="<cp4d_login_userid>"

space_name="Space@CP4D_By_" + your_HOL_USERID

space_id = client.spaces.store(meta_props={client.spaces.ConfigurationMetaNames.NAME: space_name})["metadata"]["guid"]

In [28]:
client.set.default_space(space_id)

'SUCCESS'

In [29]:
metadata = {
    client.repository.ModelMetaNames.NAME:"Skmodel-trained@CP4D_By_"+your_HOL_USERID,
    client.repository.ModelMetaNames.TYPE: "scikit-learn_0.19",
    client.repository.ModelMetaNames.RUNTIME_UID: "scikit-learn_0.19-py3.6",
    client.repository.ModelMetaNames.SPACE_UID: space_id
}


In [30]:
model_artifact = client.repository.store_model(rf, meta_props=metadata,training_data=X_train, training_target=y_train)

print ("Your CP4D Space name: " + space_name)
print ("Your CP4D Model name: " + "Skmodel-trained@CP4D_By_"+your_HOL_USERID)
print ("Your model is saved successfully")

Your CP4D Space name: Space@CP4D_By_HOLUserid
Your CP4D Model name: Skmodel-trained@CP4D_By_HOLUserid
Your model is saved successfully
