# Notes from MIT

#### Read more : http://introtodeeplearning.com/slides/6S191_MIT_DeepLearning_L1.pdf

# TensorFlow 2.3

#### TensorFlow is an end-to-end open source platform for machine learning. It has a comprehensive, flexible ecosystem of tools, libraries and community resources that lets researchers push the state-of-the-art in ML and developers easily build and deploy ML powered applications.


#### https://www.tensorflow.org/

# Keras

 > https://keras.io/
        

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


In [2]:
df = pd.read_csv('data/Churn_Modelling.csv')
df.head(3)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1


In [3]:
X = df.drop(labels = ['RowNumber', 'CustomerId', 'Surname', 'Exited'],axis = 1)
y = df['Exited']

In [4]:
X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [5]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CreditScore      10000 non-null  int64  
 1   Geography        10000 non-null  object 
 2   Gender           10000 non-null  object 
 3   Age              10000 non-null  int64  
 4   Tenure           10000 non-null  int64  
 5   Balance          10000 non-null  float64
 6   NumOfProducts    10000 non-null  int64  
 7   HasCrCard        10000 non-null  int64  
 8   IsActiveMember   10000 non-null  int64  
 9   EstimatedSalary  10000 non-null  float64
dtypes: float64(2), int64(6), object(2)
memory usage: 781.4+ KB


In [6]:
X = pd.get_dummies(X, columns = ['Geography','Gender'])


In [7]:
X.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain,Gender_Female,Gender_Male
0,619,42,2,0.0,1,1,1,101348.88,1,0,0,1,0
1,608,41,1,83807.86,1,0,1,112542.58,0,0,1,1,0
2,502,42,8,159660.8,3,1,0,113931.57,1,0,0,1,0
3,699,39,1,0.0,2,0,0,93826.63,1,0,0,1,0
4,850,43,2,125510.82,1,1,1,79084.1,0,0,1,1,0


In [8]:
from sklearn.preprocessing import StandardScaler
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0, stratify = y)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)


In [9]:
X_train.shape

(8000, 13)

In [10]:
X.shape[1]

13

In [11]:
%%time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

CPU times: user 1.72 s, sys: 113 ms, total: 1.83 s
Wall time: 1.83 s


In [12]:
model = Sequential()
model.add(Dense(50, activation = 'relu', input_dim=X.shape[1]))
model.add(Dense(128, activation = 'relu'))
model.add(Dense(100, activation = 'relu'))
model.add(Dense(1, activation = 'sigmoid'))

In [13]:
%%time
model.compile(optimizer = 'adam',
             loss = 'binary_crossentropy',
             metrics = ['accuracy'])

CPU times: user 15.1 ms, sys: 0 ns, total: 15.1 ms
Wall time: 15.2 ms


In [14]:
%%time
model.fit(X_train,y_train.to_numpy(),
         batch_size = 10,
         epochs = 10,
         verbose = 1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
CPU times: user 26.3 s, sys: 2.46 s, total: 28.8 s
Wall time: 19.7 s


<tensorflow.python.keras.callbacks.History at 0x7f03cc7da450>

In [15]:
y_pred = model.predict_classes(X_test)

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


In [16]:
model.evaluate(X_test, y_test.to_numpy())



[0.37249499559402466, 0.8479999899864197]

In [17]:
from sklearn.metrics import confusion_matrix, accuracy_score

In [18]:
confusion_matrix(y_test,y_pred)

array([[1499,   94],
       [ 210,  197]])

In [19]:
accuracy_score(y_test, y_pred)

0.848

# vs RandomForest

In [20]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier()
clf.fit(X_train ,y_train)

RandomForestClassifier()

In [21]:
y_pred = clf.predict(X_test)


In [22]:
accuracy_score(y_test,y_pred)

0.8615

In [23]:
confusion_matrix(y_test, y_pred)

array([[1537,   56],
       [ 221,  186]])