In [1]:
# Classification notebook: trains a small feed-forward network on the churn and telescope datasets

In [2]:
from os.path import join
from google.colab import drive

# Mount point for Google Drive inside the Colab runtime.
ROOT = '/content/gdrive/'
# Project folder on Drive; it is appended to ROOT when data paths are built.
MY_GOOGLE_DRIVE_PATH = 'My Drive/Capstone Project'

drive.mount(ROOT)

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

from collections import Counter
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.compose import make_column_transformer

In [4]:
# Seed NumPy's and PyTorch's global RNGs from one shared constant so that
# DataLoader shuffling and nn.Linear weight initialisation are repeatable.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED);  # trailing ';' keeps the returned Generator repr out of the cell output

<torch._C.Generator at 0x7f63f52dc690>

In [5]:
# Load the churn and telescope datasets from the project's data folder on Drive.
data_dir = ROOT + MY_GOOGLE_DRIVE_PATH + '/Edge/data/'

# Churn: drop RowNumber, CustomerId and Surname right after reading, without
# mutating the frame in place.
churn = pd.read_csv(data_dir + 'churn.csv').drop(
    columns=['RowNumber', 'CustomerId', 'Surname'])

# Telescope: comma-separated file without a header row, so the column names
# are assigned explicitly after reading.
telescope = pd.read_csv(data_dir + 'telescope.dat', header=None, sep=',')
telescope.columns = [
    'FLength', 'FWidth', 'FSize', 'FConc', 'FConc1', 'FAsym',
    'FM3Long', 'FM3Trans', 'FAlpha', 'FDist', 'Class',
]

print('Shape - Churn: {} & Telescope: {}'.format(churn.shape, telescope.shape))

Shape - Churn: (10000, 11) & Telescope: (19020, 11)


In [6]:
# Preview the first rows of the churn frame (identifier columns already dropped).
churn.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [7]:
# Class balance of the churn target column.
churn["Exited"].value_counts()

0    7963
1    2037
Name: Exited, dtype: int64

In [8]:
# Category frequencies of Geography, one of the two columns that get one-hot encoded.
churn["Geography"].value_counts()

France     5014
Germany    2509
Spain      2477
Name: Geography, dtype: int64

In [9]:
# Category frequencies of Gender, the other column that gets one-hot encoded.
churn["Gender"].value_counts()

Male      5457
Female    4543
Name: Gender, dtype: int64

In [10]:
# Column transformer for the churn features: one-hot encode the two
# categorical columns and standardise every remaining column.
preprocess = make_column_transformer(
    (OneHotEncoder(), ['Geography', 'Gender']),
    remainder=StandardScaler(),
)

In [11]:
# Encode/scale every column except the last one; the last column (`Exited`)
# is the classification target.
data = preprocess.fit_transform(churn.iloc[:, :-1])
target = np.array(churn.iloc[:, -1])
print('\nClass Distribution - Churn:', Counter(target))

# The network consumes float32 features and nn.CrossEntropyLoss expects int64
# class indices. `np.long` is not a stable name (deprecated in NumPy 1.20,
# removed in 1.24), so the integer width is requested explicitly.
data = data.astype(np.float32)
target = target.astype(np.int64)

# NOTE: `churn` is rebound from the DataFrame to a dict of tensors here, so this
# cell cannot be re-run without re-running the data-loading cell first.
churn = {'data': torch.from_numpy(data), 'target': torch.from_numpy(target)}


Class Distribution - Churn: Counter({0: 7963, 1: 2037})


In [12]:
# Pair the churn feature tensor with its label tensor so a DataLoader can serve (features, label) samples.
churn_dataset = torch.utils.data.TensorDataset(churn["data"],churn["target"])

In [13]:
# Preview the first rows of the telescope frame with the column names assigned at load time.
telescope.head()

Unnamed: 0,FLength,FWidth,FSize,FConc,FConc1,FAsym,FM3Long,FM3Trans,FAlpha,FDist,Class
0,28.7967,16.0021,2.6449,0.3918,0.1982,27.7004,22.011,-8.2027,40.092,81.8828,g
1,31.6036,11.7235,2.5185,0.5303,0.3773,26.2722,23.8238,-9.9574,6.3609,205.261,g
2,162.052,136.031,4.0612,0.0374,0.0187,116.741,-64.858,-45.216,76.96,256.788,g
3,23.8172,9.5728,2.3385,0.6147,0.3922,27.2107,-6.4633,-7.1513,10.449,116.737,g
4,75.1362,30.9205,3.1611,0.3168,0.1832,-5.5277,28.5525,21.8393,4.648,356.462,g


In [14]:
# Class balance of the telescope labels ('g' / 'h').
telescope["Class"].value_counts()

g    12332
h     6688
Name: Class, dtype: int64

In [15]:
# Summary statistics of the numeric telescope columns; the features sit on very different scales.
telescope.describe()

Unnamed: 0,FLength,FWidth,FSize,FConc,FConc1,FAsym,FM3Long,FM3Trans,FAlpha,FDist
count,19020.0,19020.0,19020.0,19020.0,19020.0,19020.0,19020.0,19020.0,19020.0,19020.0
mean,53.250154,22.180966,2.825017,0.380327,0.214657,-4.331745,10.545545,0.249726,27.645707,193.818026
std,42.364855,18.346056,0.472599,0.182813,0.110511,59.206062,51.000118,20.827439,26.103621,74.731787
min,4.2835,0.0,1.9413,0.0131,0.0003,-457.9161,-331.78,-205.8947,0.0,1.2826
25%,24.336,11.8638,2.4771,0.2358,0.128475,-20.58655,-12.842775,-10.849375,5.547925,142.49225
50%,37.1477,17.1399,2.7396,0.35415,0.1965,4.01305,15.3141,0.6662,17.6795,191.85145
75%,70.122175,24.739475,3.1016,0.5037,0.285225,24.0637,35.8378,10.946425,45.88355,240.563825
max,334.177,256.382,5.3233,0.893,0.6752,575.2407,238.321,179.851,90.0,495.561


In [16]:
# Scaler that is fitted on the telescope feature columns in the next cell.
scaler = StandardScaler()

In [17]:
# Standardise every column except the last one; the last column (`Class`) is the label.
data = scaler.fit_transform(telescope.iloc[:, :-1])
# LabelEncoder assigns integer codes in sorted label order, so 'g' -> 0 and 'h' -> 1.
target = LabelEncoder().fit_transform(telescope.Class)
print('\nClass Distribution - Telescope:', Counter(target))

# The network consumes float32 features and nn.CrossEntropyLoss expects int64
# class indices. `np.long` is not a stable name (deprecated in NumPy 1.20,
# removed in 1.24), so the integer width is requested explicitly.
data = data.astype(np.float32)
target = target.astype(np.int64)

# NOTE: `telescope` is rebound from the DataFrame to a dict of tensors here, so
# this cell cannot be re-run without re-running the data-loading cell first.
telescope = {'data': torch.from_numpy(data), 'target': torch.from_numpy(target)}


Class Distribution - Telescope: Counter({0: 12332, 1: 6688})


In [18]:
# Pair the telescope feature tensor with its label tensor so a DataLoader can serve (features, label) samples.
telescope_dataset = torch.utils.data.TensorDataset(telescope["data"],telescope["target"])

In [19]:
# Pick the compute device: the first CUDA GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [20]:
# Hyperparameters shared by the churn and telescope models.

# Network shape
num_classes = 2        # both datasets have two target classes
hidden_size = 5        # width of the single hidden layer

# Optimisation
learning_rate = 1e-3   # Adam step size
batch_size = 100       # samples per mini-batch
num_epochs = 5         # full passes over the training data

In [21]:
# Model architecture
class NeuralNet(nn.Module):
    """Fully connected classifier with one hidden layer: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # The attribute names fc1 / relu / fc2 appear in state_dict keys and in
        # the printed module summary, so they are kept as-is.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for input ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [22]:
# Mini-batch loader over the churn dataset; samples are reshuffled on every pass.
train_loader = torch.utils.data.DataLoader(
    churn_dataset, batch_size=batch_size, shuffle=True)

In [23]:
# Size the input layer from the number of columns in the churn feature tensor.
input_size = churn["data"].size(1)
model_churn = NeuralNet(
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
).to(device)

# Cross-entropy on the raw class scores, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_churn.parameters(), lr=learning_rate)

In [24]:
# Print the module summary (layer types and sizes) of the churn model.
print(model_churn)

NeuralNet(
  (fc1): Linear(in_features=13, out_features=5, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=5, out_features=2, bias=True)
)


In [25]:
# Train the churn model for `num_epochs` passes over the loader.
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (features, labels) in enumerate(train_loader):
        # Put the batch on the same device as the model.
        features, labels = features.to(device), labels.to(device)

        # Clear old gradients, then forward -> loss -> backward -> parameter update.
        optimizer.zero_grad()
        outputs = model_churn(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Only every 100th step reports its batch loss.
        if (i + 1) % 100 != 0:
            continue
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
              .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))


Epoch [1/5], Step [100/100], Loss: 0.5410
Epoch [2/5], Step [100/100], Loss: 0.4922
Epoch [3/5], Step [100/100], Loss: 0.4504
Epoch [4/5], Step [100/100], Loss: 0.3604
Epoch [5/5], Step [100/100], Loss: 0.4993


In [26]:
# Mini-batch loader over the telescope dataset; samples are reshuffled on every pass.
# NOTE: this rebinds `train_loader`, which previously pointed at the churn data.
train_loader = torch.utils.data.DataLoader(
    telescope_dataset, batch_size=batch_size, shuffle=True)

In [27]:
# Size the input layer from the number of columns in the telescope feature tensor.
input_size = telescope["data"].size(1)
model_telescope = NeuralNet(
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
).to(device)

# Fresh loss and optimizer bound to the telescope model's parameters
# (`criterion` and `optimizer` are rebound from the churn run).
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_telescope.parameters(), lr=learning_rate)

In [28]:
# Print the module summary (layer types and sizes) of the telescope model.
print(model_telescope)

NeuralNet(
  (fc1): Linear(in_features=10, out_features=5, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=5, out_features=2, bias=True)
)


In [29]:
# Train the telescope model for `num_epochs` passes over the loader.
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (features, labels) in enumerate(train_loader):
        # Put the batch on the same device as the model.
        features, labels = features.to(device), labels.to(device)

        # Clear old gradients, then forward -> loss -> backward -> parameter update.
        optimizer.zero_grad()
        outputs = model_telescope(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Only every 100th step reports its batch loss.
        if (i + 1) % 100 != 0:
            continue
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
              .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))


Epoch [1/5], Step [100/191], Loss: 0.6065
Epoch [2/5], Step [100/191], Loss: 0.4885
Epoch [3/5], Step [100/191], Loss: 0.3402
Epoch [4/5], Step [100/191], Loss: 0.4141
Epoch [5/5], Step [100/191], Loss: 0.4768
