# Binary-classification Task

In [None]:
!pip install polars[pandas]

In [2]:
import numpy
import pandas # or use `polars`

## Data Loading and Processing

### Load Dataset from XLS File by IO Process

In [3]:
# header=1 takes the column names from the second row of the sheet, and
# index_col=0 turns the first column (ID) into the row index.
df = pandas.read_excel(
    '../data/credit_card_clients.xls',
    header=1,
    index_col=0,
)
# Preview the first rows to confirm the columns were parsed as expected.
df.head()

Unnamed: 0_level_0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default payment next month
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,20000,2,2,1,24,2,2,-1,-1,-2,...,0,0,0,0,689,0,0,0,0,1
2,120000,2,2,2,26,-1,2,0,0,0,...,3272,3455,3261,0,1000,1000,1000,0,2000,1
3,90000,2,2,2,34,0,0,0,0,0,...,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
4,50000,2,2,1,37,0,0,0,0,0,...,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
5,50000,1,2,1,57,-1,0,-1,0,0,...,20940,19146,19131,2000,36681,10000,9000,689,679,0


### Convert Any Format of Data to Numpy

In [4]:
# numpy.asarray converts the DataFrame on the first run and returns the array
# unchanged when `df` is already an ndarray, so this cell can be re-run safely
# (calling .to_numpy() a second time would fail: ndarray has no such method).
df = numpy.asarray(df)

### Obtain Features and Labels for Subsequent Training

In [5]:
# The last column ('default payment next month') is the target; every column
# before it is a feature.
# NOTE(review): features are passed on unscaled (LIMIT_BAL is in the tens of
# thousands while SEX is 1/2) -- consider standardizing before training.
features = df[:, :-1]
labels = df[:, -1]
features, labels

(array([[ 20000,      2,      2, ...,      0,      0,      0],
        [120000,      2,      2, ...,   1000,      0,   2000],
        [ 90000,      2,      2, ...,   1000,   1000,   5000],
        ...,
        [ 30000,      1,      2, ...,   4200,   2000,   3100],
        [ 80000,      1,      3, ...,   1926,  52964,   1804],
        [ 50000,      1,      2, ...,   1000,   1000,   1000]], dtype=int64),
 array([1, 1, 0, ..., 1, 1, 1], dtype=int64))

In [6]:
# Display both shapes together; the row counts of features and labels must match.
tuple(arr.shape for arr in (features, labels))

((30000, 23), (30000,))

## Machine Learning Process

### Load Perming and Config Hyperparameters

In [7]:
import perming

# Take the input width from the data instead of hard-coding 23, so the model
# stays in sync with `features` if columns are added or dropped.
n_features = features.shape[1]
# Positional arguments: input width, number of classes (2), hidden layer sizes.
main = perming.Box(n_features, 2, (50,), batch_size=8, activation='relu', inplace_on=True, solver='adam', learning_rate_init=0.01)
# Alternative constructors kept from the original notebook:
# main = perming.Binarier(n_features, (50,), batch_size=8, activation='relu', solver='adam', learning_rate_init=0.01)
# main = perming.COMMON_MODELS['Binary-classification'](n_features, (50,), batch_size=8, activation='relu', solver='adam', learning_rate_init=0.01)
# Print the network layout and the resolved training configuration.
main.print_config()

MLP(
  (mlp): Sequential(
    (Linear0): Linear(in_features=23, out_features=50, bias=True)
    (Activation0): ReLU(inplace=True)
    (Linear1): Linear(in_features=50, out_features=2, bias=True)
  )
)


OrderedDict([('torch -v', '1.7.1+cu101'),
             ('criterion', CrossEntropyLoss()),
             ('batch_size', 8),
             ('solver',
              Adam (
              Parameter Group 0
                  amsgrad: False
                  betas: (0.9, 0.99)
                  eps: 1e-08
                  lr: 0.01
                  weight_decay: 0
              )),
             ('lr_scheduler', None),
             ('device', device(type='cuda'))])

### Build a Multi-threaded DataLoader from Numpy Arrays

In [8]:
# Hand the numpy feature/label arrays to the model with a fixed random_seed.
# NOTE(review): the logs in this notebook (3000 steps per epoch at batch_size=8,
# 3000 test samples out of 30000 rows) suggest an 8:1:1 train/val/test split --
# confirm against perming's data_loader defaults.
main.data_loader(features, labels, random_seed=0)

### Training Stage and Accelerated Validation

In [9]:
# Training settings gathered in one place: 2 epochs, interval=100 (the log
# below reports every 100 steps), early stopping enabled with tolerance 1e-3.
train_options = dict(num_epochs=2, tolerance=1e-3, interval=100, early_stop=True)
main.train_val(**train_options)

Epoch [1/2], Step [100/3000], Training Loss: 63.6898, Validation Loss: 50.8645
Epoch [1/2], Step [200/3000], Training Loss: 239.8149, Validation Loss: 112.0449
Epoch [1/2], Step [300/3000], Training Loss: 4.0569, Validation Loss: 27.2366
Epoch [1/2], Step [400/3000], Training Loss: 0.4223, Validation Loss: 1.1859
Epoch [1/2], Step [500/3000], Training Loss: 0.4080, Validation Loss: 0.7996
Epoch [1/2], Step [600/3000], Training Loss: 0.4102, Validation Loss: 0.8256
Epoch [1/2], Step [700/3000], Training Loss: 0.5657, Validation Loss: 0.6627
Epoch [1/2], Step [800/3000], Training Loss: 0.5645, Validation Loss: 1.8432
Epoch [1/2], Step [900/3000], Training Loss: 0.2816, Validation Loss: 1.8356
Epoch [1/2], Step [1000/3000], Training Loss: 0.2500, Validation Loss: 1.5535
Epoch [1/2], Step [1100/3000], Training Loss: 0.5738, Validation Loss: 1.5565
Epoch [1/2], Step [1200/3000], Training Loss: 0.9008, Validation Loss: 1.5538
Epoch [1/2], Step [1300/3000], Training Loss: 0.7454, Validation L

### Test Models with Accuracy and Correct Labels

In [10]:
# Evaluate on the test data: prints loss and accuracy, and returns the summary
# dictionary displayed below.
# NOTE(review): both labels report the same [2285, 3000] counts, which equals
# the overall accuracy -- these look like totals rather than per-class counts; verify.
main.test()

loss of Box on the 3000 test dataset: 0.5797181725502014. accuracy: 76.1667 %


OrderedDict([('problem', 'classification'),
             ('num_classes', 2),
             ('column', ('label name', ('true numbers', 'total numbers'))),
             ('labels', {0: [2285, 3000], 1: [2285, 3000]}),
             ('loss',
              {'train': 0.5701802968978882,
               'val': 0.7915362119674683,
               'test': 0.5797181725502014}),
             ('sorted', [(0, [2285, 3000]), (1, [2285, 3000])])])

### Save Model Parameters to Models Folder

In [11]:
# Save the model parameters to a checkpoint; note that `dir` is given a file
# path here, not a directory.
# NOTE(review): assumes ../models already exists, and that show=False only
# suppresses printing -- confirm both against perming's save().
main.save(show=False, dir='../models/credit.ckpt')

### Load Model Parameters from Models Folder

In [12]:
# Load parameters from the checkpoint path (`dir` is a file path) into `main`.
# NOTE(review): `main` is the same object that was just trained, so this only
# proves a round trip -- presumably a fresh kernel would rebuild the model first.
main.load(show=False, dir='../models/credit.ckpt')

### Test with Loaded Parameters of Tuned Model

In [13]:
# Re-run the evaluation with the parameters restored from the checkpoint; the
# reported metrics should match the run before saving.
main.test()
# The commented lines below presumably show how to rebuild the model in a
# fresh session before loading the checkpoint.
# NOTE(review): the model-construction cell earlier in this notebook calls
# Binarier / COMMON_MODELS without the `2` argument, while the lines below
# pass it -- confirm which signature is correct.
# main = perming.Box(23, 2, (50,), batch_size=8, activation='relu', inplace_on=True, solver='adam', learning_rate_init=0.01)
# main = perming.Binarier(23, 2, (50,), batch_size=8, activation='relu', solver='adam', learning_rate_init=0.01)
# main = perming.COMMON_MODELS['Binary-classification'](23, 2, (50,), batch_size=8, activation='relu', solver='adam', learning_rate_init=0.01)
# main.print_config()

loss of Box on the 3000 test dataset: 0.5797182321548462. accuracy: 76.1667 %


OrderedDict([('problem', 'classification'),
             ('num_classes', 2),
             ('column', ('label name', ('true numbers', 'total numbers'))),
             ('labels', {0: [2285, 3000], 1: [2285, 3000]}),
             ('loss',
              {'train': 0.5701802968978882,
               'val': 0.7915362119674683,
               'test': 0.5797182321548462}),
             ('sorted', [(0, [2285, 3000]), (1, [2285, 3000])])])