# Multi-classification Task

In [None]:
!pip install polars[pandas]

In [1]:
import numpy
import pandas # or use `polars`

## Data Cleaning and Processing

### Load the Dataset from a CSV File

In [2]:
# Read the bitcoin heist CSV (path is relative to this notebook) and preview the first rows.
csv_path = '../data/bitcoin_heist_data.csv'
df = pandas.read_csv(csv_path)
df.head()

Unnamed: 0,address,year,day,length,weight,count,looped,neighbors,income,label
0,111K8kZAEnJg245r2cM6y9zgJGHZtJPy6,2017,11,18,0.008333,1,0,2,100050000.0,princetonCerber
1,1123pJv8jzeFQaCV4w644pzQJzVWay2zcA,2016,132,44,0.000244,1,0,1,100000000.0,princetonLocky
2,112536im7hy6wtKbpH1qYDWtTyMRAcA2p7,2016,246,0,1.0,1,0,2,200000000.0,princetonCerber
3,1126eDRw2wqSkWosjTCre8cjjQW8sSeWH7,2016,322,72,0.003906,1,0,2,71200000.0,princetonCerber
4,1129TSjKtx65E35GiUo4AYVeyo48twbrGX,2016,238,144,0.072848,456,0,1,200000000.0,princetonLocky


### Convert Data of Any Format to NumPy

In [3]:
# numpy.asarray converts the DataFrame to an ndarray on the first run and leaves an
# ndarray unchanged, so re-running this cell no longer fails (the original
# `df.to_numpy()` raises AttributeError once `df` has been rebound to an ndarray).
df = numpy.asarray(df)
labels = df[:,-1]  # last column holds the class label

### Process Data and Obtain Dataset Information

In [4]:
# Keep every column except the first and the last (the label), then cast to float64.
feature_columns = df[:, 1:-1]
features = feature_columns.astype(numpy.float64)
print(features.shape)

(1048575, 8)


In [5]:
# Report sample/feature counts, then the label vector's shape and number of distinct classes.
n_samples, n_features = features.shape
print(n_samples, n_features)
distinct_labels = numpy.unique(labels)
print(labels.shape, len(distinct_labels))

1048575 8
(1048575,) 29


## Machine Learning Process

### Load Perming and Configure Hyperparameters

In [6]:
import perming

# Derive the input and output sizes from the data instead of hard-coding 8 and 29,
# so this cell stays correct if the feature columns or the label set change.
num_features = features.shape[1]
num_classes = len(numpy.unique(labels))

# One hidden layer of 60 units, trained with SGD (lr=0.01) on batches of 256.
main = perming.Box(num_features, num_classes, (60,), batch_size=256, activation='relu', inplace_on=True, solver='sgd', learning_rate_init=0.01)
# Alternative constructors kept from the original notebook:
# main = perming.Mutipler(num_features, num_classes, (60,), batch_size=256, activation='relu', solver='sgd', learning_rate_init=0.01)
# main = perming.COMMON_MODELS['Multi-classification'](num_features, num_classes, (60,), batch_size=256, activation='relu', solver='sgd', learning_rate_init=0.01)
main.print_config()

MLP(
  (mlp): Sequential(
    (Linear0): Linear(in_features=8, out_features=60, bias=True)
    (Activation0): ReLU(inplace=True)
    (Linear1): Linear(in_features=60, out_features=29, bias=True)
  )
)


OrderedDict([('torch -v', '1.7.1+cu101'),
             ('criterion', CrossEntropyLoss()),
             ('batch_size', 256),
             ('solver',
              SGD (
              Parameter Group 0
                  dampening: 0
                  lr: 0.01
                  momentum: 0
                  nesterov: False
                  weight_decay: 0
              )),
             ('lr_scheduler', None),
             ('device', device(type='cuda'))])

### Multi-threaded Dataloader from NumPy Arrays

In [7]:
# Hand the float64 feature matrix and the label vector to the model's data loader.
# NOTE(review): random_seed=0 presumably fixes how the data is split/shuffled — confirm in the perming docs.
main.data_loader(features, labels, random_seed=0)

### Training Stage and Accelerated Validation

In [8]:
# Train for one epoch, logging training/validation loss every 100 steps, with early stopping enabled.
# Optionally pass n_jobs > 1 (no more than the number of available processes);
# presumably this parallelizes validation — confirm in the perming docs.
main.train_val(num_epochs=1, interval=100, early_stop=True)

Epoch [1/1], Step [100/3277], Training Loss: 2.6289, Validation Loss: 2.5405
Epoch [1/1], Step [200/3277], Training Loss: 1.8560, Validation Loss: 1.8456
Epoch [1/1], Step [300/3277], Training Loss: 1.3494, Validation Loss: 1.2955
Epoch [1/1], Step [400/3277], Training Loss: 0.9759, Validation Loss: 0.9386
Epoch [1/1], Step [500/3277], Training Loss: 0.6706, Validation Loss: 0.7154
Epoch [1/1], Step [600/3277], Training Loss: 0.5756, Validation Loss: 0.5840
Epoch [1/1], Step [700/3277], Training Loss: 0.5099, Validation Loss: 0.4954
Process stop at epoch [1/1] with patience 10 within tolerance 0.001


If you instead run `main.train_val(num_epochs=1, interval=100, n_jobs=1)` (without `early_stop`), the output is:
```text
Epoch [1/1], Step [100/3277], Training Loss: 2.5056, Validation Loss: 2.4922
Epoch [1/1], Step [200/3277], Training Loss: 1.7820, Validation Loss: 1.7773
Epoch [1/1], Step [300/3277], Training Loss: 1.2424, Validation Loss: 1.2495
Epoch [1/1], Step [400/3277], Training Loss: 0.8592, Validation Loss: 0.9077
Epoch [1/1], Step [500/3277], Training Loss: 0.7162, Validation Loss: 0.7022
Epoch [1/1], Step [600/3277], Training Loss: 0.5604, Validation Loss: 0.5793
Epoch [1/1], Step [700/3277], Training Loss: 0.5167, Validation Loss: 0.5015
Epoch [1/1], Step [800/3277], Training Loss: 0.4127, Validation Loss: 0.4510
Epoch [1/1], Step [900/3277], Training Loss: 0.4142, Validation Loss: 0.4162
Epoch [1/1], Step [1000/3277], Training Loss: 0.2938, Validation Loss: 0.3913
Epoch [1/1], Step [1100/3277], Training Loss: 0.3517, Validation Loss: 0.3730
Epoch [1/1], Step [1200/3277], Training Loss: 0.4728, Validation Loss: 0.3592
Epoch [1/1], Step [1300/3277], Training Loss: 0.3026, Validation Loss: 0.3485
Epoch [1/1], Step [1400/3277], Training Loss: 0.2954, Validation Loss: 0.3400
Epoch [1/1], Step [1500/3277], Training Loss: 0.2052, Validation Loss: 0.3332
Epoch [1/1], Step [1600/3277], Training Loss: 0.1787, Validation Loss: 0.3276
Epoch [1/1], Step [1700/3277], Training Loss: 0.3615, Validation Loss: 0.3229
Epoch [1/1], Step [1800/3277], Training Loss: 0.3583, Validation Loss: 0.3192
Epoch [1/1], Step [1900/3277], Training Loss: 0.3548, Validation Loss: 0.3159
Epoch [1/1], Step [2000/3277], Training Loss: 0.2447, Validation Loss: 0.3130
Epoch [1/1], Step [2100/3277], Training Loss: 0.2626, Validation Loss: 0.3105
Epoch [1/1], Step [2200/3277], Training Loss: 0.3019, Validation Loss: 0.3084
Epoch [1/1], Step [2300/3277], Training Loss: 0.3012, Validation Loss: 0.3066
Epoch [1/1], Step [2400/3277], Training Loss: 0.3246, Validation Loss: 0.3049
Epoch [1/1], Step [2500/3277], Training Loss: 0.2772, Validation Loss: 0.3034
Epoch [1/1], Step [2600/3277], Training Loss: 0.2976, Validation Loss: 0.3022
Epoch [1/1], Step [2700/3277], Training Loss: 0.2074, Validation Loss: 0.3011
Epoch [1/1], Step [2800/3277], Training Loss: 0.2504, Validation Loss: 0.3001
Epoch [1/1], Step [2900/3277], Training Loss: 0.3414, Validation Loss: 0.2992
Epoch [1/1], Step [3000/3277], Training Loss: 0.2716, Validation Loss: 0.2983
Epoch [1/1], Step [3100/3277], Training Loss: 0.5427, Validation Loss: 0.2975
Epoch [1/1], Step [3200/3277], Training Loss: 0.3626, Validation Loss: 0.2968
```


### Test Model with Accuracy and Correct Labels

In [9]:
# Evaluate on the test dataset: prints the test loss and returns a summary
# (accuracy, number of classes, and per-label counts), as shown in the output below.
main.test()

loss of Box on the 104960 test dataset: 0.46497929096221924.


OrderedDict([('problem', 'classification'),
             ('accuracy', '95.99942835365853%'),
             ('num_classes', 29),
             ('column', ('label name', ('true numbers', 'total numbers'))),
             ('labels',
              {'montrealAPT': [100761, 104857],
               'montrealComradeCircle': [100761, 104857],
               'montrealCryptConsole': [100761, 104857],
               'montrealCryptXXX': [100761, 104857],
               'montrealCryptoLocker': [100761, 104857],
               'montrealCryptoTorLocker2015': [100761, 104857],
               'montrealDMALocker': [100761, 104857],
               'montrealDMALockerv3': [100761, 104857],
               'montrealEDA2': [100761, 104857],
               'montrealFlyper': [100761, 104857],
               'montrealGlobe': [100761, 104857],
               'montrealGlobeImposter': [100761, 104857],
               'montrealGlobev3': [100761, 104857],
               'montrealJigSaw': [100761, 104857],
               

### Save Model Parameters to Models Folder

In [11]:
# Write the model to ../models/bitcoin.ckpt.
# NOTE(review): the meaning of `con=False` is not visible here — confirm in the perming docs.
main.save(con=False, dir='../models/bitcoin.ckpt')
# main.unique and main.indices together give a two-way mapping for target values:
#   main.unique:  int -> original target value
#   main.indices: original target value -> int

### Load Model Parameters from Models Folder

In [8]:
# Load from the same checkpoint path, with the same `con` flag, that the save cell used.
# NOTE(review): presumably `main` must already be constructed with a matching architecture — confirm.
main.load(con=False, dir='../models/bitcoin.ckpt')

### Test with Loaded Parameters or Tune Model

In [9]:
# Optional: rebuild the model with a different configuration before tuning
# (29 output classes, matching the dataset):
# main = perming.Box(8, 29, (40,), batch_size=256, activation='relu', inplace_on=True, solver='sgd', learning_rate_init=0.01)
# main = perming.Mutipler(8, 29, (40,), batch_size=256, activation='relu', solver='sgd', learning_rate_init=0.01)
# main = perming.COMMON_MODELS['Multi-classification'](8, 29, (40,), batch_size=256, activation='relu', solver='sgd', learning_rate_init=0.01)
# main.print_config()
# NOTE(review): mapping layer index 0 to False presumably disables gradient updates for that layer — confirm in the perming docs.
main.set_freeze({0:False}) # freeze the first layer of `self.model`
# Continue training for one epoch with early stopping, then re-evaluate on the test dataset.
main.train_val(num_epochs=1, interval=100, early_stop=True)
main.test()

Epoch [1/1], Step [100/3277], Training Loss: 0.4768, Validation Loss: 0.4207
Epoch [1/1], Step [200/3277], Training Loss: 0.3780, Validation Loss: 0.3931
Process stop at epoch [1/1] with patience 10 within tolerance 0.001
loss of Box on the 104960 test dataset: 0.38367217779159546.


OrderedDict([('problem', 'classification'),
             ('accuracy', '95.99942835365853%'),
             ('num_classes', 29),
             ('column', ('label name', ('true numbers', 'total numbers'))),
             ('labels',
              {'montrealAPT': [100761, 104857],
               'montrealComradeCircle': [100761, 104857],
               'montrealCryptConsole': [100761, 104857],
               'montrealCryptXXX': [100761, 104857],
               'montrealCryptoLocker': [100761, 104857],
               'montrealCryptoTorLocker2015': [100761, 104857],
               'montrealDMALocker': [100761, 104857],
               'montrealDMALockerv3': [100761, 104857],
               'montrealEDA2': [100761, 104857],
               'montrealFlyper': [100761, 104857],
               'montrealGlobe': [100761, 104857],
               'montrealGlobeImposter': [100761, 104857],
               'montrealGlobev3': [100761, 104857],
               'montrealJigSaw': [100761, 104857],
               