# Multi-classification Task

In [None]:
!pip install polars[pandas]

In [1]:
import numpy
import pandas # or use `polars`

## Data Cleaning and Processing

### Load the Dataset from a CSV File

In [2]:
# Read the raw dataset from disk into a DataFrame, then preview its first
# five rows as the cell output.
df = pandas.read_csv(filepath_or_buffer='../data/bitcoin_heist_data.csv')
df.head(n=5)

Unnamed: 0,address,year,day,length,weight,count,looped,neighbors,income,label
0,111K8kZAEnJg245r2cM6y9zgJGHZtJPy6,2017,11,18,0.008333,1,0,2,100050000.0,princetonCerber
1,1123pJv8jzeFQaCV4w644pzQJzVWay2zcA,2016,132,44,0.000244,1,0,1,100000000.0,princetonLocky
2,112536im7hy6wtKbpH1qYDWtTyMRAcA2p7,2016,246,0,1.0,1,0,2,200000000.0,princetonCerber
3,1126eDRw2wqSkWosjTCre8cjjQW8sSeWH7,2016,322,72,0.003906,1,0,2,71200000.0,princetonCerber
4,1129TSjKtx65E35GiUo4AYVeyo48twbrGX,2016,238,144,0.072848,456,0,1,200000000.0,princetonLocky


### Convert the Data to a NumPy Array

In [3]:
# Convert the DataFrame to a NumPy array exactly once. This cell rebinds `df`
# from a DataFrame to an ndarray, so an unguarded `df.to_numpy()` would raise
# AttributeError if the cell were executed a second time; the guard keeps the
# cell idempotent while later cells still receive an ndarray.
if isinstance(df, pandas.DataFrame):
    df = df.to_numpy()

# The last column of the table is `label`, the classification target.
labels = df[:, -1]

### Process Data and Obtain Dataset Information

In [4]:
# Keep every column except the first one and the trailing label column,
# then cast that block to float64 to obtain the feature matrix.
numeric_columns = slice(1, -1)
features = df[:, numeric_columns].astype(numpy.float64)
print(features.shape)

(1048575, 8)


In [5]:
# First line: number of rows and number of feature columns.
print(*features.shape)
# Second line: shape of the label vector and the count of distinct labels.
print(labels.shape, numpy.unique(labels).size)

1048575 8
(1048575,) 29


## Machine Learning Process

### Load Perming and Configure Hyperparameters

In [6]:
import perming

# Derive the input and output sizes from the loaded data instead of
# hard-coding them, so the model stays in sync with the dataset.
# For this dataset they evaluate to 8 features and 29 classes.
num_features = features.shape[1]
num_classes = len(numpy.unique(labels))

# One hidden layer of 60 units with ReLU, trained with SGD (lr=0.01) on
# mini-batches of 256 — see the printed configuration below.
main = perming.Box(num_features, num_classes, (60,), batch_size=256, activation='relu', inplace_on=True, solver='sgd', learning_rate_init=0.01)
# Alternative constructors, kept for reference:
# main = perming.Multipler(8, 29, (60,), batch_size=256, activation='relu', solver='sgd', learning_rate_init=0.01)
# main = perming.COMMON_MODELS['Multi-classification'](8, 29, (60,), batch_size=256, activation='relu', solver='sgd', learning_rate_init=0.01)
main.print_config()

MLP(
  (mlp): Sequential(
    (Linear0): Linear(in_features=8, out_features=60, bias=True)
    (Activation0): ReLU(inplace=True)
    (Linear1): Linear(in_features=60, out_features=29, bias=True)
  )
)


OrderedDict([('torch -v', '1.7.1+cu101'),
             ('criterion', CrossEntropyLoss()),
             ('batch_size', 256),
             ('solver',
              SGD (
              Parameter Group 0
                  dampening: 0
                  lr: 0.01
                  momentum: 0
                  nesterov: False
                  weight_decay: 0
              )),
             ('lr_scheduler', None),
             ('device', device(type='cuda'))])

### Multi-threaded Dataloader from NumPy Arrays

In [7]:
# Hand the feature matrix and the label vector to the model's data loader.
# random_seed=0 is passed explicitly — presumably to make the data split
# repeatable; confirm against the perming documentation.
main.data_loader(features, labels, random_seed=0)

### Training Stage and Accelerated Validation

In [8]:
# Train for one epoch with early stopping enabled. With interval=100 the
# training and validation losses are logged every 100 steps, as the output
# below shows.
main.train_val(num_epochs=1, interval=100, early_stop=True)

Epoch [1/1], Step [100/3277], Training Loss: 2.5657, Validation Loss: 2.5551
Epoch [1/1], Step [200/3277], Training Loss: 1.8318, Validation Loss: 1.8269
Epoch [1/1], Step [300/3277], Training Loss: 1.2668, Validation Loss: 1.2844
Epoch [1/1], Step [400/3277], Training Loss: 0.9546, Validation Loss: 0.9302
Epoch [1/1], Step [500/3277], Training Loss: 0.7440, Validation Loss: 0.7169
Epoch [1/1], Step [600/3277], Training Loss: 0.5863, Validation Loss: 0.5889
Epoch [1/1], Step [700/3277], Training Loss: 0.5062, Validation Loss: 0.5086
Epoch [1/1], Step [800/3277], Training Loss: 0.3308, Validation Loss: 0.4563
Epoch [1/1], Step [900/3277], Training Loss: 0.3079, Validation Loss: 0.4204
Epoch [1/1], Step [1000/3277], Training Loss: 0.4298, Validation Loss: 0.3946
Epoch [1/1], Step [1100/3277], Training Loss: 0.3918, Validation Loss: 0.3758
Epoch [1/1], Step [1200/3277], Training Loss: 0.4366, Validation Loss: 0.3618
Process stop at epoch [1/1] with patience 10 within tolerance 0.001


### Test Model with Accuracy and Correct Labels

In [9]:
# Evaluate on the test dataset. The loss is printed, and the returned
# OrderedDict (accuracy, number of classes, per-label counts) is displayed
# as the cell output.
# NOTE(review): in the output below every label reports the same
# [true numbers, total numbers] pair — confirm how perming computes the
# per-label counts before relying on them.
main.test()

loss of Box on the 104960 test dataset: 0.3505959212779999.


OrderedDict([('problem', 'classification'),
             ('accuracy', '95.99942835365853%'),
             ('num_classes', 29),
             ('column', ('label name', ('true numbers', 'total numbers'))),
             ('labels',
              {'montrealAPT': [100761, 104857],
               'montrealComradeCircle': [100761, 104857],
               'montrealCryptConsole': [100761, 104857],
               'montrealCryptXXX': [100761, 104857],
               'montrealCryptoLocker': [100761, 104857],
               'montrealCryptoTorLocker2015': [100761, 104857],
               'montrealDMALocker': [100761, 104857],
               'montrealDMALockerv3': [100761, 104857],
               'montrealEDA2': [100761, 104857],
               'montrealFlyper': [100761, 104857],
               'montrealGlobe': [100761, 104857],
               'montrealGlobeImposter': [100761, 104857],
               'montrealGlobev3': [100761, 104857],
               'montrealJigSaw': [100761, 104857],
               

### Save Model Parameters to Models Folder

In [10]:
# Save the model parameters to the checkpoint file ../models/bitcoin.ckpt.
# NOTE(review): the meaning of `con=False` is not visible in this notebook —
# confirm against the perming documentation.
main.save(con=False, dir='../models/bitcoin.ckpt')
# `main.unique` and `main.indices` together provide a two-way mapping for targets:
#   main.unique:  int -> original target value
#   main.indices: original target value -> int

### Load Model Parameters from Models Folder

In [11]:
# Load the model parameters from ../models/bitcoin.ckpt, the same checkpoint
# path used when saving.
main.load(con=False, dir='../models/bitcoin.ckpt')

### Test with Loaded Parameters or Tune Model

In [12]:
# Re-run the evaluation with the loaded parameters; the metrics should match
# the earlier test cell.
main.test()
# To tune the model instead, rebuild `main` with different hyperparameters
# (here a hidden layer of 40 units) and repeat the loading/training cells.
# The output size is 29 to match the number of classes in this dataset.
# NOTE(review): the class name and COMMON_MODELS key below are spelled
# differently from the configuration cell earlier in this notebook — confirm
# the correct names against the installed perming version.
# main = perming.Box(8, 29, (40,), batch_size=256, activation='relu', inplace_on=True, solver='sgd', learning_rate_init=0.01)
# main = perming.Multiple(8, 29, (40,), batch_size=256, activation='relu', solver='sgd', learning_rate_init=0.01)
# main = perming.COMMON_MODELS['Mutiple Classifier'](8, 29, (40,), batch_size=256, activation='relu', solver='sgd', learning_rate_init=0.01)
# main.print_config()

loss of Box on the 104960 test dataset: 0.35059523582458496.


OrderedDict([('problem', 'classification'),
             ('accuracy', '95.99942835365853%'),
             ('num_classes', 29),
             ('column', ('label name', ('true numbers', 'total numbers'))),
             ('labels',
              {'montrealAPT': [100761, 104857],
               'montrealComradeCircle': [100761, 104857],
               'montrealCryptConsole': [100761, 104857],
               'montrealCryptXXX': [100761, 104857],
               'montrealCryptoLocker': [100761, 104857],
               'montrealCryptoTorLocker2015': [100761, 104857],
               'montrealDMALocker': [100761, 104857],
               'montrealDMALockerv3': [100761, 104857],
               'montrealEDA2': [100761, 104857],
               'montrealFlyper': [100761, 104857],
               'montrealGlobe': [100761, 104857],
               'montrealGlobeImposter': [100761, 104857],
               'montrealGlobev3': [100761, 104857],
               'montrealJigSaw': [100761, 104857],
               