In [1]:
from model import Model
from dataset_loader import DatasetLoader
from dcdm import DCDM

import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import gc
from scipy import stats
import pickle

GurobiError: HostID mismatch (licensed to f3dbe6b0, hostid is 5d21c9ae)


## 1. Loading datasets

Checklist:
- Each matrix should be of type `double`
- Every `nan` should be removed
- `y` should be flattened

In [2]:
# Single loader instance shared by the rest of the notebook; it exposes the
# available dataset names (`dl.datasets`) and fetches one by name (`dl.get_dataset`).
dl = DatasetLoader()

In [3]:
# Display the dataset names the loader knows about.
dl.datasets

['a4a',
 'a5a',
 'a6a',
 'a7a',
 'a8a',
 'a9a',
 'australian_scale',
 'mushrooms',
 'phishing',
 'real-sim',
 'skin_nonskin']

## 2. Experiments

In [4]:
# Solvers to benchmark: the DCDM class imported above, plus two solvers
# selected by string name.
optimizers = [DCDM, 'cvxpy_MOSEK', 'cvxpy_SCS']

# Names of every dataset exposed by the loader.
all_datasets = dl.datasets

In [5]:
# Benchmark every optimizer on every dataset.
#
# Layout: results[optimizer][dataset_name] -> {'result', 'df', 'time'}, holding
# the three values returned by `Model.fit`. If the fit raises, the entry is left
# as an empty dict and the run moves on to the next optimizer.
results = {opt: {} for opt in optimizers}

for i, dataset_name in enumerate(dl.datasets):
    dataset_data = dl.get_dataset(dataset_name)
    for opt in optimizers:
        results[opt][dataset_name] = {}
        print(f'Fitting {dataset_name} on optimizer {opt}...')
        m = Model()
        try:
            # Scaling is kept inside the `try` on purpose: a scaler failure is
            # reported and skipped exactly like a solver failure.
            result, df, time = m.fit(
                X=MinMaxScaler().fit_transform(dataset_data['X_train']),
                y=dataset_data['y_train'],
                optimizer=opt,
            )
        except Exception as e:
            # Report which optimizer failed and keep going. The handler
            # previously printed the undefined name `optimizer`, so any failure
            # raised NameError here and aborted the whole experiment loop.
            print("\t", opt, e.__doc__, str(e)[:20])
            continue
        results[opt][dataset_name]['result'] = result
        results[opt][dataset_name]['df'] = df
        results[opt][dataset_name]['time'] = time
        # Checkpoint after every successful fit; the file is keyed by dataset
        # index, so it is overwritten by each optimizer run on the same dataset.
        with open(f'results_{i}.pickle', 'wb') as handle:
            pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)
    # Release the dataset before loading the next one.
    del dataset_data
    gc.collect()

# Final snapshot containing every optimizer/dataset combination.
with open('results.pickle', 'wb') as handle:
    pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

Fitting australian_scale on optimizer cvxpy_SCS...
Fitting mushrooms on optimizer cvxpy_SCS...
Fitting phishing on optimizer cvxpy_SCS...


In [6]:
# Reload the final snapshot written by the experiment cell.
# NOTE: unpickling executes arbitrary code; only load files produced by this notebook.
with open('results.pickle', 'rb') as results_file:
    results = pickle.load(results_file)

# Show the reloaded results.
results

{'cvxpy_SCS': {'australian_scale': {'result': True,
   'df':      Iter       pri res       dua res       rel gap       pri obj  \
   0       0  5.290000e+18  9.200000e+18  8.100000e-01 -1.930000e+22   
   1     100  2.190000e-01  1.640000e-01  2.130000e-03 -4.260000e+03   
   2     200  4.830000e-02  2.710000e-02  6.410000e-04 -4.230000e+03   
   3     300  1.750000e-02  1.800000e-02  8.110000e-04 -4.250000e+03   
   4     400  5.480000e-03  4.040000e-02  2.540000e-04 -4.240000e+03   
   5     500  1.090000e-02  1.090000e-02  6.530000e-04 -4.250000e+03   
   6     600  1.510000e-01  3.150000e-01  6.040000e-04 -4.250000e+03   
   7     700  3.140000e-02  5.680000e-02  5.820000e-05 -4.250000e+03   
   8     800  4.720000e-02  1.680000e-01  2.280000e-03 -4.220000e+03   
   9     900  1.170000e-02  1.280000e-02  3.890000e-04 -4.240000e+03   
   10   1000  1.410000e-03  6.300000e-03  1.690000e-04 -4.250000e+03   
   11   1100  1.710000e-02  2.480000e-02  3.900000e-05 -4.250000e+03   
   12 