In [1]:
import pickle
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, make_scorer, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
import numpy as np
import seaborn as sns
import pandas as pd
from catboost import CatBoostClassifier

# Lectura de datos

In [2]:
data = pd.read_csv('/home/jose/Escritorio/datathon/src/data/train.txt', sep='|', index_col='ID')
# test = pd.read_csv('/home/jose/Escritorio/datathon/src/data/test.txt', sep='|', index_col='ID')

labels = data.iloc[:, -1]
data.drop('CLASE', axis=1, inplace=True)

train, test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=123)

data = pd.concat([train, test], sort=False)

data['CADASTRALQUALITYID'] = data['CADASTRALQUALITYID'].map({'9': '0',
                               '8': '1',
                               '7': '2',
                               '6': '3',
                               '5': '4',
                               '4': '5',
                               '3': '6',
                               '2': '7',
                               '1': '8',
                               'C': '9',
                               'B': '10',
                               'A': '11',})
data['CADASTRALQUALITYID'] = data['CADASTRALQUALITYID'].astype('category')

# Tratamiento de NaN

In [3]:
data['MAXBUILDINGFLOOR'].fillna(data['MAXBUILDINGFLOOR'].median(), inplace=True)
data['CADASTRALQUALITYID'].fillna(data['CADASTRALQUALITYID'].mode()[0], inplace=True)

# Preprocesado

In [4]:
sc = StandardScaler()
data = pd.concat([pd.DataFrame(sc.fit_transform(data.select_dtypes(['number'])),
                              index=data.index,
                              columns=data.select_dtypes(['number']).columns),
                data.select_dtypes(['category', 'object'])], axis=1, sort=False)

train, test = data.iloc[:train.shape[0], ], data.iloc[train.shape[0]:, ]

data_number = data.copy()
data_number['CADASTRALQUALITYID'] = sc.fit_transform(data['CADASTRALQUALITYID'].astype(np.int).values.reshape(-1,1))

train_number, test_number = data_number.iloc[:train.shape[0], ], data_number.iloc[train.shape[0]:, ]

# Modelo

In [32]:
model = KNeighborsClassifier(n_jobs=-1)

model.fit(train_number, y_train)

y_pred = model.predict(test_number)

In [33]:
print(classification_report(y_test, y_pred, digits=4))

              precision    recall  f1-score   support

 AGRICULTURE     0.7414    0.5375    0.6232        80
  INDUSTRIAL     0.6747    0.5996    0.6349       889
      OFFICE     0.3882    0.1545    0.2210       382
       OTHER     0.5655    0.3048    0.3961       269
      PUBLIC     0.4643    0.1522    0.2292       598
 RESIDENTIAL     0.9271    0.9883    0.9568     18018
      RETAIL     0.6495    0.1537    0.2485       410

    accuracy                         0.9047     20646
   macro avg     0.6301    0.4129    0.4728     20646
weighted avg     0.8819    0.9047    0.8855     20646



In [34]:
print(confusion_matrix(y_test, y_pred))

[[   43    16     1     0     2    18     0]
 [    4   533    26     9    12   293    12]
 [    0    89    59     3     9   219     3]
 [    0    27     5    82     9   141     5]
 [    1    31    10    20    91   441     4]
 [    8    72    36    24    60 17808    10]
 [    2    22    15     7    13   288    63]]


# Ajuste de hiperparametros

In [43]:
params = False

if params:
    params = {'n_neighbors': [1,3,5,7,11,13,15],
             'weights': ['uniform', 'distance'],
             'metric': ['minkowski', 'manhattan']}

    model = KNeighborsClassifier()

    grid = GridSearchCV(model, params, cv=5, scoring=make_scorer(f1_score, average='macro'), n_jobs=-1)

    grid.fit(train_number, y_train)
    
    best_params = grid.best_params_
    best_params['n_jobs'] = -1
    
    model = KNeighborsClassifier(**best_params)
    model.fit(train_number, y_train)
    
    print(best_params)
else:
    best_params = {'weights': 'distance', 'n_neighbors': 3, 'n_jobs': -1, 'metric': 'manhattan'}
    model = KNeighborsClassifier(**best_params)
    model.fit(train_number, y_train)

# Prueba del modelo ajustado

In [44]:
y_pred = model.predict(test_number)

In [45]:
print(classification_report(y_test, y_pred, digits=4))

              precision    recall  f1-score   support

 AGRICULTURE     0.8507    0.7125    0.7755        80
  INDUSTRIAL     0.7118    0.6029    0.6529       889
      OFFICE     0.3702    0.2277    0.2820       382
       OTHER     0.5904    0.4126    0.4858       269
      PUBLIC     0.4325    0.2090    0.2818       598
 RESIDENTIAL     0.9354    0.9810    0.9577     18018
      RETAIL     0.4608    0.2439    0.3190       410

    accuracy                         0.9054     20646
   macro avg     0.6217    0.4843    0.5364     20646
weighted avg     0.8865    0.9054    0.8929     20646



In [46]:
print(confusion_matrix(y_test, y_pred))

[[   57     6     0     0     0    16     1]
 [    4   536    39    13    14   256    27]
 [    1    76    87     2    18   185    13]
 [    0    11     7   111    12   122     6]
 [    1    30    15    25   125   390    12]
 [    4    76    72    28   104 17676    58]
 [    0    18    15     9    16   252   100]]


# Prueba XGB

In [6]:
model = XGBClassifier()

model.fit(train_number, y_train)

y_pred = model.predict(test_number)

XGBoostError: [20:23:08] /home/conda/feedstock_root/build_artifacts/xgboost_1572314959925/work/src/learner.cc:180: XGBoost version not compiled with GPU support.
Stack trace:
  [bt] (0) /home/jose/anaconda3/envs/py/lib/libxgboost.so(+0x6e5a4) [0x7fa38300c5a4]
  [bt] (1) /home/jose/anaconda3/envs/py/lib/libxgboost.so(+0x107fb0) [0x7fa3830a5fb0]
  [bt] (2) /home/jose/anaconda3/envs/py/lib/libxgboost.so(+0x10ebb5) [0x7fa3830acbb5]
  [bt] (3) /home/jose/anaconda3/envs/py/lib/libxgboost.so(+0x110f11) [0x7fa3830aef11]
  [bt] (4) /home/jose/anaconda3/envs/py/lib/libxgboost.so(XGBoosterUpdateOneIter+0x96) [0x7fa383018f16]
  [bt] (5) /home/jose/anaconda3/envs/py/lib/python3.7/lib-dynload/../../libffi.so.6(ffi_call_unix64+0x4c) [0x7fa3b3995ec0]
  [bt] (6) /home/jose/anaconda3/envs/py/lib/python3.7/lib-dynload/../../libffi.so.6(ffi_call+0x22d) [0x7fa3b399587d]
  [bt] (7) /home/jose/anaconda3/envs/py/lib/python3.7/lib-dynload/_ctypes.cpython-37m-x86_64-linux-gnu.so(_ctypes_callproc+0x2ce) [0x7fa3b3baaede]
  [bt] (8) /home/jose/anaconda3/envs/py/lib/python3.7/lib-dynload/_ctypes.cpython-37m-x86_64-linux-gnu.so(+0x12914) [0x7fa3b3bab914]



In [48]:
print(classification_report(y_test, y_pred, digits=4))

              precision    recall  f1-score   support

 AGRICULTURE     0.8649    0.4000    0.5470        80
  INDUSTRIAL     0.6762    0.4792    0.5609       889
      OFFICE     0.4667    0.0183    0.0353       382
       OTHER     0.5942    0.3048    0.4029       269
      PUBLIC     0.5260    0.1355    0.2154       598
 RESIDENTIAL     0.9126    0.9944    0.9518     18018
      RETAIL     0.6410    0.0610    0.1114       410

    accuracy                         0.8995     20646
   macro avg     0.6688    0.3419    0.4035     20646
weighted avg     0.8733    0.8995    0.8713     20646



In [50]:
print(confusion_matrix(y_test, y_pred))

[[   32    10     0     0     2    36     0]
 [    1   426     3     8     9   433     9]
 [    0    68     7     3    13   290     1]
 [    0    20     1    82    14   152     0]
 [    1    23     1    19    81   471     2]
 [    2    49     1    25    21 17918     2]
 [    1    34     2     1    14   333    25]]


# Prueba CatBoost

In [63]:
model = CatBoostClassifier(task_type='GPU')

model.fit(train, y_train, cat_features=[train.shape[1]-1])

y_pred = model.predict(test)

Learning rate set to 0.159709
0:	learn: 1.2840976	total: 12.1ms	remaining: 12.1s
1:	learn: 1.0410738	total: 22.5ms	remaining: 11.2s
2:	learn: 0.8908913	total: 33ms	remaining: 11s
3:	learn: 0.7874514	total: 44.3ms	remaining: 11s
4:	learn: 0.7097874	total: 54.6ms	remaining: 10.9s
5:	learn: 0.6506272	total: 66.6ms	remaining: 11s
6:	learn: 0.6024505	total: 76.5ms	remaining: 10.8s
7:	learn: 0.5667538	total: 86.7ms	remaining: 10.8s
8:	learn: 0.5384969	total: 97.6ms	remaining: 10.7s
9:	learn: 0.5145247	total: 109ms	remaining: 10.8s
10:	learn: 0.4944303	total: 120ms	remaining: 10.8s
11:	learn: 0.4780028	total: 130ms	remaining: 10.7s
12:	learn: 0.4646473	total: 140ms	remaining: 10.6s
13:	learn: 0.4537208	total: 150ms	remaining: 10.6s
14:	learn: 0.4449389	total: 160ms	remaining: 10.5s
15:	learn: 0.4380586	total: 170ms	remaining: 10.4s
16:	learn: 0.4301068	total: 178ms	remaining: 10.3s
17:	learn: 0.4244041	total: 187ms	remaining: 10.2s
18:	learn: 0.4190188	total: 195ms	remaining: 10.1s
19:	learn:

168:	learn: 0.3143082	total: 1.63s	remaining: 7.99s
169:	learn: 0.3136803	total: 1.64s	remaining: 7.99s
170:	learn: 0.3134844	total: 1.65s	remaining: 7.99s
171:	learn: 0.3132289	total: 1.66s	remaining: 7.98s
172:	learn: 0.3130514	total: 1.67s	remaining: 7.97s
173:	learn: 0.3128281	total: 1.68s	remaining: 7.96s
174:	learn: 0.3125997	total: 1.69s	remaining: 7.95s
175:	learn: 0.3124916	total: 1.7s	remaining: 7.94s
176:	learn: 0.3122898	total: 1.71s	remaining: 7.93s
177:	learn: 0.3119458	total: 1.72s	remaining: 7.92s
178:	learn: 0.3117879	total: 1.73s	remaining: 7.91s
179:	learn: 0.3115806	total: 1.73s	remaining: 7.9s
180:	learn: 0.3114750	total: 1.74s	remaining: 7.89s
181:	learn: 0.3113426	total: 1.75s	remaining: 7.87s
182:	learn: 0.3112349	total: 1.76s	remaining: 7.86s
183:	learn: 0.3110207	total: 1.77s	remaining: 7.85s
184:	learn: 0.3108633	total: 1.78s	remaining: 7.84s
185:	learn: 0.3105450	total: 1.79s	remaining: 7.84s
186:	learn: 0.3103910	total: 1.8s	remaining: 7.83s
187:	learn: 0.3

337:	learn: 0.2828299	total: 3.24s	remaining: 6.35s
338:	learn: 0.2826712	total: 3.25s	remaining: 6.35s
339:	learn: 0.2826177	total: 3.26s	remaining: 6.34s
340:	learn: 0.2825711	total: 3.27s	remaining: 6.33s
341:	learn: 0.2824718	total: 3.29s	remaining: 6.32s
342:	learn: 0.2823283	total: 3.29s	remaining: 6.31s
343:	learn: 0.2820969	total: 3.3s	remaining: 6.3s
344:	learn: 0.2819934	total: 3.31s	remaining: 6.29s
345:	learn: 0.2816851	total: 3.32s	remaining: 6.28s
346:	learn: 0.2814316	total: 3.33s	remaining: 6.27s
347:	learn: 0.2812839	total: 3.34s	remaining: 6.26s
348:	learn: 0.2811366	total: 3.35s	remaining: 6.25s
349:	learn: 0.2809727	total: 3.36s	remaining: 6.24s
350:	learn: 0.2808427	total: 3.37s	remaining: 6.23s
351:	learn: 0.2807562	total: 3.38s	remaining: 6.22s
352:	learn: 0.2806889	total: 3.39s	remaining: 6.21s
353:	learn: 0.2805849	total: 3.4s	remaining: 6.21s
354:	learn: 0.2804711	total: 3.41s	remaining: 6.2s
355:	learn: 0.2802909	total: 3.42s	remaining: 6.19s
356:	learn: 0.28

506:	learn: 0.2607118	total: 4.87s	remaining: 4.73s
507:	learn: 0.2606485	total: 4.88s	remaining: 4.72s
508:	learn: 0.2605155	total: 4.89s	remaining: 4.72s
509:	learn: 0.2603407	total: 4.9s	remaining: 4.71s
510:	learn: 0.2601962	total: 4.91s	remaining: 4.7s
511:	learn: 0.2600736	total: 4.92s	remaining: 4.69s
512:	learn: 0.2599129	total: 4.93s	remaining: 4.68s
513:	learn: 0.2597813	total: 4.94s	remaining: 4.67s
514:	learn: 0.2596189	total: 4.95s	remaining: 4.66s
515:	learn: 0.2595146	total: 4.96s	remaining: 4.65s
516:	learn: 0.2594100	total: 4.97s	remaining: 4.64s
517:	learn: 0.2592822	total: 4.98s	remaining: 4.64s
518:	learn: 0.2591429	total: 4.99s	remaining: 4.63s
519:	learn: 0.2589164	total: 5s	remaining: 4.62s
520:	learn: 0.2588512	total: 5.01s	remaining: 4.61s
521:	learn: 0.2587538	total: 5.02s	remaining: 4.6s
522:	learn: 0.2586210	total: 5.03s	remaining: 4.59s
523:	learn: 0.2584900	total: 5.04s	remaining: 4.58s
524:	learn: 0.2583874	total: 5.05s	remaining: 4.57s
525:	learn: 0.2583

672:	learn: 0.2421081	total: 6.49s	remaining: 3.15s
673:	learn: 0.2419936	total: 6.5s	remaining: 3.14s
674:	learn: 0.2418770	total: 6.51s	remaining: 3.13s
675:	learn: 0.2418281	total: 6.52s	remaining: 3.12s
676:	learn: 0.2417476	total: 6.53s	remaining: 3.11s
677:	learn: 0.2416483	total: 6.53s	remaining: 3.1s
678:	learn: 0.2415752	total: 6.54s	remaining: 3.09s
679:	learn: 0.2415138	total: 6.55s	remaining: 3.08s
680:	learn: 0.2413340	total: 6.56s	remaining: 3.07s
681:	learn: 0.2412622	total: 6.57s	remaining: 3.06s
682:	learn: 0.2411760	total: 6.58s	remaining: 3.05s
683:	learn: 0.2410865	total: 6.58s	remaining: 3.04s
684:	learn: 0.2409529	total: 6.59s	remaining: 3.03s
685:	learn: 0.2408403	total: 6.6s	remaining: 3.02s
686:	learn: 0.2407222	total: 6.61s	remaining: 3.01s
687:	learn: 0.2406220	total: 6.62s	remaining: 3s
688:	learn: 0.2405972	total: 6.63s	remaining: 2.99s
689:	learn: 0.2404416	total: 6.64s	remaining: 2.98s
690:	learn: 0.2402445	total: 6.65s	remaining: 2.97s
691:	learn: 0.2401

831:	learn: 0.2259723	total: 8.1s	remaining: 1.64s
832:	learn: 0.2259009	total: 8.11s	remaining: 1.63s
833:	learn: 0.2258151	total: 8.12s	remaining: 1.61s
834:	learn: 0.2257040	total: 8.13s	remaining: 1.61s
835:	learn: 0.2256105	total: 8.14s	remaining: 1.6s
836:	learn: 0.2255436	total: 8.15s	remaining: 1.59s
837:	learn: 0.2254546	total: 8.16s	remaining: 1.58s
838:	learn: 0.2253454	total: 8.17s	remaining: 1.57s
839:	learn: 0.2252913	total: 8.18s	remaining: 1.56s
840:	learn: 0.2252098	total: 8.19s	remaining: 1.55s
841:	learn: 0.2249961	total: 8.2s	remaining: 1.54s
842:	learn: 0.2249267	total: 8.21s	remaining: 1.53s
843:	learn: 0.2248272	total: 8.22s	remaining: 1.52s
844:	learn: 0.2247055	total: 8.23s	remaining: 1.51s
845:	learn: 0.2246521	total: 8.24s	remaining: 1.5s
846:	learn: 0.2245424	total: 8.25s	remaining: 1.49s
847:	learn: 0.2244984	total: 8.26s	remaining: 1.48s
848:	learn: 0.2243538	total: 8.27s	remaining: 1.47s
849:	learn: 0.2242273	total: 8.28s	remaining: 1.46s
850:	learn: 0.22

998:	learn: 0.2107711	total: 9.74s	remaining: 9.75ms
999:	learn: 0.2107113	total: 9.75s	remaining: 0us


In [64]:
print(classification_report(y_test, y_pred, digits=4))

              precision    recall  f1-score   support

 AGRICULTURE     0.8000    0.6000    0.6857        80
  INDUSTRIAL     0.7315    0.6007    0.6597       889
      OFFICE     0.4950    0.1309    0.2070       382
       OTHER     0.6354    0.4535    0.5293       269
      PUBLIC     0.5000    0.2291    0.3142       598
 RESIDENTIAL     0.9314    0.9920    0.9608     18018
      RETAIL     0.6364    0.1537    0.2475       410

    accuracy                         0.9119     20646
   macro avg     0.6757    0.4514    0.5149     20646
weighted avg     0.8920    0.9119    0.8943     20646



In [65]:
print(confusion_matrix(y_test, y_pred))

[[   48     8     0     0     3    21     0]
 [    4   534    16    10    24   285    16]
 [    1    71    50     3    26   228     3]
 [    0    16     4   122    15   110     2]
 [    0    23    15    27   137   391     5]
 [    5    46     8    25    50 17874    10]
 [    2    32     8     5    19   281    63]]


# Ajuste hiperparámetros Catboost

In [27]:
params = True

if params:
    aux = list(np.linspace(0,0.5,8))
    aux.append(None)
    params = {'cat_features': [train.shape[0]-1],
             'eval_metric': ['F1'],
             'iterations': [400,700,1000,1200,1500],
             'learning_rate': aux,
             'depth': [6, 8 ,16],
             'early_stopping_rounds': [5],
             'task_type':['GPU']}

    model = CatBoostClassifier(custom_metric=['Acuraccy', 'F1'], classes_count=7)

    grid = GridSearchCV(model, params, cv=5, scoring=make_scorer(f1_score, average='macro'), n_jobs=-1)

    grid.fit(train_number, y_train)
    
    best_params = grid.best_params_
    
    model = CatBoostClassifier(**best_params)
    model.fit(train, y_train)
    
    print(best_params)
else:
    best_params = {'weights': 'distance', 'n_neighbors': 3, 'n_jobs': -1, 'metric': 'manhattan'}
    model = KNeighborsClassifier(**best_params)
    model.fit(train_number, y_train)

KeyboardInterrupt: 

In [50]:
model = CatBoostClassifier(task_type='GPU', eval_metric='TotalF1', cat_features=[train.shape[1]-1], iterations=2000)

model.fit(train, y_train)

y_pred = model.predict(test)

Learning rate set to 0.089655
0:	learn: 0.8384246	total: 14.2ms	remaining: 28.4s
1:	learn: 0.8372769	total: 25.7ms	remaining: 25.7s
2:	learn: 0.8418657	total: 38.1ms	remaining: 25.4s
3:	learn: 0.8391090	total: 49.8ms	remaining: 24.8s
4:	learn: 0.8405486	total: 61.4ms	remaining: 24.5s
5:	learn: 0.8406798	total: 73.1ms	remaining: 24.3s
6:	learn: 0.8437122	total: 84.2ms	remaining: 24s
7:	learn: 0.8421528	total: 95.9ms	remaining: 23.9s
8:	learn: 0.8415577	total: 110ms	remaining: 24.4s
9:	learn: 0.8439748	total: 122ms	remaining: 24.3s
10:	learn: 0.8439572	total: 133ms	remaining: 24.1s
11:	learn: 0.8445895	total: 145ms	remaining: 24s
12:	learn: 0.8445265	total: 156ms	remaining: 23.9s
13:	learn: 0.8447486	total: 168ms	remaining: 23.9s
14:	learn: 0.8456793	total: 179ms	remaining: 23.6s
15:	learn: 0.8461423	total: 188ms	remaining: 23.3s
16:	learn: 0.8469211	total: 198ms	remaining: 23.1s
17:	learn: 0.8470643	total: 208ms	remaining: 22.8s
18:	learn: 0.8472971	total: 217ms	remaining: 22.6s
19:	lea

171:	learn: 0.8856659	total: 1.72s	remaining: 18.2s
172:	learn: 0.8858488	total: 1.73s	remaining: 18.2s
173:	learn: 0.8858943	total: 1.74s	remaining: 18.2s
174:	learn: 0.8859711	total: 1.75s	remaining: 18.2s
175:	learn: 0.8859754	total: 1.75s	remaining: 18.2s
176:	learn: 0.8859926	total: 1.77s	remaining: 18.2s
177:	learn: 0.8858693	total: 1.78s	remaining: 18.2s
178:	learn: 0.8860552	total: 1.79s	remaining: 18.2s
179:	learn: 0.8862421	total: 1.8s	remaining: 18.2s
180:	learn: 0.8862442	total: 1.81s	remaining: 18.2s
181:	learn: 0.8863525	total: 1.82s	remaining: 18.1s
182:	learn: 0.8863361	total: 1.83s	remaining: 18.1s
183:	learn: 0.8864041	total: 1.83s	remaining: 18.1s
184:	learn: 0.8864739	total: 1.84s	remaining: 18.1s
185:	learn: 0.8866185	total: 1.85s	remaining: 18.1s
186:	learn: 0.8867223	total: 1.86s	remaining: 18.1s
187:	learn: 0.8867617	total: 1.88s	remaining: 18.1s
188:	learn: 0.8867997	total: 1.89s	remaining: 18.1s
189:	learn: 0.8868979	total: 1.9s	remaining: 18.1s
190:	learn: 0.

339:	learn: 0.8961131	total: 3.35s	remaining: 16.4s
340:	learn: 0.8961044	total: 3.36s	remaining: 16.4s
341:	learn: 0.8960791	total: 3.37s	remaining: 16.4s
342:	learn: 0.8961279	total: 3.38s	remaining: 16.3s
343:	learn: 0.8961373	total: 3.39s	remaining: 16.3s
344:	learn: 0.8961511	total: 3.4s	remaining: 16.3s
345:	learn: 0.8962968	total: 3.41s	remaining: 16.3s
346:	learn: 0.8964150	total: 3.42s	remaining: 16.3s
347:	learn: 0.8963379	total: 3.43s	remaining: 16.3s
348:	learn: 0.8964329	total: 3.44s	remaining: 16.3s
349:	learn: 0.8964365	total: 3.45s	remaining: 16.3s
350:	learn: 0.8964785	total: 3.46s	remaining: 16.3s
351:	learn: 0.8965166	total: 3.47s	remaining: 16.3s
352:	learn: 0.8965333	total: 3.48s	remaining: 16.2s
353:	learn: 0.8965855	total: 3.49s	remaining: 16.2s
354:	learn: 0.8966606	total: 3.5s	remaining: 16.2s
355:	learn: 0.8966752	total: 3.51s	remaining: 16.2s
356:	learn: 0.8967481	total: 3.52s	remaining: 16.2s
357:	learn: 0.8967953	total: 3.53s	remaining: 16.2s
358:	learn: 0.

508:	learn: 0.9022695	total: 4.97s	remaining: 14.6s
509:	learn: 0.9023446	total: 4.99s	remaining: 14.6s
510:	learn: 0.9023922	total: 5s	remaining: 14.6s
511:	learn: 0.9023696	total: 5.01s	remaining: 14.6s
512:	learn: 0.9024684	total: 5.02s	remaining: 14.5s
513:	learn: 0.9024800	total: 5.03s	remaining: 14.5s
514:	learn: 0.9025641	total: 5.04s	remaining: 14.5s
515:	learn: 0.9025387	total: 5.04s	remaining: 14.5s
516:	learn: 0.9025443	total: 5.05s	remaining: 14.5s
517:	learn: 0.9026224	total: 5.06s	remaining: 14.5s
518:	learn: 0.9026436	total: 5.07s	remaining: 14.5s
519:	learn: 0.9027458	total: 5.08s	remaining: 14.5s
520:	learn: 0.9027146	total: 5.09s	remaining: 14.5s
521:	learn: 0.9027511	total: 5.1s	remaining: 14.4s
522:	learn: 0.9028156	total: 5.11s	remaining: 14.4s
523:	learn: 0.9029036	total: 5.12s	remaining: 14.4s
524:	learn: 0.9028810	total: 5.13s	remaining: 14.4s
525:	learn: 0.9029306	total: 5.14s	remaining: 14.4s
526:	learn: 0.9029144	total: 5.15s	remaining: 14.4s
527:	learn: 0.90

670:	learn: 0.9075224	total: 6.61s	remaining: 13.1s
671:	learn: 0.9076527	total: 6.62s	remaining: 13.1s
672:	learn: 0.9076998	total: 6.63s	remaining: 13.1s
673:	learn: 0.9076221	total: 6.64s	remaining: 13.1s
674:	learn: 0.9077323	total: 6.66s	remaining: 13.1s
675:	learn: 0.9077814	total: 6.67s	remaining: 13.1s
676:	learn: 0.9078251	total: 6.67s	remaining: 13s
677:	learn: 0.9077888	total: 6.68s	remaining: 13s
678:	learn: 0.9077410	total: 6.69s	remaining: 13s
679:	learn: 0.9077763	total: 6.7s	remaining: 13s
680:	learn: 0.9077653	total: 6.71s	remaining: 13s
681:	learn: 0.9079253	total: 6.72s	remaining: 13s
682:	learn: 0.9079947	total: 6.73s	remaining: 13s
683:	learn: 0.9079604	total: 6.74s	remaining: 13s
684:	learn: 0.9080062	total: 6.75s	remaining: 13s
685:	learn: 0.9080543	total: 6.76s	remaining: 13s
686:	learn: 0.9080578	total: 6.77s	remaining: 12.9s
687:	learn: 0.9081270	total: 6.78s	remaining: 12.9s
688:	learn: 0.9081628	total: 6.79s	remaining: 12.9s
689:	learn: 0.9081763	total: 6.8s

840:	learn: 0.9125332	total: 8.25s	remaining: 11.4s
841:	learn: 0.9125049	total: 8.26s	remaining: 11.4s
842:	learn: 0.9126649	total: 8.27s	remaining: 11.3s
843:	learn: 0.9126549	total: 8.28s	remaining: 11.3s
844:	learn: 0.9126807	total: 8.29s	remaining: 11.3s
845:	learn: 0.9127144	total: 8.3s	remaining: 11.3s
846:	learn: 0.9126798	total: 8.31s	remaining: 11.3s
847:	learn: 0.9127229	total: 8.32s	remaining: 11.3s
848:	learn: 0.9128480	total: 8.33s	remaining: 11.3s
849:	learn: 0.9128088	total: 8.34s	remaining: 11.3s
850:	learn: 0.9128706	total: 8.35s	remaining: 11.3s
851:	learn: 0.9129371	total: 8.36s	remaining: 11.3s
852:	learn: 0.9129046	total: 8.37s	remaining: 11.3s
853:	learn: 0.9129522	total: 8.38s	remaining: 11.2s
854:	learn: 0.9129867	total: 8.39s	remaining: 11.2s
855:	learn: 0.9130241	total: 8.4s	remaining: 11.2s
856:	learn: 0.9130391	total: 8.42s	remaining: 11.2s
857:	learn: 0.9130091	total: 8.44s	remaining: 11.2s
858:	learn: 0.9131046	total: 8.45s	remaining: 11.2s
859:	learn: 0.

1020:	learn: 0.9170935	total: 10.1s	remaining: 9.65s
1021:	learn: 0.9170822	total: 10.1s	remaining: 9.64s
1022:	learn: 0.9170272	total: 10.1s	remaining: 9.63s
1023:	learn: 0.9170648	total: 10.1s	remaining: 9.62s
1024:	learn: 0.9171939	total: 10.1s	remaining: 9.61s
1025:	learn: 0.9171787	total: 10.1s	remaining: 9.6s
1026:	learn: 0.9171758	total: 10.1s	remaining: 9.59s
1027:	learn: 0.9172358	total: 10.1s	remaining: 9.58s
1028:	learn: 0.9172245	total: 10.1s	remaining: 9.57s
1029:	learn: 0.9172525	total: 10.2s	remaining: 9.56s
1030:	learn: 0.9172845	total: 10.2s	remaining: 9.55s
1031:	learn: 0.9173363	total: 10.2s	remaining: 9.54s
1032:	learn: 0.9173397	total: 10.2s	remaining: 9.53s
1033:	learn: 0.9174163	total: 10.2s	remaining: 9.52s
1034:	learn: 0.9174196	total: 10.2s	remaining: 9.51s
1035:	learn: 0.9174780	total: 10.2s	remaining: 9.49s
1036:	learn: 0.9174281	total: 10.2s	remaining: 9.49s
1037:	learn: 0.9174692	total: 10.2s	remaining: 9.47s
1038:	learn: 0.9174667	total: 10.2s	remaining: 

1181:	learn: 0.9210616	total: 11.7s	remaining: 8.09s
1182:	learn: 0.9211083	total: 11.7s	remaining: 8.08s
1183:	learn: 0.9210984	total: 11.7s	remaining: 8.07s
1184:	learn: 0.9212381	total: 11.7s	remaining: 8.07s
1185:	learn: 0.9212699	total: 11.7s	remaining: 8.06s
1186:	learn: 0.9213173	total: 11.8s	remaining: 8.05s
1187:	learn: 0.9213636	total: 11.8s	remaining: 8.04s
1188:	learn: 0.9213786	total: 11.8s	remaining: 8.04s
1189:	learn: 0.9214084	total: 11.8s	remaining: 8.05s
1190:	learn: 0.9214088	total: 11.8s	remaining: 8.04s
1191:	learn: 0.9214392	total: 11.8s	remaining: 8.03s
1192:	learn: 0.9214193	total: 11.9s	remaining: 8.02s
1193:	learn: 0.9214441	total: 11.9s	remaining: 8.02s
1194:	learn: 0.9215091	total: 11.9s	remaining: 8.01s
1195:	learn: 0.9216635	total: 11.9s	remaining: 8s
1196:	learn: 0.9217027	total: 11.9s	remaining: 7.99s
1197:	learn: 0.9216815	total: 11.9s	remaining: 7.98s
1198:	learn: 0.9217246	total: 11.9s	remaining: 7.97s
1199:	learn: 0.9216934	total: 11.9s	remaining: 7.

1342:	learn: 0.9248826	total: 13.3s	remaining: 6.52s
1343:	learn: 0.9249505	total: 13.3s	remaining: 6.51s
1344:	learn: 0.9250476	total: 13.3s	remaining: 6.5s
1345:	learn: 0.9250322	total: 13.4s	remaining: 6.49s
1346:	learn: 0.9250304	total: 13.4s	remaining: 6.48s
1347:	learn: 0.9250442	total: 13.4s	remaining: 6.47s
1348:	learn: 0.9250479	total: 13.4s	remaining: 6.46s
1349:	learn: 0.9250640	total: 13.4s	remaining: 6.45s
1350:	learn: 0.9250837	total: 13.4s	remaining: 6.44s
1351:	learn: 0.9251064	total: 13.4s	remaining: 6.43s
1352:	learn: 0.9252126	total: 13.4s	remaining: 6.42s
1353:	learn: 0.9252185	total: 13.4s	remaining: 6.41s
1354:	learn: 0.9251665	total: 13.4s	remaining: 6.4s
1355:	learn: 0.9251900	total: 13.5s	remaining: 6.39s
1356:	learn: 0.9252193	total: 13.5s	remaining: 6.38s
1357:	learn: 0.9252112	total: 13.5s	remaining: 6.37s
1358:	learn: 0.9253438	total: 13.5s	remaining: 6.36s
1359:	learn: 0.9254262	total: 13.5s	remaining: 6.35s
1360:	learn: 0.9253986	total: 13.5s	remaining: 6

1509:	learn: 0.9284304	total: 15.1s	remaining: 4.91s
1510:	learn: 0.9284399	total: 15.2s	remaining: 4.9s
1511:	learn: 0.9284637	total: 15.2s	remaining: 4.89s
1512:	learn: 0.9284273	total: 15.2s	remaining: 4.88s
1513:	learn: 0.9284667	total: 15.2s	remaining: 4.87s
1514:	learn: 0.9285274	total: 15.2s	remaining: 4.86s
1515:	learn: 0.9285425	total: 15.2s	remaining: 4.85s
1516:	learn: 0.9285630	total: 15.2s	remaining: 4.84s
1517:	learn: 0.9285778	total: 15.2s	remaining: 4.83s
1518:	learn: 0.9286185	total: 15.2s	remaining: 4.82s
1519:	learn: 0.9286401	total: 15.2s	remaining: 4.81s
1520:	learn: 0.9287224	total: 15.3s	remaining: 4.8s
1521:	learn: 0.9288486	total: 15.3s	remaining: 4.79s
1522:	learn: 0.9288308	total: 15.3s	remaining: 4.78s
1523:	learn: 0.9288856	total: 15.3s	remaining: 4.77s
1524:	learn: 0.9288535	total: 15.3s	remaining: 4.76s
1525:	learn: 0.9288801	total: 15.3s	remaining: 4.75s
1526:	learn: 0.9289063	total: 15.3s	remaining: 4.74s
1527:	learn: 0.9289189	total: 15.3s	remaining: 4

1678:	learn: 0.9317989	total: 17s	remaining: 3.24s
1679:	learn: 0.9318635	total: 17s	remaining: 3.23s
1680:	learn: 0.9319008	total: 17s	remaining: 3.23s
1681:	learn: 0.9319539	total: 17s	remaining: 3.21s
1682:	learn: 0.9319988	total: 17s	remaining: 3.21s
1683:	learn: 0.9320467	total: 17s	remaining: 3.19s
1684:	learn: 0.9320921	total: 17s	remaining: 3.19s
1685:	learn: 0.9321107	total: 17.1s	remaining: 3.18s
1686:	learn: 0.9321130	total: 17.1s	remaining: 3.17s
1687:	learn: 0.9321390	total: 17.1s	remaining: 3.16s
1688:	learn: 0.9321558	total: 17.1s	remaining: 3.15s
1689:	learn: 0.9321972	total: 17.1s	remaining: 3.14s
1690:	learn: 0.9322062	total: 17.1s	remaining: 3.13s
1691:	learn: 0.9322193	total: 17.1s	remaining: 3.12s
1692:	learn: 0.9322459	total: 17.1s	remaining: 3.11s
1693:	learn: 0.9322518	total: 17.1s	remaining: 3.1s
1694:	learn: 0.9322902	total: 17.2s	remaining: 3.09s
1695:	learn: 0.9322902	total: 17.2s	remaining: 3.08s
1696:	learn: 0.9322573	total: 17.2s	remaining: 3.07s
1697:	le

1838:	learn: 0.9347987	total: 18.8s	remaining: 1.64s
1839:	learn: 0.9348468	total: 18.8s	remaining: 1.63s
1840:	learn: 0.9347995	total: 18.8s	remaining: 1.62s
1841:	learn: 0.9347832	total: 18.8s	remaining: 1.61s
1842:	learn: 0.9348678	total: 18.8s	remaining: 1.6s
1843:	learn: 0.9348261	total: 18.8s	remaining: 1.59s
1844:	learn: 0.9348830	total: 18.9s	remaining: 1.58s
1845:	learn: 0.9349083	total: 18.9s	remaining: 1.57s
1846:	learn: 0.9349644	total: 18.9s	remaining: 1.56s
1847:	learn: 0.9349484	total: 18.9s	remaining: 1.55s
1848:	learn: 0.9350055	total: 18.9s	remaining: 1.54s
1849:	learn: 0.9350032	total: 18.9s	remaining: 1.53s
1850:	learn: 0.9350019	total: 18.9s	remaining: 1.52s
1851:	learn: 0.9350940	total: 18.9s	remaining: 1.51s
1852:	learn: 0.9351628	total: 18.9s	remaining: 1.5s
1853:	learn: 0.9351752	total: 18.9s	remaining: 1.49s
1854:	learn: 0.9351760	total: 19s	remaining: 1.48s
1855:	learn: 0.9351697	total: 19s	remaining: 1.47s
1856:	learn: 0.9352182	total: 19s	remaining: 1.46s
1

In [51]:
print(classification_report(y_test, y_pred, digits=4))

              precision    recall  f1-score   support

 AGRICULTURE     0.8393    0.5875    0.6912        80
  INDUSTRIAL     0.7303    0.6153    0.6679       889
      OFFICE     0.5189    0.1440    0.2254       382
       OTHER     0.6402    0.4498    0.5284       269
      PUBLIC     0.4946    0.2291    0.3131       598
 RESIDENTIAL     0.9319    0.9910    0.9605     18018
      RETAIL     0.6364    0.1707    0.2692       410

    accuracy                         0.9121     20646
   macro avg     0.6845    0.4553    0.5223     20646
weighted avg     0.8929    0.9121    0.8952     20646



In [52]:
print(confusion_matrix(y_test, y_pred))

[[   47    10     0     0     3    20     0]
 [    2   547    14     9    23   277    17]
 [    1    75    55     3    21   224     3]
 [    0    16     3   121    14   115     0]
 [    0    25    13    23   137   390    10]
 [    6    48    10    28    61 17855    10]
 [    0    28    11     5    18   278    70]]
