In [1]:
from fastai.text import * 
from fastai.callbacks import *
from sklearn.model_selection import train_test_split
from pathlib import Path
import pickle as pkl

# Make CUDA device index 1 the current device for everything that follows.
# NOTE(review): the index is hard-coded; presumably this raises on a machine
# that exposes fewer than two GPUs — adjust before running elsewhere.
torch.cuda.set_device(1)

In [2]:
import unicodedata
import pandas as pd
import re
import spacy
import json

In [3]:
# Base directory of the input data; train.csv is read relative to it.
PATH = Path('.')

In [4]:
def normalize_title(title):
    """Normalize a listing title into a crude, accent-free, lower-case form.

    Steps, in order:
      1. lower-case, NFKD-decompose and drop every non-ASCII code point
         (this strips accents: "ação" -> "acao");
      2. replace anything that is not a letter, digit, ``%`` or space by a space;
      3. replace every ``s`` that ends a word by a space (rough plural stemming);
      4. collapse runs of spaces;
      5. collapse repeated ``"1 "`` tokens into a single one;
      6. replace every ``o`` that ends a word by ``a`` (rough gender folding).

    Args:
        title: the raw title. ``None`` and float NaN (the way pandas hands a
            missing value to ``apply``) yield ``''``; any other non-string
            value is converted with ``str`` first.

    Returns:
        The normalized title, stripped of surrounding whitespace.
    """
    # Missing values have no `.lower()`; return an empty title instead of raising
    # so callers can filter them out together with other empty results.
    if title is None or (isinstance(title, float) and title != title):
        return ''
    ascii_title = unicodedata.normalize('NFKD', str(title).lower()).encode('ascii', 'ignore').decode("utf-8")
    s = re.sub(r'[^a-zA-Z0-9ñç% ]', ' ', ascii_title)
    # NOTE: folding every digit run into "1" was disabled in the original code;
    # the "1 " collapse further down was presumably its companion step.
    s = re.sub(r's |s$', ' ', s)
    s = re.sub(r' +', ' ', s)
    s = re.sub(r'(1 )+', '1 ', s)
    return re.sub(r'o |o$', 'a ', s).strip()

In [5]:
# Read the JSON mapping that is later applied to each row's `category`
# to obtain its `main_category`.
with open("main_categories.json", "r") as categories_file:
    cats = json.loads(categories_file.read())

In [6]:
# Load the pickled fastai Vocab (the file name suggests it belongs to the v4
# Portuguese language model); it is passed to every TextList built later on.
vocab = Vocab.load('models/v4_vocab_por' + '.pkl')

In [7]:
# Load the training set and keep only the Portuguese rows that have a title.
# Missing titles are dropped *before* normalization (a NaN has no `.lower()`),
# and `.copy()` yields an independent frame so the column assignment below
# does not write into a slice of the CSV frame (SettingWithCopyWarning).
df = pd.read_csv(PATH / 'train.csv')
df = df[(df.language == 'portuguese') & df.title.notna()].copy()
df['title'] = df.title.apply(normalize_title)
# Discard rows whose title normalizes to nothing or to the literal string 'nan'.
df = df[(df.title != 'nan') & (df.title != '')].copy()

  This is separate from the ipykernel package so we can avoid doing imports until


In [8]:
# Attach each row's main category. `assign` builds a new frame instead of
# writing a column into the filtered frame held in `df`, which avoids pandas'
# SettingWithCopyWarning and makes the cell safe to re-run.
# Categories absent from `cats` map to NaN and therefore match no main category later.
df = df.assign(main_category=df.category.map(cats))

In [12]:
# Processor loaded from the lm_por_v4 directory; shared by every TextList below.
processor = SPProcessor.load('',tmp_dir='lm_por_v4')

# Build and save one classification DataBunch per main category.
for cat in set(cats.values()):
    d = df[df.main_category == cat]
    print(cat, len(d))
    if not len(d):
        continue  # nothing to build for a main category without rows
    # 90/10 split stratified on the fine-grained category, with a fixed seed.
    train, valid = train_test_split(d, test_size=0.1, stratify=d.category, random_state = 42)
    train_items = TextList.from_df(train, vocab=vocab, cols=['title'], processor=processor)
    valid_items = TextList.from_df(valid, vocab=vocab, cols=['title'], processor=processor)
    data_class = (ItemLists('', train_items, valid_items)
                  .label_from_df(cols=['category'])
                  .databunch(bs=800, num_workers=1))
    data_class.save(f'/data/anime/ml/por/{cat}.pkl')

In [13]:
from sklearn.metrics import f1_score

@np_func
def f1(inp,targ):
    """Weighted F1 of the arg-max predictions.

    Only labels that occur among the predictions are passed to `f1_score`
    as `labels`, so classes that were never predicted are left out of the average.
    """
    predicted = np.argmax(inp, axis=-1)
    predicted_labels = np.unique(predicted)
    return f1_score(targ, predicted, average='weighted',  labels=predicted_labels)

class WeightedLabelSmoothingCrossEntropy(nn.Module):
    """Label-smoothing cross entropy whose NLL term uses per-class weights.

    The result is ``eps/c * smoothing + (1 - eps) * nll`` where ``c`` is the
    size of the last dimension of the logits, ``smoothing`` is the negated sum
    of the log-probabilities and ``nll`` is ``F.nll_loss`` called with ``weight``.
    Only the NLL term is weighted; the smoothing term is not.
    """

    def __init__(self, weight, eps:float=0.1, reduction='mean'):
        """Store the class weights, the smoothing factor and the reduction mode."""
        super().__init__()
        self.weight = weight
        self.eps = eps
        self.reduction = reduction

    def forward(self, output, target):
        """Return the smoothed, weighted loss of logits `output` against class indices `target`."""
        n_classes = output.size()[-1]
        log_probs = F.log_softmax(output, dim=-1)

        # Smoothing term: summed over everything for 'sum', otherwise summed per
        # row and then averaged only when the reduction is 'mean'.
        if self.reduction == 'sum':
            smoothing = -log_probs.sum()
        else:
            smoothing = -log_probs.sum(dim=-1)
            if self.reduction == 'mean':
                smoothing = smoothing.mean()

        weighted_nll = F.nll_loss(log_probs, target, weight=self.weight, reduction=self.reduction)
        return smoothing*self.eps/n_classes + (1-self.eps) * weighted_nll

In [18]:
# Train one classifier per main category with gradual unfreezing.
# Categories that already have a saved model are skipped, so the cell can be
# re-run to resume an interrupted session.
lr = 3e-2   # peak learning rate of the one-cycle stages
wd = 0.01   # weight decay, applied from the second stage on

for cat in set(cats.values()):
    print(cat)
    if Path(f'/data/anime/ml/por/{cat}_model.pth').exists():
        continue  # already trained
    if not Path(f'/data/anime/ml/por/{cat}.pkl').exists():
        # No DataBunch was saved for this category (e.g. it had no rows).
        print(f'  no saved DataBunch for {cat}, skipping')
        continue

    data_class = load_data('/data/anime/ml/por/', cat + '.pkl', bs=500)

    # Class weights for the loss: 1 - relative frequency in the training set,
    # so frequent classes weigh less. Relies on every class index occurring at
    # least once among the training labels, so that the counts line up with the classes.
    num_trn = len(data_class.train_ds.x)
    trn_LabelCounts = np.unique(data_class.train_ds.y.items, return_counts=True)[1]
    trn_weights = [1 - count/num_trn for count in trn_LabelCounts]

    config = awd_lstm_clas_config.copy()
    config['qrnn'] = True
    config['n_hid'] = 1550 #default 1152
    config['n_layers'] = 4 #default 3

    learn_c = text_classifier_learner(data_class, AWD_LSTM, pretrained=False, config=config, drop_mult=0.3)
    learn_c.load_encoder('best_lm_por_v4_encoder')

    loss_weights = torch.FloatTensor(trn_weights).cuda()
    learn_c.loss_func = FlattenedLoss(WeightedLabelSmoothingCrossEntropy, weight=loss_weights)

    # Stage 1: only the last layer group is trainable.
    learn_c.freeze()
    learn_c.fit_one_cycle(2, lr, moms=(0.8,0.7))

    # Stage 2: last two layer groups.
    learn_c.freeze_to(-2)
    learn_c.fit_one_cycle(2, lr, wd=wd, moms=(0.8,0.7))

    # Stage 3: last three layer groups, with learning rates spread across the groups.
    learn_c.freeze_to(-3)
    learn_c.fit_one_cycle(2, slice(lr/(2.6**4),lr), wd=wd, moms=(0.8,0.7))

    # Stage 4: whole model at a ten times smaller learning rate.
    learn_c.unfreeze()
    learn_c.fit_one_cycle(2, slice(lr/10/(2.6**4),lr/10), wd=wd, moms=(0.8,0.7))

    learn_c.save(f'/data/anime/ml/por/{cat}_model')

    # Drop the learner and collect garbage first, so that the CUDA cache can
    # actually be released before the next category is loaded.
    del learn_c
    gc.collect()
    torch.cuda.empty_cache()

Musica, Filmes e Seriados


epoch,train_loss,valid_loss,accuracy,time
0,0.597866,0.571005,0.878863,00:05
1,0.553747,0.490856,0.900798,00:05


epoch,train_loss,valid_loss,accuracy,time
0,0.514318,0.472029,0.918744,00:05
1,0.484327,0.455096,0.924227,00:05


epoch,train_loss,valid_loss,accuracy,time
0,0.452514,0.493933,0.92323,00:06
1,0.437799,0.445883,0.929711,00:06


epoch,train_loss,valid_loss,accuracy,time
0,0.40889,0.443828,0.931206,00:08
1,0.403202,0.446752,0.930209,00:08


Eletronicos, Audio e Video


epoch,train_loss,valid_loss,accuracy,time
0,1.230368,1.144332,0.890175,01:01
1,1.159058,1.098166,0.89707,00:59


epoch,train_loss,valid_loss,accuracy,time
0,1.111668,1.048474,0.910227,01:04
1,1.043786,1.00467,0.918592,01:07


epoch,train_loss,valid_loss,accuracy,time
0,1.04013,1.007058,0.919112,01:22
1,1.004763,0.980705,0.924266,01:18


epoch,train_loss,valid_loss,accuracy,time
0,0.964941,0.981061,0.924922,02:08
1,0.968259,0.97858,0.925532,02:19


Ingressos


epoch,train_loss,valid_loss,accuracy,time
0,0.504639,0.482706,0.920561,00:04
1,0.429863,0.363009,0.94081,00:04


epoch,train_loss,valid_loss,accuracy,time
0,0.40835,0.32024,0.917445,00:04
1,0.361366,0.312602,0.942368,00:04


epoch,train_loss,valid_loss,accuracy,time
0,0.291529,0.448263,0.964174,00:04
1,0.281293,0.41653,0.974299,00:04


epoch,train_loss,valid_loss,accuracy,time
0,0.253205,0.448212,0.970405,00:05
1,0.247136,0.385117,0.972741,00:05


Arte, Papelaria e Armarinho


epoch,train_loss,valid_loss,accuracy,time
0,1.005644,0.923765,0.93248,00:28
1,0.956945,0.8785,0.94024,00:28


epoch,train_loss,valid_loss,accuracy,time
0,0.919962,0.841767,0.949978,00:29
1,0.839086,0.808523,0.957244,00:28


epoch,train_loss,valid_loss,accuracy,time
0,0.848209,0.812497,0.956255,00:37
1,0.799692,0.792192,0.962681,00:36


epoch,train_loss,valid_loss,accuracy,time
0,0.78071,0.789748,0.963373,00:59
1,0.775088,0.78821,0.964609,00:56


Celulares e Telefones


epoch,train_loss,valid_loss,accuracy,time
0,0.917998,0.860236,0.925041,00:30
1,0.883055,0.839089,0.93104,00:27


epoch,train_loss,valid_loss,accuracy,time
0,0.84361,0.814365,0.939141,00:31
1,0.802929,0.782971,0.945139,00:29


epoch,train_loss,valid_loss,accuracy,time
0,0.8114,0.795561,0.944114,00:35
1,0.76518,0.772691,0.948934,00:38


epoch,train_loss,valid_loss,accuracy,time
0,0.7575,0.771733,0.949805,01:00
1,0.748593,0.77027,0.949754,00:59


Instrumentos Musicais


epoch,train_loss,valid_loss,accuracy,time
0,1.109614,1.015523,0.913211,00:40
1,1.067926,0.97934,0.920904,00:39


epoch,train_loss,valid_loss,accuracy,time
0,1.009308,0.945353,0.929243,00:41
1,0.941795,0.900157,0.936678,00:42


epoch,train_loss,valid_loss,accuracy,time
0,0.950929,0.90598,0.937809,00:49
1,0.896887,0.884966,0.940848,00:50


epoch,train_loss,valid_loss,accuracy,time
0,0.879374,0.880984,0.941235,01:29
1,0.867765,0.880069,0.941914,01:30


Saude


epoch,train_loss,valid_loss,accuracy,time
0,1.067875,1.011066,0.918384,00:22
1,1.008841,0.954517,0.92744,00:22


epoch,train_loss,valid_loss,accuracy,time
0,0.996604,0.909732,0.938829,00:25
1,0.908517,0.878816,0.947505,00:25


epoch,train_loss,valid_loss,accuracy,time
0,0.924391,0.88841,0.947126,00:28
1,0.859403,0.859581,0.952061,00:30


epoch,train_loss,valid_loss,accuracy,time
0,0.837993,0.858393,0.953416,00:44
1,0.834867,0.855572,0.953308,00:50


Eletrodomesticos


epoch,train_loss,valid_loss,accuracy,time
0,1.092798,1.034518,0.912817,00:48
1,1.047081,0.972338,0.925272,00:46


epoch,train_loss,valid_loss,accuracy,time
0,0.984289,0.933504,0.933453,00:49
1,0.936245,0.897982,0.94141,00:48


epoch,train_loss,valid_loss,accuracy,time
0,0.935421,0.90592,0.940454,01:00
1,0.888014,0.881885,0.945683,01:01


epoch,train_loss,valid_loss,accuracy,time
0,0.874689,0.881206,0.946217,01:39
1,0.87166,0.877646,0.946273,01:35


Joias e Relogios


epoch,train_loss,valid_loss,accuracy,time
0,0.831192,0.764625,0.914944,00:13
1,0.773494,0.729644,0.921727,00:13


epoch,train_loss,valid_loss,accuracy,time
0,0.747878,0.703973,0.932601,00:14
1,0.693345,0.672716,0.942937,00:14


epoch,train_loss,valid_loss,accuracy,time
0,0.693516,0.68316,0.940461,00:17
1,0.64732,0.655892,0.947352,00:16


epoch,train_loss,valid_loss,accuracy,time
0,0.635701,0.655136,0.947136,00:26
1,0.626032,0.655339,0.947244,00:26


Antiguidades e Colecoes


epoch,train_loss,valid_loss,accuracy,time
0,0.706477,0.613008,0.963131,00:09
1,0.635353,0.582454,0.9664,00:10


epoch,train_loss,valid_loss,accuracy,time
0,0.609691,0.556719,0.977479,00:10
1,0.570779,0.545003,0.980385,00:09


epoch,train_loss,valid_loss,accuracy,time
0,0.568476,0.567042,0.979114,00:12
1,0.542913,0.536752,0.982383,00:11


epoch,train_loss,valid_loss,accuracy,time
0,0.532014,0.537031,0.982746,00:17
1,0.531324,0.534374,0.982928,00:18


Cameras e Acessorios


epoch,train_loss,valid_loss,accuracy,time
0,1.051016,0.94684,0.906965,00:25
1,0.983173,0.914703,0.914869,00:26


epoch,train_loss,valid_loss,accuracy,time
0,0.941929,0.873094,0.925353,00:28
1,0.881459,0.845963,0.931884,00:30


epoch,train_loss,valid_loss,accuracy,time
0,0.880423,0.861497,0.930238,00:35
1,0.83556,0.831648,0.936056,00:36


epoch,train_loss,valid_loss,accuracy,time
0,0.81155,0.834381,0.93666,00:57
1,0.806148,0.833241,0.937208,00:55


Alimentos e Bebidas


epoch,train_loss,valid_loss,accuracy,time
0,1.349389,1.247893,0.846495,00:34
1,1.282839,1.187296,0.859676,00:32


epoch,train_loss,valid_loss,accuracy,time
0,1.215383,1.125815,0.875295,00:34
1,1.118105,1.069542,0.890673,00:34


epoch,train_loss,valid_loss,accuracy,time
0,1.107785,1.076459,0.891871,00:42
1,1.025275,1.030349,0.902257,00:40


epoch,train_loss,valid_loss,accuracy,time
0,1.015563,1.028322,0.902656,01:12
1,0.995355,1.025938,0.903615,01:04


Beleza e Cuidado Pessoal


epoch,train_loss,valid_loss,accuracy,time
0,1.389918,1.243868,0.857308,00:59
1,1.297325,1.204549,0.867625,00:57


epoch,train_loss,valid_loss,accuracy,time
0,1.2108,1.131508,0.88496,01:05
1,1.143253,1.078536,0.897504,01:01


epoch,train_loss,valid_loss,accuracy,time
0,1.138407,1.085245,0.896495,01:20
1,1.07424,1.046237,0.905341,01:20


epoch,train_loss,valid_loss,accuracy,time
0,1.060905,1.044551,0.906875,02:15
1,1.056193,1.04029,0.907106,02:01


Animais


epoch,train_loss,valid_loss,accuracy,time
0,1.025926,0.949046,0.904271,00:25
1,0.960335,0.911027,0.912235,00:24


epoch,train_loss,valid_loss,accuracy,time
0,0.915742,0.857876,0.930399,00:25
1,0.845515,0.821212,0.939617,00:27


epoch,train_loss,valid_loss,accuracy,time
0,0.84921,0.818484,0.939399,00:31
1,0.80422,0.799764,0.945999,00:33


epoch,train_loss,valid_loss,accuracy,time
0,0.783731,0.80368,0.946272,00:47
1,0.773416,0.797677,0.947035,00:54


Carros, Motos e Outros


epoch,train_loss,valid_loss,accuracy,time
0,0.0,0.0,1.0,00:04
1,0.0,0.0,1.0,00:04


epoch,train_loss,valid_loss,accuracy,time
0,0.0,0.0,1.0,00:04
1,0.0,0.0,1.0,00:04


epoch,train_loss,valid_loss,accuracy,time
0,0.0,0.0,1.0,00:04
1,0.0,0.0,1.0,00:04


epoch,train_loss,valid_loss,accuracy,time
0,0.0,0.0,1.0,00:05
1,0.0,0.0,1.0,00:05


Bebes


epoch,train_loss,valid_loss,accuracy,time
0,1.033053,0.941839,0.916632,00:27
1,0.97305,0.910845,0.923816,00:26


epoch,train_loss,valid_loss,accuracy,time
0,0.941701,0.880993,0.932354,00:28
1,0.870569,0.849677,0.938139,00:29


epoch,train_loss,valid_loss,accuracy,time
0,0.890332,0.871208,0.937625,00:34
1,0.827293,0.832478,0.943457,00:33


epoch,train_loss,valid_loss,accuracy,time
0,0.815546,0.831971,0.944483,00:53
1,0.804676,0.830969,0.944623,00:57


Calcados, Roupas e Bolsas


epoch,train_loss,valid_loss,accuracy,time
0,1.199041,1.102167,0.889271,00:44
1,1.158309,1.06173,0.896436,00:47


epoch,train_loss,valid_loss,accuracy,time
0,1.095216,1.020744,0.907298,00:48
1,1.040402,0.969981,0.916466,00:50


epoch,train_loss,valid_loss,accuracy,time
0,1.033224,0.977943,0.917955,01:00
1,0.987079,0.946012,0.923938,01:01


epoch,train_loss,valid_loss,accuracy,time
0,0.973436,0.944453,0.924529,01:33
1,0.959094,0.94281,0.924734,01:38


Agro, Industria e Comercio


epoch,train_loss,valid_loss,accuracy,time
0,1.209654,1.121213,0.916461,00:37
1,1.145508,1.070451,0.927762,00:37


epoch,train_loss,valid_loss,accuracy,time
0,1.08701,1.020856,0.941423,00:38
1,1.007335,0.970689,0.950184,00:37


epoch,train_loss,valid_loss,accuracy,time
0,1.008809,0.968859,0.949862,00:47
1,0.951684,0.940621,0.955477,00:49


epoch,train_loss,valid_loss,accuracy,time
0,0.940168,0.938845,0.956478,01:17
1,0.926458,0.936864,0.956872,01:13


Acessorios para Veiculos


epoch,train_loss,valid_loss,accuracy,time
0,1.680203,1.500496,0.863067,03:55
1,1.607386,1.450798,0.87387,04:04


epoch,train_loss,valid_loss,accuracy,time
0,1.441619,1.33138,0.894295,04:18
1,1.352207,1.271848,0.905109,04:09


epoch,train_loss,valid_loss,accuracy,time
0,1.354953,1.276582,0.904524,05:20
1,1.287605,1.227547,0.913459,05:10


epoch,train_loss,valid_loss,accuracy,time
0,1.278187,1.224967,0.914276,09:01
1,1.261517,1.218629,0.915332,08:56


Ferramentas e Construcao


epoch,train_loss,valid_loss,accuracy,time
0,1.451396,1.327978,0.875522,01:30
1,1.388002,1.270747,0.886987,01:35


epoch,train_loss,valid_loss,accuracy,time
0,1.281643,1.194576,0.902293,01:41
1,1.217817,1.142824,0.91389,01:35


epoch,train_loss,valid_loss,accuracy,time
0,1.210534,1.154316,0.915494,01:56
1,1.146268,1.105544,0.922661,02:03


epoch,train_loss,valid_loss,accuracy,time
0,1.121866,1.101935,0.92353,03:15
1,1.114629,1.098312,0.924869,03:17


Informatica


epoch,train_loss,valid_loss,accuracy,time
0,1.237879,1.147946,0.898287,01:21
1,1.173452,1.101982,0.907316,01:23


epoch,train_loss,valid_loss,accuracy,time
0,1.114239,1.058431,0.917233,01:23
1,1.048172,1.005351,0.926937,01:21


epoch,train_loss,valid_loss,accuracy,time
0,1.056023,1.025029,0.926802,01:48
1,1.000698,0.984108,0.932667,01:40


epoch,train_loss,valid_loss,accuracy,time
0,0.979024,0.980274,0.933612,02:42
1,0.982339,0.978287,0.933902,02:44


Festas e Lembrancinhas


epoch,train_loss,valid_loss,accuracy,time
0,0.8852,0.799882,0.93296,00:16
1,0.832264,0.772391,0.937855,00:16


epoch,train_loss,valid_loss,accuracy,time
0,0.797142,0.72979,0.951228,00:18
1,0.741443,0.712083,0.956909,00:18


epoch,train_loss,valid_loss,accuracy,time
0,0.742613,0.709988,0.957871,00:21
1,0.699676,0.69714,0.959706,00:20


epoch,train_loss,valid_loss,accuracy,time
0,0.675801,0.696828,0.960318,00:31
1,0.675874,0.695994,0.960668,00:32


Esportes e Fitness


epoch,train_loss,valid_loss,accuracy,time
0,1.336211,1.223807,0.898612,01:27
1,1.275953,1.176804,0.909645,01:26


epoch,train_loss,valid_loss,accuracy,time
0,1.208495,1.117863,0.922947,01:25
1,1.124289,1.06599,0.932413,01:26


epoch,train_loss,valid_loss,accuracy,time
0,1.144534,1.073724,0.932557,01:54
1,1.072437,1.03895,0.939696,01:46


epoch,train_loss,valid_loss,accuracy,time
0,1.05065,1.03346,0.940831,03:01
1,1.039768,1.032148,0.941161,03:03


Mais Categorias


epoch,train_loss,valid_loss,accuracy,time
0,1.109992,1.033948,0.876388,00:24
1,1.050829,0.997619,0.882822,00:24


epoch,train_loss,valid_loss,accuracy,time
0,1.042953,0.966331,0.893325,00:24
1,0.965581,0.926339,0.902287,00:25


epoch,train_loss,valid_loss,accuracy,time
0,0.962655,0.933219,0.901243,00:29
1,0.907491,0.909525,0.908556,00:29


epoch,train_loss,valid_loss,accuracy,time
0,0.88008,0.907639,0.909546,00:47
1,0.876795,0.907175,0.908996,00:48


Brinquedos e Hobbies


epoch,train_loss,valid_loss,accuracy,time
0,1.461611,1.36512,0.808352,00:38
1,1.405679,1.297419,0.824144,00:38


epoch,train_loss,valid_loss,accuracy,time
0,1.315383,1.223636,0.844274,00:40
1,1.206116,1.165316,0.85739,00:43


epoch,train_loss,valid_loss,accuracy,time
0,1.227202,1.169296,0.860946,00:52
1,1.131707,1.126544,0.868972,00:53


epoch,train_loss,valid_loss,accuracy,time
0,1.116897,1.121297,0.871941,01:17
1,1.088845,1.121599,0.873409,01:25


Games


epoch,train_loss,valid_loss,accuracy,time
0,0.906269,0.79072,0.895718,00:10
1,0.830217,0.756417,0.903488,00:10


epoch,train_loss,valid_loss,accuracy,time
0,0.803934,0.72439,0.916609,00:10
1,0.734604,0.701887,0.926623,00:11


epoch,train_loss,valid_loss,accuracy,time
0,0.740645,0.731478,0.927659,00:12
1,0.693329,0.684586,0.929385,00:13


epoch,train_loss,valid_loss,accuracy,time
0,0.661544,0.683621,0.929903,00:19
1,0.653426,0.683878,0.93163,00:19


Casa, Moveis e Decoracao


epoch,train_loss,valid_loss,accuracy,time
0,1.710633,1.562696,0.831166,03:12
1,1.640616,1.507686,0.842625,03:16


epoch,train_loss,valid_loss,accuracy,time
0,1.523099,1.435784,0.861806,03:20
1,1.45629,1.347985,0.875983,03:18


epoch,train_loss,valid_loss,accuracy,time
0,1.430788,1.367077,0.875347,04:13
1,1.353882,1.298403,0.886948,04:22


epoch,train_loss,valid_loss,accuracy,time
0,1.331114,1.293222,0.889017,06:47
1,1.327834,1.287044,0.890277,07:13


Livros, Revistas e Comics


epoch,train_loss,valid_loss,accuracy,time
0,0.853895,0.764218,0.763573,00:07
1,0.799639,0.741912,0.778459,00:07


epoch,train_loss,valid_loss,accuracy,time
0,0.775281,0.747197,0.772329,00:07
1,0.715195,0.690426,0.80648,00:07


epoch,train_loss,valid_loss,accuracy,time
0,0.694489,0.693555,0.809107,00:08
1,0.647932,0.680557,0.812026,00:09


epoch,train_loss,valid_loss,accuracy,time
0,0.60737,0.683403,0.814653,00:12
1,0.601936,0.68903,0.815528,00:12


In [23]:
# Extra fine-tuning of one category's saved classifier.
cat = 'Livros, Revistas e Comics'

# Build everything this cell needs explicitly instead of relying on whatever
# `data_class`, `config`, `lr` and `wd` the training loop happened to leave
# behind: the loop walks a set, so its last category is not guaranteed to be `cat`.
data_class = load_data('/data/anime/ml/por/', cat + '.pkl', bs=500)
config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3
lr = 3e-2
wd = 0.01

# NOTE(review): drop_mult is 0.5 here versus 0.3 in the training loop, and the
# learner keeps its default loss function (the weighted label-smoothing loss is
# not re-attached), so the losses printed here are not comparable with the
# training-loop logs — confirm both are intentional.
learn_c = text_classifier_learner(data_class, AWD_LSTM, pretrained=False, config=config, drop_mult=0.5)
learn_c.load(f'/data/anime/ml/por/{cat}_model')
learn_c.fit_one_cycle(3, slice(lr/10/(2.6**4),lr/10), wd=wd, moms=(0.8,0.7))

epoch,train_loss,valid_loss,accuracy,time
0,0.464418,0.453853,0.815236,00:12
1,0.447053,0.457287,0.810274,00:12
2,0.434355,0.455159,0.810274,00:12


In [25]:
# Display up to 300 rows of text / target / prediction for the current learner.
learn_c.show_results(rows=300)

text,target,prediction
▁xxbos ▁440 7 ▁manga ▁san ct ua ry ▁1 ▁sha ▁f omi m ura ▁conrad ▁4 ▁ ry oi chi ▁ ike g ami,MANGA,MANGA
▁xxbos ▁livra ▁pro g re ▁de ▁la ▁m eta phy s ique ▁en ▁all ema g ne ▁de pu i ▁lei b ni,EROTIC_BOOKS,EROTIC_BOOKS
▁xxbos ▁livra ▁ oe u v re ▁t ▁vi ▁rap port ▁du ▁ phy s ique ▁et ▁du ▁mor al ▁de ▁l,EROTIC_BOOKS,EROTIC_BOOKS
▁xxbos ▁livra ▁voce ▁de c ide ▁com a ▁se ▁s ente ▁ga ry ▁mc ka y ▁e ▁don ▁din k me yer,BOOKS,BOOKS
▁xxbos ▁metoda ▁piana ▁bela ▁bar to k ▁mi k rok os ma ▁vol ▁5 ▁p ▁r ▁a ▁m ▁a ▁c ▁a ▁a,BOOKS,BOOKS
▁xxbos ▁caca da ▁aa ▁poderosa ▁chef aa ▁spa w n ▁a ▁mal dica a ▁da ▁spa w n ▁24 h q,COMICS,BOOKS
▁xxbos ▁livra ▁p our ▁un e ▁union ▁oc ci d enta le ▁entre ▁l ▁euro pe ▁et ▁le ▁ eta t,EROTIC_BOOKS,EROTIC_BOOKS
▁xxbos ▁re ▁integra tion ▁of ▁the ▁ schi zo ph ren ic ▁pa ti ent ▁j ▁a ▁ lie ber man,BOOKS,BOOKS
▁xxbos ▁revista ▁cont iga ▁2 103 ▁van essa ▁ gia com a ▁an itta ▁we s ley ▁sa f ada a,BOOKS,BOOKS
▁xxbos ▁mo tive ▁a 2 ▁ar be it sb uch ▁le k tion ▁9 ▁18 ▁mi t ▁mp 3 ▁audia ▁c,EROTIC_BOOKS,EROTIC_BOOKS


In [7]:
# Export every trained classifier as a standalone learner file.
for cat in set(cats.values()):
    print(cat)
    if not Path(f'/data/anime/ml/por/{cat}_model.pth').exists():
        # Nothing was trained for this category (e.g. it had no data); do not crash on it.
        print(f'  no trained model for {cat}, skipping')
        continue

    # The architecture must match the one the weights were trained with.
    config = awd_lstm_clas_config.copy()
    config['qrnn'] = True
    config['n_hid'] = 1550 #default 1152
    config['n_layers'] = 4 #default 3
    data_class = load_data('/data/anime/ml/por/', cat + '.pkl', bs=500)
    learn_c = text_classifier_learner(data_class, AWD_LSTM, pretrained=False, config=config, drop_mult=0.3)
    learn_c.load(f'/data/anime/ml/por/{cat}_model')
    learn_c.export(f'/data/anime/ml/por/{cat}_model_export')

    # Drop the learner and collect garbage first, so that the CUDA cache can
    # actually be released before the next category is loaded.
    del learn_c
    gc.collect()
    torch.cuda.empty_cache()

Musica, Filmes e Seriados
Casa, Moveis e Decoracao
Eletrodomesticos
Livros, Revistas e Comics
Antiguidades e Colecoes
Agro, Industria e Comercio
Bebes
Games
Cameras e Acessorios
Ingressos
Joias e Relogios
Informatica
Carros, Motos e Outros
Brinquedos e Hobbies
Acessorios para Veiculos
Celulares e Telefones
Esportes e Fitness
Ferramentas e Construcao
Eletronicos, Audio e Video
Saude
Alimentos e Bebidas
Calcados, Roupas e Bolsas
Beleza e Cuidado Pessoal
Instrumentos Musicais
Festas e Lembrancinhas
Mais Categorias
Animais
Arte, Papelaria e Armarinho
