In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the local `utils` module) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

from fastai.text.all import *
from fastai.vision.all import *
import pandas as pd
import torch
from tqdm.notebook import tqdm

from utils import get_dls

In [2]:
# Seed every RNG this notebook touches so results are repeatable across runs.
import random

import numpy as np
import torch

seed = 42

# Python RNG
random.seed(seed)

# NumPy RNG
np.random.seed(seed)

# PyTorch RNGs (CPU generator, plus every visible GPU when CUDA is available)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

# cuDNN: request deterministic kernels AND switch off the autotuner.
# `deterministic = True` alone is not enough: with `benchmark` enabled cuDNN may
# still select a different convolution algorithm from one run to the next.
# This mirrors what fastai's `set_seed(seed, reproducible=True)` does.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [4]:
# Directory holding the CSV splits (train/validation/test "_small" files) read in the next cell.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
data_path = Path("/mnt/nas/backups/08-07-2020/desktopg01/lisa/Data/CSV")

In [5]:
# Load the three CSV splits, keeping only the label and text columns.
wanted_cols = ['document_type', 'body']
train, val, test_data = [
    pd.read_csv(data_path/csv_name, usecols=wanted_cols)
    for csv_name in ("train_small.csv", "validation_small.csv", "test_small.csv")
]

In [7]:
# Per-class document counts for each split, displayed together as a tuple.
train_counts, val_counts, test_counts = (
    split_df["document_type"].value_counts()
    for split_df in (train, val, test_data)
)
train_counts, val_counts, test_counts

(outros                              134134
 peticao_do_RE                         9509
 agravo_em_recurso_extraordinario      2546
 sentenca                              2129
 acordao_de_2_instancia                 553
 despacho_de_admissibilidade            346
 Name: document_type, dtype: int64,
 outros                              84104
 peticao_do_RE                        6364
 agravo_em_recurso_extraordinario     2149
 sentenca                             1636
 acordao_de_2_instancia                299
 despacho_de_admissibilidade           183
 Name: document_type, dtype: int64,
 outros                              85408
 peticao_do_RE                        6331
 agravo_em_recurso_extraordinario     1841
 sentenca                             1475
 acordao_de_2_instancia                273
 despacho_de_admissibilidade           198
 Name: document_type, dtype: int64)

In [3]:
# Root folder of the image dataset; its "test" sub-folder is read further down.
# NOTE(review): execution counts restart here (In [3] follows In [7]), so the saved
# outputs were produced out of order — verify with Restart Kernel -> Run All.
path = Path("/mnt/nas/backups/08-07-2020/desktopg01/lisa/Data/small_flow")
# Build the DataLoaders with the project helper from `utils`.
# NOTE(review): 64 and 224 are presumably batch size and image size — confirm against utils.get_dls.
dls = get_dls(path, 64, 224)

In [4]:
# Collect the image files under `path`, restricted to the "test" folder.
test_items = get_image_files(path, folders="test")

In [5]:
# Build a test DataLoader from the collected items; with_labels=True asks for labels
# to be attached, which the class-count cell further down relies on (test_dl.tls[1]).
test_dl = dls.test_dl(test_items, with_labels=True)

In [21]:
# Show the class vocabulary; the bincount cells below report one count per entry.
# NOTE(review): presumably the bincount positions follow this vocab order — confirm.
dls.categorize.vocab

['acordao_de_2_instancia', 'agravo_em_recurso_extraordinario', 'despacho_de_admissibilidade', 'outros', 'peticao_do_RE', 'sentenca']

In [16]:
# Per-class item counts for the training split.
# NOTE(review): tls[1] is presumably the encoded label list — confirm.
train_label_ids = tensor(dls.train.tls[1])
torch.bincount(train_label_ids)

tensor([   583,   4220,    361, 140786,  10181,   2177])

In [17]:
# Per-class item counts for the validation split.
# NOTE(review): tls[1] is presumably the encoded label list — confirm.
valid_label_ids = tensor(dls.valid.tls[1])
torch.bincount(valid_label_ids)

tensor([  314,  2650,   183, 91434,  6803,  1613])

In [18]:
# Per-class item counts for the test DataLoader built with with_labels=True.
# NOTE(review): tls[1] is presumably the encoded label list — confirm.
test_label_ids = tensor(test_dl.tls[1])
torch.bincount(test_label_ids)

tensor([  285,  2537,   198, 87902,  6177,  1478])