In [30]:
import os
from nn import SimpleFFNN
from train import Train
from preProcessing import PreProcessing
import numpy as np
from testModel import TestModel

In [31]:
def split_array(data: np.ndarray, train_size: float = 0.8, seed=None):
    """
    Shuffle the rows of ``data`` and split them into a training and a test part.

    The input is never modified: a copy is shuffled, so calling this function
    (or re-running the notebook cell that calls it) leaves ``data`` in its
    original order.

    :param data: The 2D array (or list of rows) to split.
    :param train_size: Fraction of rows placed in the first part; must lie in
        the closed interval [0, 1] (default 0.8).
    :param seed: Optional integer seed for a private random generator, giving
        the same split on every call. When ``None`` (default) NumPy's global
        random state is used, so ``np.random.seed`` still controls the result.
    :return: Tuple ``(train_data, test_data)`` holding the first
        ``int(len(data) * train_size)`` shuffled rows and the remaining rows.
    :raises ValueError: If ``train_size`` is outside [0, 1] (including NaN).
    """
    if not 0.0 <= train_size <= 1.0:
        raise ValueError(f"train_size must be between 0 and 1, got {train_size}")

    # Work on a copy: np.random.shuffle reorders its argument in place and the
    # caller's data must not be silently reordered.
    shuffled = data.copy() if isinstance(data, np.ndarray) else list(data)

    # The module-level functions share the global state; RandomState(seed)
    # gives an isolated, reproducible stream when a seed is requested.
    rng = np.random if seed is None else np.random.RandomState(seed)
    rng.shuffle(shuffled)

    # Index at which the shuffled rows are cut into the two parts.
    split_index = int(len(shuffled) * train_size)

    return shuffled[:split_index], shuffled[split_index:]

In [32]:
# Genre labels handled in this notebook.
genres = [
    'drama',
    'comedy',
    'horror',
    'action',
    'romance',
    'western',
    'animation',
    'crime',
    'sci-fi',
]

# Single-entry list holding the number of genres
# (presumably the size of the network's output layer -- confirm against the model code).
nodes_outuput = [len(genres)]

# Placeholder; a later cell assigns the loaded or newly trained model.
model = None

# Base name (no directory, no extension) of the file holding the trained model.
newPKL = "Pedro_3_20_30,20,10"

# Base name (no extension) of the training file to be cleaned.
data = "train"

In [33]:
# Status banner (ANSI blue) before the cleaning step starts.
print("\033[34mPre Processing the data\n\033[0m")

# Run the project's pre-processing on "<data>.txt" and keep the cleaned result
# for the split/training cells.
pp = PreProcessing(f"{data}.txt")
clean_data = pp.returnCleanText()

# Status banner (ANSI green) once cleaning has finished.
print("\033[32mPre Processing Completed!\n\033[0m")

[34mPre Processing the data
[0m
[32mPre Processing Completed!
[0m


In [34]:
# Seed NumPy's global RNG so the train/test partition is the same on every run.
# Without this, a run that loads a previously saved model would evaluate it on
# a fresh random split that can contain rows the model was trained on.
# NOTE(review): this relies on `clean_data` having a deterministic row order
# and on the cached model having been trained with this same seed -- confirm.
SPLIT_SEED = 42
np.random.seed(SPLIT_SEED)
data_to_train, data_to_test = split_array(clean_data)

# Location of the pickled model checked/loaded below.
model_path = f"data/{newPKL}.pkl"

# Load the model if it already exists.
if os.path.isfile(model_path):
    print("\033[34mLoading Model\n\033[0m")

    model = SimpleFFNN.load_model(model_path)

    print("\033[32mLoading Completed!\n\033[0m")
# Otherwise create and train a new model.
else:
    print("\033[34mCreating a new Model\n\033[0m")

    # Number of nodes per hidden layer, one entry per layer
    # ([5] -> 5 nodes in hidden layer 1; [3, 6] -> 3 nodes in hidden layer 1
    # and 6 in hidden layer 2; ...).
    layer_hidden = [30, 20, 10]
    learning_rate = 0.01
    epochs = 20

    # NOTE(review): in this branch `model` is the Train object, whereas the
    # load branch stores whatever SimpleFFNN.load_model returns -- later code
    # should not assume a single type for `model`.
    model = Train(data_to_train, newPKL, layer_hidden, learning_rate, epochs)
    model.train()

    print("\033[32mModel Created!\n\033[0m")

[34mCreating a new Model
[0m
Epoch 0, Loss 0.09640203276292635
Epoch 1, Loss 0.09611743551234485
Epoch 2, Loss 0.09571675070962864
Epoch 3, Loss 0.0950562451154483
Epoch 4, Loss 0.09407559324751917
Epoch 5, Loss 0.09279151714209403
Epoch 6, Loss 0.09130532013837711
Epoch 7, Loss 0.08973853982479016
Epoch 8, Loss 0.08821765162153616
Epoch 9, Loss 0.08665590014239556
Epoch 10, Loss 0.08513274124085124
Epoch 11, Loss 0.08368133087257924
Epoch 12, Loss 0.08231160294671012
Epoch 13, Loss 0.08101095526441487
Epoch 14, Loss 0.0797615556190171
Epoch 15, Loss 0.07854505803914233
Epoch 16, Loss 0.07734309688481951
Epoch 17, Loss 0.07613594559733232
Epoch 18, Loss 0.07490440736659688
Epoch 19, Loss 0.07363408441095508
[32mModel Created!
[0m


In [35]:
# Build the tester for the model identified by `newPKL`.
model_teste = TestModel(newPKL)

# Evaluate on the held-out rows and compare predictions against their labels.
model_teste.test_with_label(data_to_test)

# Alternative: run on unlabelled data and only write the results to 'results.txt'.
# NOTE(review): the labelled pipeline passes a ".txt" file name to PreProcessing;
# confirm whether the path below needs the extension as well.
# pp_no_label = PreProcessing("test_no_labels")
# clean_data_no_label = pp_no_label.returnCleanText()
# model_teste.test_without_labels(clean_data_no_label)

[34mTesting the Model with Labels
[0m
[34m'Model: ['horror']', Test: 'horror'[0m
[34m'Model: ['western']', Test: 'western'[0m
[31m'Model: ['comedy']', Test: 'drama'[0m
[34m'Model: ['drama']', Test: 'drama'[0m
[34m'Model: ['drama']', Test: 'drama'[0m
[31m'Model: ['horror']', Test: 'crime'[0m
[34m'Model: ['drama']', Test: 'drama'[0m
[34m'Model: ['drama']', Test: 'drama'[0m
[34m'Model: ['drama']', Test: 'drama'[0m
[31m'Model: ['drama']', Test: 'comedy'[0m
[34m'Model: ['horror']', Test: 'horror'[0m
[34m'Model: ['drama']', Test: 'drama'[0m
[31m'Model: ['horror']', Test: 'animation'[0m
[31m'Model: ['romance']', Test: 'western'[0m
[31m'Model: ['drama']', Test: 'western'[0m
[31m'Model: ['drama']', Test: 'comedy'[0m
[34m'Model: ['drama']', Test: 'drama'[0m
[34m'Model: ['drama']', Test: 'drama'[0m
[31m'Model: ['horror']', Test: 'crime'[0m
[31m'Model: ['horror']', Test: 'drama'[0m
[34m'Model: ['horror']', Test: 'horror'[0m
[34m'Model: ['animation']', T