# Imports

In [1]:
import os
from nn import SimpleFFNN
from train import Train
from preProcessing import PreProcessing
import numpy as np
from testModel import TestModel

# Função para dividir os dados de treino e de teste

In [2]:
def split_array(data: np.ndarray, train_size: float = 0.8, seed: "int | None" = None):
    """
    Randomly split a 2D array into a training part and a test part.

    The rows are shuffled on a copy, so the caller's ``data`` keeps its original
    order and re-running the split never reorders the input.

    :param data: The 2D array (or sequence of rows) to split.
    :param train_size: Fraction of rows, between 0 and 1, placed in the first array (default is 0.8).
    :param seed: Optional seed for a reproducible shuffle. When None, NumPy's global
        random state is used.
    :return: Two arrays, one with the training rows and one with the remaining test rows.
    :raises ValueError: If train_size is outside the interval [0, 1].
    """
    if not 0.0 <= train_size <= 1.0:
        raise ValueError(f"train_size must be between 0 and 1, got {train_size}")

    # Index at which the shuffled rows are cut
    n_rows = len(data)
    split_index = int(n_rows * train_size)

    # Draw a random row order instead of shuffling the input in place
    if seed is None:
        order = np.random.permutation(n_rows)
    else:
        order = np.random.default_rng(seed).permutation(n_rows)

    # Fancy indexing copies ndarray rows; plain sequences are rebuilt as a new list
    if isinstance(data, np.ndarray):
        shuffled = data[order]
    else:
        shuffled = [data[i] for i in order]

    return shuffled[:split_index], shuffled[split_index:]

# Variáveis com os nomes dos ficheiros de treino e do modelo

In [6]:
# Base name (without ".txt") of the labelled file that is cleaned and used for training
data = "train"
# Base name (without ".txt") of the unlabelled file used for the self-evaluation
avaliation = "test_no_labels"

# Base name of the trained-model pickle, looked up as "data/<newPKL>.pkl"
newPKL = "Pedro_6"
# Placeholder; bound later, when the model is loaded or created
model = None

## Pré-processamento

In [3]:
# Clean the raw training file "<data>.txt" before it is split into train and test parts
print("\033[34mPre Processing the data\n\033[0m")
pp = PreProcessing(f"{data}.txt")
clean_data = pp.returnCleanText()
print("\033[32mPre Processing Completed!\n\033[0m")

[34mPre Processing the data
[0m
[32mPre Processing Completed!
[0m


# Dividir a train_data do test_data

In [42]:
# Hold out part of the cleaned data: 80% for training, the remaining 20% for testing
data_to_train, data_to_test = split_array(data=clean_data, train_size=0.8)

## Carregar o modelo ou Criar um novo
#### Conforme o nome dado à variável 'newPKL'

In [7]:
# Location of the pickled model selected by 'newPKL'
model_path = f"data/{newPKL}.pkl"

if not os.path.isfile(model_path):
    # No saved model under that name: create and train a new one
    print("\033[34mCreating a new Model\n\033[0m")

    # Number of nodes per hidden layer, one entry per layer
    # ([5] -> 5 nodes in hidden layer 1; [3, 6] -> 3 nodes in hidden layer 1 and 6 in hidden layer 2; ...)
    layer_hidden = [50, 25, 9]
    learning_rate = 0.01
    epochs = 50
    # NOTE(review): in this branch 'model' is the Train object, whereas the other
    # branch binds it to the result of SimpleFFNN.load_model — confirm this is intended
    model = Train(data_to_train, newPKL, layer_hidden, learning_rate, epochs)
    model.train()

    print("\033[32mModel Created!\n\033[0m")
else:
    # A saved model already exists: load it instead of retraining
    print("\033[34mLoading Model\n\033[0m")

    model = SimpleFFNN.load_model(model_path)

    print("\033[32mLoading Completed!\n\033[0m")

[34mLoading Model
[0m
[32mLoading Completed!
[0m


# Variável para testar os modelos

In [8]:
# Tester object for the model named by 'newPKL'
# (presumably TestModel loads the saved model by that name — confirm in testModel.py)
model_teste= TestModel(newPKL)

# Teste do modelo com o ficheiro com as labels identificadas
##### 'train.txt'

In [4]:
# Test on the held-out split and compare the predictions with the labels.
# Needs 'model_teste' from the TestModel cell; the NameError recorded below comes from
# this cell having been executed (In [4]) before that one (In [8]).
model_teste.test_with_label(data_to_test)

NameError: name 'model_teste' is not defined

# Gerar ficheiro com os resultados do modelo, com inputs do ficheiro sem as labels
##### 'test_no_labels.txt'

In [5]:
# Test on the unlabelled file "<avaliation>.txt" and only write the results to 'results.txt'
pp_no_label = PreProcessing(f"{avaliation}.txt")
clean_data_no_label = pp_no_label.returnCleanText(plot_index=3)
model_teste.test_without_labels(clean_data_no_label)

[34mTesting the Model without Labels
[0m
[36mTest Completed!
[0m


# Testar apenas com 1 frase de input

In [12]:
# Plot description to classify, written as adjacent literals that join into one string
texto = (
    "Romeo and Juliet is a play written by Shakespeare. "
    "It is a tragic love story where the two main characters, Romeo and Juliet, "
    "are supposed to be sworn enemies but fall in love. "
    "Due to their families' ongoing conflict, they cannot be together, "
    "so they kill themselves because they cannot cope with being separated "
    "from one another. "
    "Romeo and Juliet is a Shakespearean tragedy"
)
# Clean the raw text with the class-level helper, then test it against the expected genre
clean_texto = PreProcessing.returnCleanInputText(texto)
genre = "romance"
model_teste.test_from_input(clean_texto, genre)

[34mTrying to predict the genre
[0m
[ True]
[32mFor the movie given by input the model said: romance', and was: 'romance'[0m


# Testar com o nosso ficheiro de teste
##### 'our_data.txt'

In [9]:
# Clean our own labelled test file 'our_data.txt'.
# plot_index=2 is passed to returnCleanText (presumably the column holding the plot
# text — confirm in preProcessing.py).
print("\033[34mPre Processing the Our data\n\033[0m")
# Rebinds 'pp', replacing the PreProcessing object built earlier for the training file
pp=PreProcessing("our_data.txt")
clean_our_data=pp.returnCleanText(plot_index=2)
print("\033[32mPre Processing Completed!\n\033[0m")

[34mPre Processing the Our data
[0m
[32mPre Processing Completed!
[0m


In [13]:
# Run the labelled test on our own data.
# genre_index=1 and plot_index=2 presumably select the genre and plot columns of
# 'our_data.txt' — confirm against TestModel.test_with_label.
model_teste.test_with_label(clean_our_data,genre_index=1,plot_index=2)

[34mTesting the Model with Labels
[0m
[31mFor the movie 'Dune' the model said: animation', and was: 'sci-fi'[0m
[32mFor the movie 'Shrek' the model said: animation', and was: 'animation'[0m
[32mFor the movie 'The Teacher' the model said: drama', and was: 'drama'[0m
[32mFor the movie 'Coco' the model said: animation', and was: 'animation'[0m
[32mFor the movie 'How I Met Your Mother' the model said: comedy', and was: 'comedy'[0m
[31mFor the movie 'Scary Movie' the model said: horror', and was: 'comedy'[0m
[31mFor the movie 'Back to the Future' the model said: action', and was: 'sci-fi'[0m
[31mFor the movie 'Scream' the model said: action', and was: 'horror'[0m
[31mFor the movie 'Fast and Furious' the model said: drama', and was: 'action'[0m
[32mFor the movie '10 Things I Ate About You' the model said: romance', and was: 'romance'[0m
[32mFor the movie 'O Rei Leao' the model said: animation', and was: 'animation'[0m
[31mFor the movie 'Django Unchained' the model sa