# Models

This notebook trains and evaluates the various models tested for difficulty estimation.

In [1]:
# --------------------------------- VARIABLES -------------------------------- #
# Experiment grid: the cells below iterate over every
# (dataset, context, model) combination built from these three lists.
DATASET_TO_TRAIN = [
    "french_difficulty",
    "sentences",
    "ljl",
]
CONTEXT_TO_TRY = [
    "empty",
    "CECRL",
]
MODEL_TO_TRAIN = [
    "gpt-3.5-turbo-1106",
]

In [2]:
# ---------------------------- PREPARING NOTEBOOK ---------------------------- #
# Autoreload: mode 2 picks up edits to imported modules live, so project code
# can be changed without restarting the kernel
%load_ext autoreload
%autoreload 2

# Random seed: seeds NumPy's legacy global RNG only; no other library is seeded in this cell
import numpy as np
np.random.seed(42)

# External modules
import os
from IPython.display import display, Markdown, Latex, clear_output
# NOTE: this binds the name `tqdm` to the `tqdm.notebook` module, not to the progress-bar class
from tqdm import notebook as tqdm

# Set global log level (INFO and above are emitted by the root logger)
import logging
logging.basicConfig(level=logging.INFO)
# Presumably silences the Hugging Face tokenizers fork/parallelism warning — TODO confirm
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

# Define PWD as the current git repository
# (the repo is located by searching from '.' upwards through parent directories)
import git
repo = git.Repo('.', search_parent_directories=True)
pwd = repo.working_dir
# Switch the working directory to the repository root; presumably so that
# `src.*` imports and relative data paths resolve from there — verify
os.chdir(pwd)

## Creating the datasets

In [3]:
from src.DataManager import DataManager

data_manager = DataManager()

# For every (dataset, context, model) combination, call
# get_data_ready_for_fine_tuning for the "train" split and then the "test" split.
for dataset in DATASET_TO_TRAIN:
    for context in CONTEXT_TO_TRY:
        for model in MODEL_TO_TRAIN:
            for type_set in ("train", "test"):
                data_manager.get_data_ready_for_fine_tuning(
                    dataset=dataset,
                    type_set=type_set,
                    context=context,
                    model_name=model,
                )

INFO:DataManager:Initializing data manager...


Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.


## Training the models

In [4]:
from src.DifficultyEstimationModel import DifficultyEstimationModel

# Fine-tuned models, keyed by "<dataset>_<context>".
# NOTE(review): the key does not include the model name, so with more than one
# entry in MODEL_TO_TRAIN a later model would overwrite an earlier one.
model_dict = {}

for dataset in DATASET_TO_TRAIN:
    for context in CONTEXT_TO_TRY:
        for model_name in MODEL_TO_TRAIN:
            model = DifficultyEstimationModel(model=model_name)
            # Name of the prepared training file for this combination.
            training_file = (
                f"train_{dataset}_{context}_{model_name}_prepared_for_fine_tuning"
            )
            model.fine_tune(training_file)
            model_dict[f"{dataset}_{context}"] = model

INFO:DifficultyEstimationModel:Initializing model gpt-3.5-turbo-1106...
INFO:DifficultyEstimationModel:Waiting for file to be processed...
INFO:DifficultyEstimationModel:File processed ! Starting fine tuning...
INFO:DifficultyEstimationModel:Fine tuning finished ! Saving model...
INFO:DifficultyEstimationModel:Loaded trained_models.csv.
INFO:DifficultyEstimationModel:Model saved !
INFO:DifficultyEstimationModel:Initializing model gpt-3.5-turbo-1106...
INFO:DifficultyEstimationModel:Waiting for file to be processed...
INFO:DifficultyEstimationModel:File processed ! Starting fine tuning...
INFO:DifficultyEstimationModel:Fine tuning finished ! Saving model...
INFO:DifficultyEstimationModel:Loaded trained_models.csv.
INFO:DifficultyEstimationModel:Model saved !
INFO:DifficultyEstimationModel:Initializing model gpt-3.5-turbo-1106...
INFO:DifficultyEstimationModel:Waiting for file to be processed...
INFO:DifficultyEstimationModel:File processed ! Starting fine tuning...
INFO:DifficultyEstima