If you use Colab, select a GPU runtime and run the cells marked "Colab required".

In [None]:
# Colab required
# Mount Google Drive at /content/drive so that files stored there are
# reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Colab required
# change directory to the repo folder
# NOTE(review): `path_to_repo` looks like a placeholder - replace it with the
# actual location of the repo before running.
%cd path_to_repo

In [None]:
# Colab required
# Install the libraries listed in requirements.txt; repeat this after every
# runtime reset.
!pip install -r requirements.txt

In [None]:
import os
import sys

# Locate the `src` folder that sits beside the current working directory,
# i.e. <parent of cwd>/src, as an absolute path.
# NOTE(review): this is NOT <cwd>/src - confirm it matches the repo layout
# for the directory the notebook is run from.
cwd_parent = os.path.dirname(os.getcwd())
parent_dir = os.path.abspath(os.path.join(cwd_parent, 'src'))

# Let Python import modules from that folder.
sys.path.append(parent_dir)

In [None]:
from src import LLMmanager, utils
import mlflow
# this code uploads the credentials to MLflow given in the .env file
from src.settings import (
    MLFLOW_TRACKING_URI,
    MLFLOW_TRACKING_USERNAME,
    MLFLOW_TRACKING_PASSWORD,
)

# Warn (without aborting) when no tracking URI was provided by src.settings.
if MLFLOW_TRACKING_URI is None:
    print('importing mlflow credentials failed')

experiment_name = 'Mateusz_PatentMatchBaseline_test'

# Resolve the experiment by name and make sure it ends up usable:
#   * name unknown to MLflow   -> create a new experiment;
#   * experiment was deleted   -> restore it. Creating a new experiment is not
#     an option here because MLflow rejects a name that is still held by a
#     deleted experiment.
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    mlflow.create_experiment(experiment_name)
    experiment = mlflow.get_experiment_by_name(experiment_name)
elif experiment.lifecycle_stage == 'deleted':
    mlflow.tracking.MlflowClient().restore_experiment(experiment.experiment_id)
    # Re-fetch so `experiment` reflects the restored (active) state.
    experiment = mlflow.get_experiment_by_name(experiment_name)

In [None]:
# Training settings handed to the trainer as its `config` argument.
config = dict(
    learning_rate=2e-5,
    batch_size=32,
    num_epochs=4,
    max_length=512,
    test_size=0.1,
    random_state=42,
)

In [None]:
# Create the text-similarity manager. The same 'bert-base-uncased' identifier
# is used for the model and for its tokenizer; verbose output and MLflow
# reporting are both switched on.
trainer = LLMmanager.TextSimilarityLLMManager(
    model_name='bert-base-uncased',
    tokenizer_name='bert-base-uncased',
    config=config,
    verbose=True,
    MLFlow_reporting=True,
)

In [None]:
# Execute the trainer inside an MLflow run that belongs to the experiment
# resolved earlier in the notebook.
with mlflow.start_run(experiment_id=experiment.experiment_id):
    # Name the run "<timestamp>_<MLflow username>" through the reserved
    # 'mlflow.runName' tag.
    run_label = f'{utils.timestamp()}_{MLFLOW_TRACKING_USERNAME}'
    mlflow.set_tag(key='mlflow.runName', value=run_label)
    trainer.run(
        test_path='data/test_dataset.json',
        train_path='data/train_dataset.json',
    )
# Explicit end_run after the context manager has exited; kept from the original
# cell in case a run is still active at this point.
mlflow.end_run()