In [1]:
# NOTE(review): the star import hides where DataPreparationLLM, GPT4Model,
# Evaluator, Trainer and ClassificationModule (used further down) come from —
# presumably all from gpt_4o_mini; consider importing them explicitly.
from gpt_4o_mini import *
from svm_tfidf import DataPreparationSVM, SVMClassifier
from utilities import Config
import os
import dotenv
import requests

# Load variables from a .env file into the process environment; PROXY_URL and
# OPENAI_API_KEY are read with os.getenv in the cells below.
dotenv.load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

# Models

In [2]:
def get_models() -> list:
    """Return the ids of the models exposed by the proxy's ``/models`` endpoint.

    The endpoint base URL is read from the ``PROXY_URL`` environment variable
    and the bearer token from ``OPENAI_API_KEY``.

    Returns:
        A list with the ``id`` of every entry under the response's ``"data"``
        key, or ``None`` when the request fails or the payload does not have
        the expected shape (the error is printed, not raised).
    """
    # Read the environment outside the f-strings: nesting the same quote type
    # inside an f-string is only valid syntax from Python 3.12 onwards.
    proxy_url = os.getenv("PROXY_URL")
    api_key = os.getenv("OPENAI_API_KEY")

    url = f"{proxy_url}/models"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}

    try:
        # Without a timeout requests waits forever on an unresponsive proxy.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        payload = response.json()
        return [model["id"] for model in payload["data"]]
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None
    except (KeyError, TypeError, ValueError) as e:
        # Body was not JSON, or JSON without the expected {"data": [{"id": ...}]} layout.
        print(f"An error occurred: unexpected response format ({e!r})")
        return None

# Smoke test: list whatever the proxy currently serves (None or an empty list counts as failure).
available_models = get_models()
if not available_models:
    print("Failed to retrieve models")
else:
    print("Available models:", available_models)

Available models: ['gpt-4o-mini', 'gpt-4', 'gpt-4o', 'llama3.1:70b', 'o1-mini', 'llama3.1:8b', 'o1-preview', 'gpt-4-turbo']


# GPT-4o-mini

In [6]:
# Proxy endpoint and credentials come from the environment.
api_key = os.getenv("OPENAI_API_KEY")
proxy_url = os.getenv("PROXY_URL")

# Load both CSVs with a 20/20 train/test configuration.
config = Config(train_size=20, test_size=20)
data_prep = DataPreparationLLM(config)
data_prep.load_data("../data/open_domain_data.csv", "../data/specific_domain_data.csv")

gpt_model = GPT4Model(api_key=api_key, proxy_url=proxy_url, model_name='gpt-4o-mini')
evaluator = Evaluator()

# Optimize the classification module on the training examples.
trainer = Trainer(ClassificationModule, data_prep.train_data, evaluator)
compiled_model = trainer.optimize_model()

# Gold labels first, then one model call per test example.
true_labels = [example.label for example in data_prep.test_data]
predictions = []

total_tokens = 0

for example in data_prep.test_data:
    predictions.append(compiled_model(example.prompt).label)

accuracy = evaluator.evaluate_model(predictions, true_labels)

print(f"Evaluation results: Accuracy {accuracy * 100:.2f}%")

Device: cpu
Train data: 20
Test data: 20
Going to sample between 1 and 4 traces per predictor.
Will attempt to bootstrap 5 candidate sets.


Average Metric: 19 / 20  (95.0): 100%|██████████| 20/20 [00:00<00:00, 413.74it/s]


New best score: 95.0 for seed -3
Scores so far: [95.0]
Best score so far: 95.0


Average Metric: 19 / 20  (95.0): 100%|██████████| 20/20 [00:00<00:00, 484.49it/s] 


Scores so far: [95.0, 95.0]
Best score so far: 95.0


 20%|██        | 4/20 [00:00<?, ?it/s]


Bootstrapped 4 full traces after 5 examples in round 0.


Average Metric: 19 / 20  (95.0): 100%|██████████| 20/20 [00:00<00:00, 534.66it/s]


Scores so far: [95.0, 95.0, 95.0]
Best score so far: 95.0


 20%|██        | 4/20 [00:00<?, ?it/s]


Bootstrapped 4 full traces after 5 examples in round 0.


Average Metric: 19 / 20  (95.0): 100%|██████████| 20/20 [00:00<00:00, 487.58it/s]


Scores so far: [95.0, 95.0, 95.0, 95.0]
Best score so far: 95.0


 10%|█         | 2/20 [00:00<?, ?it/s]


Bootstrapped 2 full traces after 3 examples in round 0.


Average Metric: 19 / 20  (95.0): 100%|██████████| 20/20 [00:00<00:00, 405.69it/s] 


Scores so far: [95.0, 95.0, 95.0, 95.0, 95.0]
Best score so far: 95.0


  5%|▌         | 1/20 [00:00<?, ?it/s]


Bootstrapped 1 full traces after 2 examples in round 0.


Average Metric: 19 / 20  (95.0): 100%|██████████| 20/20 [00:00<00:00, 868.72it/s] 


Scores so far: [95.0, 95.0, 95.0, 95.0, 95.0, 95.0]
Best score so far: 95.0


 10%|█         | 2/20 [00:00<?, ?it/s]


Bootstrapped 2 full traces after 3 examples in round 0.


Average Metric: 19 / 20  (95.0): 100%|██████████| 20/20 [00:00<00:00, 810.05it/s] 


Scores so far: [95.0, 95.0, 95.0, 95.0, 95.0, 95.0, 95.0]
Best score so far: 95.0


 10%|█         | 2/20 [00:00<?, ?it/s]


Bootstrapped 2 full traces after 3 examples in round 0.


Average Metric: 19 / 20  (95.0): 100%|██████████| 20/20 [00:00<00:00, 817.30it/s] 


Scores so far: [95.0, 95.0, 95.0, 95.0, 95.0, 95.0, 95.0, 95.0]
Best score so far: 95.0
8 candidate programs found.
Evaluation results: Accuracy 95.00%


In [11]:
# Save the optimized program under ../models/, named after the model
# (the recorded output shows ../models/gpt-4o-mini.json).
# NOTE(review): this cell's execution count (11) is later than the SVM cells
# below it, so the recorded output was produced out of order — it only needs
# `trainer` and `gpt_model` from the cell above; confirm with Restart & Run All.
trainer.save_model(f"../models/{gpt_model.model_name}.json")

[('prog', Predict(StringSignature(prompt -> reasoning, label
    instructions='Classify if a text is specific for a domain or not. Target domain is law.'
    prompt = Field(annotation=str required=True json_schema_extra={'desc': 'The prompt to classify.', '__dspy_field_type': 'input', 'prefix': 'Prompt:'})
    reasoning = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${reasoning}', '__dspy_field_type': 'output'})
    label = Field(annotation=str required=True json_schema_extra={'desc': '1, if the input text is law domain, 0 otherwise.', '__dspy_field_type': 'output', 'prefix': 'Label:'})
)))]
Model saved to ../models/gpt-4o-mini.json


# SVM-TFIDF

In [7]:
# NOTE(review): `config` and `data_prep` rebind the names used by the
# GPT-4o-mini cell above; re-running that section afterwards requires
# re-running its own setup cell first.
# 20k train / 10k test examples; seed presumably fixes the split — confirm in Config.
config = Config(train_size=20_000, test_size=10_000, seed=4444)

data_prep = DataPreparationSVM(config)
# Unlike the LLM data preparation above, the return value of load_data is used here.
data = data_prep.load_data("../data/open_domain_data.csv", "../data/specific_domain_data.csv")

# Split into train/test features and labels consumed by the SVM cells below.
X_train, X_test, y_train, y_test = data_prep.prepare_data(data)

Device: cpu
Train size: 20000
Test size: 10000


In [8]:
svm_classifier = SVMClassifier(config)

# 5-fold cross-validation on the training split, then fit on the training data.
cross_val_scores = svm_classifier.cross_validate_model(X_train, y_train, cv=5)
print(cross_val_scores)
svm_classifier.train(X_train, y_train)

# Held-out test metrics, reported as percentages.
accuracy, precision, recall = svm_classifier.evaluate(X_test, y_test)
print(
    f"Accuracy: {accuracy * 100:.2f}%",
    f"Precision: {precision * 100:.2f}%",
    f"Recall: {recall * 100:.2f}%",
    sep="\n",
)

{'fit_time': array([17.03054047, 16.99490213, 17.06809402, 17.08671927, 24.49330306]), 'score_time': array([4.01497626, 3.96303582, 3.97708201, 4.43841743, 4.03846121]), 'test_accuracy': array([0.959  , 0.95475, 0.951  , 0.954  , 0.964  ]), 'test_precision': array([0.941247  , 0.93645084, 0.93273273, 0.93686109, 0.9510574 ]), 'test_recall': array([0.9596577 , 0.95418448, 0.94868662, 0.95174099, 0.96151497])}
Accuracy: 96.06%
Precision: 94.81%
Recall: 95.47%


## Overfitting check: training vs. test metrics

In [9]:
# Score the fitted classifier on its own training data; compare with the test metrics above.
train_accuracy, train_precision, train_recall = svm_classifier.evaluate(X_train, y_train)
print(
    f"Training Accuracy: {train_accuracy * 100:.2f}%",
    f"Training Precision: {train_precision * 100:.2f}%",
    f"Training Recall: {train_recall * 100:.2f}%",
    sep="\n",
)

Training Accuracy: 96.84%
Training Precision: 95.77%
Training Recall: 96.54%


In [10]:
# Save the trained SVM under ../models/, named after the classifier's
# model_name (the recorded output shows ../models/SVM_TFIDF.joblib).
svm_classifier.save_model(f"../models/{svm_classifier.model_name}.joblib")

Model saved to ../models/SVM_TFIDF.joblib


# LSTM + CNN