In [None]:
from gpt_4o_mini import *
from svm_tfidf import DataPreparationSVM, SVMClassifier
from utilities import Config
import os
import dotenv

# GPT-4o-mini

In [None]:
# Read credentials / proxy settings from the environment (dotenv is loaded first).
dotenv.load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
proxy_url = os.getenv("PROXY_URL")

# 20 training and 20 test examples are requested for the LLM run.
config = Config(train_size=20, test_size=20)
data_prep = DataPreparationLLM(config)
data_prep.load_data(
    "../data/open_domain_data.csv",
    "../data/specific_domain_data.csv",
)

# Model wrapper and the evaluator built on top of it.
gpt_model = GPT4Model(api_key=api_key, proxy_url=proxy_url)
evaluator = Evaluator(gpt_model)

# Optimize the classification module on the training split, then persist it.
trainer = Trainer(ClassificationModule, data_prep.train_data, evaluator)
compiled_model = trainer.optimize_model()
print("Model optimized and compiled.")
trainer.save_model(f"../models/{gpt_model.model_name}.json")

# Collect predictions on the test split while tallying tokens for the
# prompt and for the predicted label of every example.
predictions = []
true_labels = [example.label for example in data_prep.test_data]
total_tokens = 0

for example in data_prep.test_data:
    prompt = example.prompt
    prediction = compiled_model(prompt)
    predictions.append(prediction.label)

    for text in (prompt, prediction.label):
        total_tokens += evaluator.count_tokens(text)

# Summarize accuracy and the price derived from the token tally.
accuracy = evaluator.evaluate_model(predictions, true_labels)
price = evaluator.calculate_price(total_tokens)

print(f"Evaluation results: Accuracy {accuracy * 100:.2f}%")
print(f"Total price: ${price:.4f} USD")

# SVM-TFIDF

In [None]:

# NOTE: `config` and `data_prep` are rebound here, replacing the objects the
# GPT-4o-mini cell created under the same names.
config = Config(train_size=15_000, test_size=5_000)
data_prep = DataPreparationSVM(config)

# Load both CSV sources, then split into train/test features and labels.
data = data_prep.load_data(
    "../data/open_domain_data.csv",
    "../data/specific_domain_data.csv",
)
X_train, X_test, y_train, y_test = data_prep.prepare_data(data)


In [None]:
# Fit the SVM classifier on the prepared training split.
svm_classifier = SVMClassifier(config)
svm_classifier.train(X_train, y_train)

# Evaluate on the held-out split and report accuracy as a percentage.
accuracy = svm_classifier.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Save the model next to the GPT artifact in ../models/.
# Fix: the path previously read "..models/..." (missing "/"), which resolves to
# a directory literally named "..models" inside the current working directory.
svm_classifier.save_model(f"../models/{svm_classifier.model_name}.joblib")