In [None]:
from llm_classifier import LMTrainer, LMClassifier
from svm_tfidf import SVMClassifier
import os
import dotenv
import requests
import wandb
import random

dotenv.load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

# Models

In [None]:
def get_models(timeout: float = 10.0) -> "list[str] | None":
    """Fetch the identifiers of the models exposed by the proxy.

    Sends a GET request to ``<PROXY_URL>/models`` with a bearer token taken
    from ``OPENAI_API_KEY``; both values are read from the environment.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on an unresponsive host.

    Returns:
        A list with the ``id`` of every entry under the response's ``data``
        key, or ``None`` when the request fails or the payload does not have
        that shape (the reason is printed).
    """
    base_url = os.getenv("PROXY_URL")
    if not base_url:
        # Avoid issuing a request against the literal string "None/models".
        print("An error occurred: PROXY_URL is not set")
        return None

    # Values are pulled into locals first: reusing the enclosing quote
    # character inside an f-string expression only parses on Python 3.12+.
    api_key = os.getenv("OPENAI_API_KEY")
    url = f"{base_url}/models"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        payload = response.json()
        return [model["id"] for model in payload["data"]]
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None
    except (KeyError, TypeError, ValueError) as e:
        # Body was not JSON, or lacked the expected {"data": [{"id": ...}]} shape.
        print(f"An error occurred: unexpected response format ({e!r})")
        return None

# Example usage: show what get_models() returned, or report that it
# came back empty/None.
available_models = get_models()
if not available_models:
    print("Failed to retrieve models")
else:
    print("Available models:", available_models)

# GPT-4o-mini

In [None]:
# Credentials and endpoint come from the environment.
api_key = os.getenv("OPENAI_API_KEY")
proxy_url = os.getenv("PROXY_URL")

# Build the gpt-4o-mini classifier for the 'law' domain, then load both
# CSV datasets into it.
gpt_model = LMClassifier(
    api_key=api_key,
    proxy_url=proxy_url,
    domain='law',
    model_name='gpt-4o-mini',
    train_size=10,
    test_size=20,
)
gpt_model.load_data(
    open_path="../data/open_domain_data.csv",
    specific_path="../data/specific_domain_data.csv",
)

# The trainer is constructed from the classifier's training data.
trainer = LMTrainer(gpt_model.train_data)

In [None]:
# Run the trainer's optimization step and keep what it returns, then score
# the trainer against the classifier's test data.
compiled_model = trainer.optimize_model()
accuracy = trainer.test_model(gpt_model.test_data)

# accuracy is multiplied by 100 for display, so it is presumably a
# fraction in [0, 1] -- TODO confirm against LMTrainer.test_model.
print(f"Evaluation results: Accuracy {accuracy * 100:.2f}%")

# Write the model to ../models/<model_name>.json.
trainer.save_model(f"../models/{gpt_model.model_name}.json")

In [None]:
# Load the model file from ../models/<model_name>.json back into the trainer.
trainer.load_model(f"../models/{gpt_model.model_name}.json")

# Run a single prediction on a sample law-related question.
results = trainer.predict("How does law balance the protection of individual rights with the needs of society?")

print(f"Prediction results: {results}")

# SVM-TFIDF

In [None]:
def sweep_svm_classifier(config=None):
    """Run one sweep trial and report its cross-validation accuracy.

    Starts a wandb run with ``config``, builds an ``SVMClassifier`` from
    ``wandb.config``, trains it on the prepared training split, runs 3-fold
    cross-validation on that same split and logs the ``average_accuracy``
    entry of the result under the ``cv_results`` key.

    Args:
        config: Optional configuration forwarded to ``wandb.init``.

    Returns:
        The ``average_accuracy`` entry of the cross-validation result.
    """
    wandb.init(config=config)
    trial_config = wandb.config

    classifier = SVMClassifier(trial_config)

    # Only the training split is used here; the test split is discarded.
    X_train, _, y_train, _ = classifier.prepare_data(
        open_path="../data/open_domain_data.csv",
        specific_path="../data/specific_domain_data.csv",
    )

    classifier.train(X_train, y_train)
    scores = classifier.cross_validate_model(X_train, y_train, cv=3)
    mean_accuracy = scores.get("average_accuracy")

    wandb.log({"cv_results": mean_accuracy})

    return mean_accuracy

# Random-search sweep that maximizes the 'cv_results' metric, i.e. the key
# sweep_svm_classifier logs through wandb.log.
sweep_configuration = {
    'method': 'random',
    'name': 'svm-sweep',
    'metric': {'name': 'cv_results', 'goal': 'maximize'},
    'parameters': {
        # Float bounds for C.
        'C': {'min': 0.01, 'max': 10.0},
        # Integer bounds for the split sizes.
        'train_size': {'min': 1000, 'max': 10000},
        'test_size': {'min': 1000, 'max': 10000},
        # Ten candidate seeds drawn from [0, 100] each time this cell runs,
        # so the candidate list itself changes between executions.
        'seed': {'values': [random.randint(0, 100) for _ in range(10)]},
    },
}

# Register the sweep in the project, then run it locally with
# sweep_svm_classifier as the trial function (count=10).
sweep_id = wandb.sweep(sweep_configuration, project='bc-prompt-classification')

wandb.agent(sweep_id, function=sweep_svm_classifier, count=10)

In [None]:
# Authenticate, then look the sweep up through the public API using the
# sweep_id produced when the sweep was created.
wandb.login()
api = wandb.Api()

sweeps = api.sweep(f'/riso-sleher-fiit-stu/bc-prompt-classification/sweeps/{sweep_id}')

# Take the configuration of the sweep's best run and rebuild a classifier
# from it.
best_config = sweeps.best_run().config

svm_classifier = SVMClassifier(best_config)

X_train, X_test, y_train, y_test = svm_classifier.prepare_data(
    open_path="../data/open_domain_data.csv",
    specific_path="../data/specific_domain_data.csv",
)

# Train on the training split; the evaluate() call is the cell's last
# expression, so its return value is displayed as the cell output.
svm_classifier.train(X_train, y_train)
svm_classifier.evaluate(X_test, y_test)

wandb: Sorting runs by -summary_metrics.cv_results


Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.98      0.98      1135
           1       0.97      0.97      0.97       815

    accuracy                           0.97      1950
   macro avg       0.97      0.97      0.97      1950
weighted avg       0.97      0.97      0.97      1950



{'accuracy': 0.9738461538461538,
 'precision': np.float64(0.9692874692874693),
 'recall': np.float64(0.9680981595092024)}

In [None]:
# Save the SVM classifier to ../models/<model_name>.joblib
# (the file name is derived from svm_classifier.model_name).
svm_classifier.save_model(f'../models/{svm_classifier.model_name}.joblib')

In [None]:
# Load the classifier from ../models/<model_name>.joblib into the
# existing svm_classifier object.
svm_classifier.load_model(f'../models/{svm_classifier.model_name}.joblib')

# Classify a single sample question and show the prediction.
result = svm_classifier.predict("How does culture influence perceptions of justice and fairness within a legal system?")

print(result)