In [2]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [13]:
# Directory that holds the three input CSVs. Set the AI_TASK_DATA_DIR
# environment variable to run this notebook on another machine; when it is
# unset (or empty) the original local folder is used, so existing runs behave
# exactly as before.
DATA_DIR = os.environ.get("AI_TASK_DATA_DIR") or r"C:\Users\thila\Downloads\AI_TASK"

# The working directory is still switched because the reads below and the
# model pickle written at the end of the notebook all use relative paths.
os.chdir(DATA_DIR)

# Labelled rows; the "Label" column is used as the classification target.
synthetic_df = pd.read_csv("Synthetic Data.csv")

# Embedding tables for titles and descriptions.
# NOTE(review): presumably one embedding row per row of "Synthetic Data.csv",
# in the same order -- confirm against whatever produced these files.
title_emb_df = pd.read_csv("title_embeddings.csv")
desc_emb_df = pd.read_csv("description_embeddings.csv")

In [15]:
# Fail fast if the three tables are not row-aligned: pd.concat(axis=1) aligns
# on the index and would pad the shorter frame with NaN instead of raising.
assert len(title_emb_df) == len(desc_emb_df) == len(synthetic_df), (
    "row count mismatch: "
    f"title={len(title_emb_df)}, desc={len(desc_emb_df)}, labels={len(synthetic_df)}"
)

# Feature matrix: title and description embedding columns side by side,
# shape (n_samples, title_dim + desc_dim).
combined_embeddings = pd.concat([title_emb_df, desc_emb_df], axis=1).to_numpy()

# Integer-encode the target. Labels are cast to str first, so a missing label
# is stringified and encoded as a class of its own rather than dropped.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(synthetic_df["Label"].astype(str))

# 80/20 split, stratified on the encoded label and seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    combined_embeddings, y, test_size=0.2, random_state=42, stratify=y
)

# Only the last expression of a cell is displayed, so the bare `X_train.shape`
# that used to precede this line never produced output and has been removed.
combined_embeddings.shape

(500, 768)

In [8]:
# Gaussian Naive Bayes baseline trained on the concatenated embeddings.
nb_model = GaussianNB().fit(X_train, y_train)
nb_preds = nb_model.predict(X_test)

# Precision and recall are support-weighted averages over the classes;
# zero_division=0 scores a class with no predicted/true samples as 0.
weighted_kwargs = dict(average='weighted', zero_division=0)
nb_accuracy = accuracy_score(y_test, nb_preds)
nb_precision = precision_score(y_test, nb_preds, **weighted_kwargs)
nb_recall = recall_score(y_test, nb_preds, **weighted_kwargs)

# Report the three held-out metrics to four decimal places.
for metric_name, metric_value in (
    ("Accuracy", nb_accuracy),
    ("Precision", nb_precision),
    ("Recall", nb_recall),
):
    print(f"{metric_name}: {metric_value:.4f}")

Accuracy: 0.9400
Precision: 0.9521
Recall: 0.9400


In [9]:
# RBF-kernel SVM on the concatenated embeddings.
# probability=True enables predict_proba via an internal cross-validated
# calibration that shuffles the data; random_state pins that shuffle so the
# fitted (and later pickled) model is reproducible across runs. The seed
# matches the one used for the train/test split.
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', probability=True, random_state=42)
svm_model.fit(X_train, y_train)

# Hard class predictions on the held-out split.
svm_preds = svm_model.predict(X_test)

# Precision and recall are support-weighted averages over the classes;
# zero_division=0 scores a class with no predicted/true samples as 0.
svm_accuracy = accuracy_score(y_test, svm_preds)
svm_precision = precision_score(y_test, svm_preds, average='weighted', zero_division=0)
svm_recall = recall_score(y_test, svm_preds, average='weighted', zero_division=0)

print(f"Accuracy: {svm_accuracy:.4f}")
print(f"Precision: {svm_precision:.4f}")
print(f"Recall: {svm_recall:.4f}")


Accuracy: 0.9600
Precision: 0.9643
Recall: 0.9600


In [13]:
import pickle

# Persist the fitted SVM next to the input data (path is relative to the cwd).
# NOTE(review): only the classifier is saved; `label_encoder` is not, so a
# consumer of this file gets integer class ids -- confirm that is intended.
MODEL_FILENAME = 'label_classification.pkl'
with open(MODEL_FILENAME, 'wb') as model_file:
    pickle.dump(svm_model, model_file)