In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Load the data
file_path = 'data.csv'  # Replace with the path to your file
data = pd.read_csv(file_path)

# Fail early, with a readable message, if the expected phrase columns are absent
# (plain column indexing would raise a bare KeyError naming only the first one).
required_columns = ['buyer_phrases', 'seller_phrases']
missing_columns = [col for col in required_columns if col not in data.columns]
if missing_columns:
    raise KeyError(f"{file_path} is missing required column(s): {missing_columns}")

# Reshape data into a 'text' and 'label' format.
# Empty CSV cells are read as NaN (e.g. when one phrase column is shorter than
# the other), and CountVectorizer refuses NaN documents, so missing phrases are
# dropped per column before the label is attached.
buyer_data = pd.DataFrame({
    'text': data['buyer_phrases'].dropna(),
    'label': 'buyer'
})

seller_data = pd.DataFrame({
    'text': data['seller_phrases'].dropna(),
    'label': 'seller'
})

# Combine into a single dataset; ignore_index gives a clean 0..n-1 index after
# the per-column drops above.
classification_data = pd.concat([buyer_data, seller_data], ignore_index=True)

# Split the data into training and test sets (80/20, fixed seed for repeatable runs).
# NOTE(review): the split is not stratified, so the buyer/seller ratio in the
# test set can drift from the full data. Consider passing
# stratify=classification_data['label'] once every class has enough rows.
X_train, X_test, y_train, y_test = train_test_split(
    classification_data['text'], classification_data['label'], test_size=0.2, random_state=42
)

# Convert text data to a bag-of-words representation.
# The vocabulary is learned from the training split only; the test split is
# merely transformed so no test vocabulary leaks into training.
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Initialize and train a Naive Bayes classifier on the word-count features
model = MultinomialNB()
model.fit(X_train_vec, y_train)

# Predict on the test set
y_pred = model.predict(X_test_vec)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Print the results
print(f'Accuracy: {accuracy * 100:.2f}%')
print('Classification Report:\n', classification_rep)

# Example: Predicting the category of a custom text phrase
example_text = ["Can you do a bulk deal?", "I’ll bring the best product."]

# Transform the example text using the same fitted vectorizer so the feature
# columns line up with what the model was trained on.
example_text_vec = vectorizer.transform(example_text)

# Predict and display the results
predictions = model.predict(example_text_vec)
for text, prediction in zip(example_text, predictions):
    print(f"Phrase: '{text}' is classified as: {prediction}")


Accuracy: 85.00%
Classification Report:
               precision    recall  f1-score   support

       buyer       0.80      1.00      0.89        12
      seller       1.00      0.62      0.77         8

    accuracy                           0.85        20
   macro avg       0.90      0.81      0.83        20
weighted avg       0.88      0.85      0.84        20

Phrase: 'Can you do a bulk deal?' is classified as: buyer
Phrase: 'I’ll bring the best product.' is classified as: seller
