In [2]:
import json
from collections import Counter

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, f1_score


In [None]:

# Load the training set from a JSON file.
# JSON is UTF-8 by specification; the encoding is passed explicitly so the
# platform's locale default cannot mis-decode non-ASCII patterns.
with open('intents.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
intents = data['intents']

# Create the training data set: two parallel lists holding one entry per
# pattern, where training_labels[i] is the tag of the intent that
# training_data[i] belongs to.
training_data = []
training_labels = []
for intent in intents:
    patterns = intent['patterns']
    training_data.extend(patterns)
    training_labels.extend(intent['tag'] for _ in patterns)

# Convert the training data to a TF-IDF representation.
# The fitted vectorizer is reused by the evaluation cell, so the test
# questions are projected onto the vocabulary learned here.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(training_data)
y_train = training_labels

# Train a random forest classifier (fixed random_state for reproducibility)
classifier = RandomForestClassifier(random_state=0)
classifier.fit(X_train, y_train)



In [3]:

# Load the testing set from a JSON file.
# JSON is UTF-8 by specification; the encoding is passed explicitly so the
# platform's locale default cannot mis-decode non-ASCII patterns.
with open('intents_testing.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
test_cases = data['intents']

# Test the classifier on the testing set.
# Evaluation is per intent, not per question: every pattern of a test case is
# classified, and the most frequent prediction becomes the single predicted
# intent recorded for that test case.
y_true = []
y_pred = []

for test_case in test_cases:
    questions = test_case['patterns']
    expected_intent = test_case['tag']
    if not questions:
        # No patterns means there is nothing to classify and no vote to take.
        continue
    # Vectorize and classify all questions of this intent in one batch
    # instead of one transform/predict round trip per question.
    X_test = vectorizer.transform(questions)
    predicted_intents = list(classifier.predict(X_test))
    # Majority voting. Counter.most_common orders equal counts by first
    # occurrence, so ties resolve the same way on every run; taking max()
    # over a set made the tie winner depend on set iteration order.
    predicted_intent = Counter(predicted_intents).most_common(1)[0][0]
    y_true.append(expected_intent)
    y_pred.append(predicted_intent)




In [4]:
# accuracy_score and f1_score come from the notebook's import cell; they are
# not re-imported here so that all dependencies stay declared in one place.
# y_true / y_pred hold one entry per test intent, so both metrics are
# intent-level scores rather than per-question scores.
accuracy = accuracy_score(y_true, y_pred)
# average='weighted': per-label F1 scores averaged with each label's support
# (its number of true instances) as the weight.
f1 = f1_score(y_true, y_pred, average='weighted')

In [5]:
# Report the evaluation metrics, one "label value" line per metric.
for metric_label, metric_value in (('Accuracy:', accuracy), ('F1 Score:', f1)):
    print(metric_label, metric_value)

Accuracy: 0.7
F1 Score: 0.6375
