In [None]:
import json
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score


In [None]:

# Load the training set from a JSON file.
# The encoding is stated explicitly so non-ASCII patterns decode the same way
# on every platform instead of depending on the locale default.
with open('intents.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
intents = data['intents']

# Create the training data set: flatten the intents into two parallel lists,
# one (pattern, tag) pair per training example.
training_data = []
training_labels = []
for intent in intents:
    for pattern in intent['patterns']:
        training_data.append(pattern)
        training_labels.append(intent['tag'])

# Convert the training data to a TF-IDF representation
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(training_data)
y_train = training_labels

# Train a random forest classifier.
# random_state fixes the bootstrap samples and per-split feature sampling so
# that re-running the notebook reproduces the same model and the same metrics.
classifier = RandomForestClassifier(
    criterion='gini',
    max_depth=120,
    n_estimators=45,
    random_state=42,
)
classifier.fit(X_train, y_train)



In [None]:
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score

# Calculate training accuracy, recall, precision, and F1 score.
# The forest is asked for its predictions once and the result is reused by
# every metric; previously the same prediction pass was repeated per metric.
train_predictions = classifier.predict(X_train)

# accuracy_score on the cached predictions is what classifier.score computes.
training_accuracy = accuracy_score(y_train, train_predictions)
training_recall = recall_score(y_train, train_predictions, average='weighted')
training_precision = precision_score(y_train, train_predictions, average='weighted')
training_f1 = f1_score(y_train, train_predictions, average='weighted')


In [None]:
# Show the training-set metrics, one value per line, in the order:
# accuracy, recall, precision, F1.
print(
    training_accuracy,
    training_recall,
    training_precision,
    training_f1,
    sep='\n',
)

0.9575688073394495
0.9575688073394495
0.9589586693860739
0.9573977522269154


In [None]:

from collections import Counter

# Load the testing set from a JSON file (explicit encoding so the text is
# decoded identically on every platform).
with open('intents_testing.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
test_cases = data['intents']

# Test the classifier on the testing set.
# One (expected, predicted) pair is recorded per intent: every question of the
# intent is classified and the most frequent prediction is taken as the answer.
y_true = []
y_pred = []

for test_case in test_cases:
    questions = test_case['patterns']
    expected_intent = test_case['tag']

    if not questions:
        # No questions means nothing to vote on; skip the intent instead of
        # failing on an empty vote.
        continue

    # Vectorise and classify all questions of this intent in a single batch.
    X_test = vectorizer.transform(questions)
    predicted_intents = classifier.predict(X_test).tolist()

    # Majority voting. Counter.most_common breaks ties by first occurrence,
    # so the winner is deterministic; picking the max over a set depended on
    # the set's iteration order, which can differ between interpreter runs.
    predicted_intent = Counter(predicted_intents).most_common(1)[0][0]

    y_true.append(expected_intent)
    y_pred.append(predicted_intent)




In [None]:
from sklearn.metrics import accuracy_score,recall_score, precision_score, f1_score

# Score the per-intent majority-vote predictions against the expected tags.
# Some tags are never predicted (or never expected), which makes their
# per-class precision/recall undefined. zero_division=0 states explicitly that
# such classes count as 0 - the same value the default fallback uses - and
# avoids the UndefinedMetricWarning this cell otherwise emits.
accuracy = accuracy_score(y_true, y_pred)
recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Report the test-set metrics as "<label> <value>" lines, in the order:
# accuracy, recall, precision, F1.
print('\n'.join(
    ' '.join((label, str(score)))
    for label, score in (
        ('Accuracy:', accuracy),
        ('Recall:', recall),
        ('Precision:', precision),
        ('F1 Score:', f1),
    )
))

Accuracy: 0.675
Recall: 0.675
Precision: 0.6041666666666666
F1 Score: 0.62


In [None]:
from sklearn.model_selection import cross_val_score, KFold
from sklearn.pipeline import make_pipeline

# Define the cross-validation method
cv = KFold(n_splits=5, shuffle=True, random_state=42)

# Cross-validate the vectoriser and the classifier together on the raw text.
# X_train was produced by a TF-IDF model fitted on *all* patterns, so scoring
# on it lets vocabulary and IDF weights from each validation fold leak into
# training. With a pipeline, TF-IDF is refitted on the training part of every
# fold. cross_val_score clones the pipeline per fold, so the already fitted
# `vectorizer` and `classifier` objects are left untouched.
cv_pipeline = make_pipeline(vectorizer, classifier)

# Perform cross-validation and print the results
scores = cross_val_score(cv_pipeline, training_data, y_train, cv=cv)
print(f'Scores: {scores}')
print(f'Mean score: {scores.mean()}')

Scores: [0.52       0.59428571 0.54022989 0.5        0.48850575]
Mean score: 0.5286042692939245
