<a href="https://colab.research.google.com/github/ruwanwija/Research-Models/blob/main/Industry_Research_Final_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import nltk
import re
from sklearn import svm
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import accuracy_score, f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.cluster import KMeans
import numpy as np

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Preprocess the text
def preprocess_text(text):
    """Normalise one review into a space-joined string of lemmatised tokens.

    Steps: lowercase, delete every character that is neither a word character
    nor whitespace, tokenise with NLTK, drop English stop words, then
    lemmatise each remaining token with WordNet.

    Args:
        text: Raw review text. Any value that is not a ``str`` (e.g. a
            missing cell) is treated as empty.

    Returns:
        The cleaned tokens joined by single spaces, or ``''`` for non-string
        input.
    """
    if not isinstance(text, str):
        return ''

    text = re.sub(r'[^\w\s]', '', text.lower())

    # Build the stop-word set once per call: evaluating
    # stopwords.words('english') inside the comprehension would fetch the
    # list again for every token and scan it linearly each time.
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    tokens = [
        lemmatizer.lemmatize(word)
        for word in word_tokenize(text)
        if word not in stop_words
    ]
    return ' '.join(tokens)

# Load the dataset
df = pd.read_csv('Sample dataset II.csv')

# Clean every review; preprocess_text returns '' for non-string cells
df['preprocessed_text'] = df['reviews.text'].apply(preprocess_text)

# Human-readable names, paired with cluster ids 0..4 purely by position.
# NOTE(review): K-means cluster ids carry no inherent meaning, so this
# name-to-cluster pairing is an assumption -- confirm by inspecting clusters.
labels = ["Location", "Food Quality", "Value for Money", "Comfort", "Staff Behavior"]

# Instantiated but not used anywhere in this cell
mlb = MultiLabelBinarizer()

# One cluster per label name (five in total)
num_clusters = len(labels)

# NOTE(review): the vectorizer is fitted on the full dataset before the
# cross-validation split, so every test fold contributes to the vocabulary
# and IDF weights used for training.
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(df['preprocessed_text'])

# Generate pseudo-labels by clustering the TF-IDF vectors.
# NOTE(review): the targets are derived from the same features the classifier
# sees, so the scores below measure how well the classifier reproduces the
# clustering rather than agreement with human annotations.
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
kmeans.fit(X)
df['cluster_label'] = kmeans.labels_

# One indicator column per cluster. dtype=int keeps the matrix as 0/1
# integers on every pandas version (pandas 2.x would otherwise return bool).
y = pd.get_dummies(df['cluster_label'], dtype=int).values

# Stratified 5-fold split; argmax turns the one-hot rows back into class ids
stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores, plus a running sum of the demo review's probabilities
accuracies = []
f1_scores = []
mean_probabilities = np.zeros(num_clusters)

# The demo review and its TF-IDF vector do not depend on the fold, so they
# are prepared once here instead of on every iteration.
new_record = ["Great place with unique room structure with wooden top. It helped spending our own time without any disturbance. We can see Beautiful Sun rise from the room by opening the curtain."]
new_record_preprocessed = [preprocess_text(text) for text in new_record]
new_record_tfidf = vectorizer.transform(new_record_preprocessed)

for train_index, test_index in stratified_kfold.split(X, np.argmax(y, axis=1)):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # One linear SVM per indicator column; probability=True enables
    # predict_proba on the wrapped estimators
    base_model = svm.SVC(kernel='linear', probability=True)
    model = OneVsRestClassifier(base_model)
    model.fit(X_train, y_train)

    # Score the demo review with this fold's model
    predicted_labels_numeric = model.predict(new_record_tfidf)
    probabilities = model.predict_proba(new_record_tfidf)

    # Accumulate now, divide by the number of folds after the loop
    mean_probabilities += probabilities[0]

    # Each column comes from its own binary classifier, so the values are
    # independent per label and need not sum to 1
    probability_dict = {labels[i]: probabilities[0][i] for i in range(num_clusters)}
    highest_label = max(probability_dict, key=probability_dict.get)
    highest_probability = probability_dict[highest_label]

    # Display results for the current fold
    print(f"Predicted Labels: {predicted_labels_numeric}")
    print("Probabilities for each class:")
    for label, prob in probability_dict.items():
        print(f"{label}: {prob:.4f}")  # Format to 4 decimal places

    print(f"\nHighest Probability Label: {highest_label} with probability: {highest_probability:.4f}")

    # Evaluate on the held-out fold using the binary indicator arrays
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='micro')

    accuracies.append(accuracy)
    f1_scores.append(f1)

# Turn the accumulated sums into per-label means over the folds
mean_probabilities /= stratified_kfold.get_n_splits()

# Label with the highest mean probability for the demo review
highest_mean_label = labels[np.argmax(mean_probabilities)]
highest_mean_probability = np.max(mean_probabilities)

# Display overall results
print(f'\nMean Accuracy: {np.mean(accuracies)}')
print(f'Mean F1 Score: {np.mean(f1_scores)}')
print("\nMean Probabilities for each class:")
for label, mean_prob in zip(labels, mean_probabilities):
    print(f"{label}: {mean_prob:.4f}")

print(f"\nHighest Mean Probability Label: {highest_mean_label} with probability: {highest_mean_probability:.4f}")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.0031
Food Quality: 0.4373
Value for Money: 0.0008
Comfort: 0.0005
Staff Behavior: 0.5979

Highest Probability Label: Staff Behavior with probability: 0.5979
Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.0025
Food Quality: 0.2283
Value for Money: 0.0008
Comfort: 0.0004
Staff Behavior: 0.8911

Highest Probability Label: Staff Behavior with probability: 0.8911
Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.0025
Food Quality: 0.1475
Value for Money: 0.0005
Comfort: 0.0003
Staff Behavior: 0.8975

Highest Probability Label: Staff Behavior with probability: 0.8975
Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.0028
Food Quality: 0.1395
Value for Money: 0.0024
Comfort: 0.0003
Staff Behavior: 0.9121

Highest Probability Label: Staff Behavior with probability: 0.9121
Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.00

In [None]:
import pickle

# Serialise whatever estimator is currently bound to `model`.
# NOTE(review): after the cross-validation cell, `model` is the estimator
# fitted inside the last fold (training split only) -- confirm that is the
# artifact intended for saving.
with open('svm_model.pkl', mode='wb') as model_out:
    pickle.dump(model, model_out)


In [2]:
import pickle

# Write the fitted classifier and the TF-IDF vectorizer to disk, one pickle
# file each, so both can be reloaded together for inference.
# NOTE(review): after the cross-validation cell, `model` is the estimator
# fitted inside the last fold (training split only) -- confirm that is the
# artifact intended for saving.
for artefact, target_path in ((model, 'svm_model.pkl'), (vectorizer, 'vectorizer.pkl')):
    with open(target_path, 'wb') as sink:
        pickle.dump(artefact, sink)


# SVM

In [4]:
import pandas as pd
import nltk
import re
from sklearn import svm
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import accuracy_score, f1_score, multilabel_confusion_matrix, classification_report
from sklearn.multiclass import OneVsRestClassifier
from sklearn.cluster import KMeans
import numpy as np

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Preprocess the text
def preprocess_text(text):
    """Normalise one review into a space-joined string of lemmatised tokens.

    Steps: lowercase, delete every character that is neither a word character
    nor whitespace, tokenise with NLTK, drop English stop words, then
    lemmatise each remaining token with WordNet.

    Args:
        text: Raw review text. Any value that is not a ``str`` (e.g. a
            missing cell) is treated as empty.

    Returns:
        The cleaned tokens joined by single spaces, or ``''`` for non-string
        input.
    """
    if not isinstance(text, str):
        return ''

    text = re.sub(r'[^\w\s]', '', text.lower())

    # Build the stop-word set once per call: evaluating
    # stopwords.words('english') inside the comprehension would fetch the
    # list again for every token and scan it linearly each time.
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    tokens = [
        lemmatizer.lemmatize(word)
        for word in word_tokenize(text)
        if word not in stop_words
    ]
    return ' '.join(tokens)

# Load the dataset
df = pd.read_csv('Sample dataset II.csv')

# Clean every review; preprocess_text returns '' for non-string cells
df['preprocessed_text'] = df['reviews.text'].apply(preprocess_text)

# Human-readable names, paired with cluster ids 0..4 purely by position.
# NOTE(review): K-means cluster ids carry no inherent meaning, so this
# name-to-cluster pairing is an assumption -- confirm by inspecting clusters.
labels = ["Location", "Food Quality", "Value for Money", "Comfort", "Staff Behavior"]

# Instantiated but not used anywhere in this cell
mlb = MultiLabelBinarizer()

# One cluster per label name (five in total)
num_clusters = len(labels)

# NOTE(review): the vectorizer is fitted on the full dataset before the
# cross-validation split, so every test fold contributes to the vocabulary
# and IDF weights used for training.
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(df['preprocessed_text'])

# Generate pseudo-labels by clustering the TF-IDF vectors.
# NOTE(review): the targets are derived from the same features the classifier
# sees, so the scores below measure how well the classifier reproduces the
# clustering rather than agreement with human annotations.
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
kmeans.fit(X)
df['cluster_label'] = kmeans.labels_

# One indicator column per cluster. dtype=int keeps the matrix as 0/1
# integers on every pandas version (pandas 2.x would otherwise return bool).
y = pd.get_dummies(df['cluster_label'], dtype=int).values

# Stratified 5-fold split; argmax turns the one-hot rows back into class ids
stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores, plus a running sum of the demo review's probabilities
accuracies = []
f1_scores = []
mean_probabilities = np.zeros(num_clusters)

# The demo review and its TF-IDF vector do not depend on the fold, so they
# are prepared once here instead of on every iteration.
new_record = ["Great place with unique room structure with wooden top. It helped spending our own time without any disturbance. We can see Beautiful Sun rise from the room by opening the curtain."]
new_record_preprocessed = [preprocess_text(text) for text in new_record]
new_record_tfidf = vectorizer.transform(new_record_preprocessed)

for train_index, test_index in stratified_kfold.split(X, np.argmax(y, axis=1)):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # One linear SVM per indicator column; probability=True enables
    # predict_proba on the wrapped estimators
    base_model = svm.SVC(kernel='linear', probability=True)
    model = OneVsRestClassifier(base_model)
    model.fit(X_train, y_train)

    # Score the demo review with this fold's model
    predicted_labels_numeric = model.predict(new_record_tfidf)
    probabilities = model.predict_proba(new_record_tfidf)

    # Accumulate now, divide by the number of folds after the loop
    mean_probabilities += probabilities[0]

    # Each column comes from its own binary classifier, so the values are
    # independent per label and need not sum to 1
    probability_dict = {labels[i]: probabilities[0][i] for i in range(num_clusters)}
    highest_label = max(probability_dict, key=probability_dict.get)
    highest_probability = probability_dict[highest_label]

    # Display results for the current fold
    print(f"Predicted Labels: {predicted_labels_numeric}")
    print("Probabilities for each class:")
    for label, prob in probability_dict.items():
        print(f"{label}: {prob:.4f}")  # Format to 4 decimal places

    print(f"\nHighest Probability Label: {highest_label} with probability: {highest_probability:.4f}")

    # Evaluate on the held-out fold
    y_pred = model.predict(X_test)

    # One 2x2 matrix per label, in the same order as `labels`
    conf_matrix = multilabel_confusion_matrix(y_test, y_pred)
    for i, label in enumerate(labels):
        print(f"\nConfusion Matrix for {label}:")
        print(conf_matrix[i])

    # Per-label precision, recall and F1 for this fold
    print(f"\nClassification Report for Fold:")
    print(classification_report(y_test, y_pred, target_names=labels))

    # Fold-level accuracy and micro-averaged F1 on the indicator arrays
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='micro')

    accuracies.append(accuracy)
    f1_scores.append(f1)

# Turn the accumulated sums into per-label means over the folds
mean_probabilities /= stratified_kfold.get_n_splits()

# Label with the highest mean probability for the demo review
highest_mean_label = labels[np.argmax(mean_probabilities)]
highest_mean_probability = np.max(mean_probabilities)

# Display overall results
print(f'\nMean Accuracy: {np.mean(accuracies)}')
print(f'Mean F1 Score: {np.mean(f1_scores)}')
print("\nMean Probabilities for each class:")
for label, mean_prob in zip(labels, mean_probabilities):
    print(f"{label}: {mean_prob:.4f}")

print(f"\nHighest Mean Probability Label: {highest_mean_label} with probability: {highest_mean_probability:.4f}")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.0033
Food Quality: 0.4074
Value for Money: 0.0007
Comfort: 0.0005
Staff Behavior: 0.5647

Highest Probability Label: Staff Behavior with probability: 0.5647

Confusion Matrix for Location:
[[187   0]
 [  0  13]]

Confusion Matrix for Food Quality:
[[143   7]
 [ 14  36]]

Confusion Matrix for Value for Money:
[[182   0]
 [  3  15]]

Confusion Matrix for Comfort:
[[175   0]
 [ 10  15]]

Confusion Matrix for Staff Behavior:
[[99  7]
 [ 5 89]]

Classification Report for Fold:
                 precision    recall  f1-score   support

       Location       1.00      1.00      1.00        13
   Food Quality       0.84      0.72      0.77        50
Value for Money       1.00      0.83      0.91        18
        Comfort       1.00      0.60      0.75        25
 Staff Behavior       0.93      0.95      0.94        94

      micro avg       0.92      0.84      0.88       200
      macro avg       0.95      0.82      0.87  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.0024
Food Quality: 0.1922
Value for Money: 0.0028
Comfort: 0.0005
Staff Behavior: 0.8928

Highest Probability Label: Staff Behavior with probability: 0.8928

Confusion Matrix for Location:
[[187   0]
 [  1  12]]

Confusion Matrix for Food Quality:
[[145   5]
 [ 11  39]]

Confusion Matrix for Value for Money:
[[182   0]
 [  8  10]]

Confusion Matrix for Comfort:
[[171   4]
 [  9  16]]

Confusion Matrix for Staff Behavior:
[[98  8]
 [ 5 89]]

Classification Report for Fold:
                 precision    recall  f1-score   support

       Location       1.00      0.92      0.96        13
   Food Quality       0.89      0.78      0.83        50
Value for Money       1.00      0.56      0.71        18
        Comfort       0.80      0.64      0.71        25
 Staff Behavior       0.92      0.95      0.93        94

      micro avg       0.91      0.83      0.87       200
      macro avg       0.92      0.77      0.83  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.0025
Food Quality: 0.1814
Value for Money: 0.0007
Comfort: 0.0002
Staff Behavior: 0.9073

Highest Probability Label: Staff Behavior with probability: 0.9073

Confusion Matrix for Location:
[[187   0]
 [  0  13]]

Confusion Matrix for Food Quality:
[[147   3]
 [ 13  37]]

Confusion Matrix for Value for Money:
[[181   1]
 [  6  12]]

Confusion Matrix for Comfort:
[[174   1]
 [  9  16]]

Confusion Matrix for Staff Behavior:
[[97  9]
 [ 4 90]]

Classification Report for Fold:
                 precision    recall  f1-score   support

       Location       1.00      1.00      1.00        13
   Food Quality       0.93      0.74      0.82        50
Value for Money       0.92      0.67      0.77        18
        Comfort       0.94      0.64      0.76        25
 Staff Behavior       0.91      0.96      0.93        94

      micro avg       0.92      0.84      0.88       200
      macro avg       0.94      0.80      0.86  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.0029
Food Quality: 0.1466
Value for Money: 0.0033
Comfort: 0.0001
Staff Behavior: 0.8999

Highest Probability Label: Staff Behavior with probability: 0.8999

Confusion Matrix for Location:
[[187   0]
 [  0  13]]

Confusion Matrix for Food Quality:
[[146   4]
 [ 11  39]]

Confusion Matrix for Value for Money:
[[182   0]
 [ 10   8]]

Confusion Matrix for Comfort:
[[175   0]
 [  7  18]]

Confusion Matrix for Staff Behavior:
[[98  8]
 [ 7 87]]

Classification Report for Fold:
                 precision    recall  f1-score   support

       Location       1.00      1.00      1.00        13
   Food Quality       0.91      0.78      0.84        50
Value for Money       1.00      0.44      0.62        18
        Comfort       1.00      0.72      0.84        25
 Staff Behavior       0.92      0.93      0.92        94

      micro avg       0.93      0.82      0.88       200
      macro avg       0.96      0.77      0.84  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.0025
Food Quality: 0.2253
Value for Money: 0.0003
Comfort: 0.0002
Staff Behavior: 0.9818

Highest Probability Label: Staff Behavior with probability: 0.9818

Confusion Matrix for Location:
[[187   0]
 [  2  10]]

Confusion Matrix for Food Quality:
[[146   3]
 [ 12  38]]

Confusion Matrix for Value for Money:
[[180   0]
 [  6  13]]

Confusion Matrix for Comfort:
[[172   2]
 [ 12  13]]

Confusion Matrix for Staff Behavior:
[[98  8]
 [ 9 84]]

Classification Report for Fold:
                 precision    recall  f1-score   support

       Location       1.00      0.83      0.91        12
   Food Quality       0.93      0.76      0.84        50
Value for Money       1.00      0.68      0.81        19
        Comfort       0.87      0.52      0.65        25
 Staff Behavior       0.91      0.90      0.91        93

      micro avg       0.92      0.79      0.85       199
      macro avg       0.94      0.74      0.82  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Logistic Regression

In [5]:
import pandas as pd
import nltk
import re
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import accuracy_score, f1_score, multilabel_confusion_matrix, classification_report
from sklearn.multiclass import OneVsRestClassifier
from sklearn.cluster import KMeans
import numpy as np

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Preprocess the text
def preprocess_text(text):
    """Normalise one review into a space-joined string of lemmatised tokens.

    Steps: lowercase, delete every character that is neither a word character
    nor whitespace, tokenise with NLTK, drop English stop words, then
    lemmatise each remaining token with WordNet.

    Args:
        text: Raw review text. Any value that is not a ``str`` (e.g. a
            missing cell) is treated as empty.

    Returns:
        The cleaned tokens joined by single spaces, or ``''`` for non-string
        input.
    """
    if not isinstance(text, str):
        return ''

    text = re.sub(r'[^\w\s]', '', text.lower())

    # Build the stop-word set once per call: evaluating
    # stopwords.words('english') inside the comprehension would fetch the
    # list again for every token and scan it linearly each time.
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    tokens = [
        lemmatizer.lemmatize(word)
        for word in word_tokenize(text)
        if word not in stop_words
    ]
    return ' '.join(tokens)

# Load the dataset
df = pd.read_csv('Sample dataset II.csv')

# Clean every review; preprocess_text returns '' for non-string cells
df['preprocessed_text'] = df['reviews.text'].apply(preprocess_text)

# Human-readable names, paired with cluster ids 0..4 purely by position.
# NOTE(review): K-means cluster ids carry no inherent meaning, so this
# name-to-cluster pairing is an assumption -- confirm by inspecting clusters.
labels = ["Location", "Food Quality", "Value for Money", "Comfort", "Staff Behavior"]

# Instantiated but not used anywhere in this cell
mlb = MultiLabelBinarizer()

# One cluster per label name (five in total)
num_clusters = len(labels)

# NOTE(review): the vectorizer is fitted on the full dataset before the
# cross-validation split, so every test fold contributes to the vocabulary
# and IDF weights used for training.
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(df['preprocessed_text'])

# Generate pseudo-labels by clustering the TF-IDF vectors.
# NOTE(review): the targets are derived from the same features the classifier
# sees, so the scores below measure how well the classifier reproduces the
# clustering rather than agreement with human annotations.
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
kmeans.fit(X)
df['cluster_label'] = kmeans.labels_

# One indicator column per cluster. dtype=int keeps the matrix as 0/1
# integers on every pandas version (pandas 2.x would otherwise return bool).
y = pd.get_dummies(df['cluster_label'], dtype=int).values

# Stratified 5-fold split; argmax turns the one-hot rows back into class ids
stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores, plus a running sum of the demo review's probabilities
accuracies = []
f1_scores = []
mean_probabilities = np.zeros(num_clusters)

# The demo review and its TF-IDF vector do not depend on the fold, so they
# are prepared once here instead of on every iteration.
new_record = ["Great place with unique room structure with wooden top. It helped spending our own time without any disturbance. We can see Beautiful Sun rise from the room by opening the curtain."]
new_record_preprocessed = [preprocess_text(text) for text in new_record]
new_record_tfidf = vectorizer.transform(new_record_preprocessed)

for train_index, test_index in stratified_kfold.split(X, np.argmax(y, axis=1)):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # One logistic-regression classifier per indicator column; max_iter is
    # raised to 1000 to give the solver more iterations to converge
    base_model = LogisticRegression(max_iter=1000)
    model = OneVsRestClassifier(base_model)
    model.fit(X_train, y_train)

    # Score the demo review with this fold's model
    predicted_labels_numeric = model.predict(new_record_tfidf)
    probabilities = model.predict_proba(new_record_tfidf)

    # Accumulate now, divide by the number of folds after the loop
    mean_probabilities += probabilities[0]

    # Each column comes from its own binary classifier, so the values are
    # independent per label and need not sum to 1
    probability_dict = {labels[i]: probabilities[0][i] for i in range(num_clusters)}
    highest_label = max(probability_dict, key=probability_dict.get)
    highest_probability = probability_dict[highest_label]

    # Display results for the current fold
    print(f"Predicted Labels: {predicted_labels_numeric}")
    print("Probabilities for each class:")
    for label, prob in probability_dict.items():
        print(f"{label}: {prob:.4f}")  # Format to 4 decimal places

    print(f"\nHighest Probability Label: {highest_label} with probability: {highest_probability:.4f}")

    # Evaluate on the held-out fold
    y_pred = model.predict(X_test)

    # One 2x2 matrix per label, in the same order as `labels`
    conf_matrix = multilabel_confusion_matrix(y_test, y_pred)
    for i, label in enumerate(labels):
        print(f"\nConfusion Matrix for {label}:")
        print(conf_matrix[i])

    # Per-label precision, recall and F1 for this fold
    print(f"\nClassification Report for Fold:")
    print(classification_report(y_test, y_pred, target_names=labels))

    # Fold-level accuracy and micro-averaged F1 on the indicator arrays
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='micro')

    accuracies.append(accuracy)
    f1_scores.append(f1)

# Turn the accumulated sums into per-label means over the folds
mean_probabilities /= stratified_kfold.get_n_splits()

# Label with the highest mean probability for the demo review
highest_mean_label = labels[np.argmax(mean_probabilities)]
highest_mean_probability = np.max(mean_probabilities)

# Display overall results
print(f'\nMean Accuracy: {np.mean(accuracies)}')
print(f'Mean F1 Score: {np.mean(f1_scores)}')
print("\nMean Probabilities for each class:")
for label, mean_prob in zip(labels, mean_probabilities):
    print(f"{label}: {mean_prob:.4f}")

print(f"\nHighest Mean Probability Label: {highest_mean_label} with probability: {highest_mean_probability:.4f}")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.0350
Food Quality: 0.2690
Value for Money: 0.0490
Comfort: 0.0548
Staff Behavior: 0.5828

Highest Probability Label: Staff Behavior with probability: 0.5828

Confusion Matrix for Location:
[[187   0]
 [  8   5]]

Confusion Matrix for Food Quality:
[[146   4]
 [ 36  14]]

Confusion Matrix for Value for Money:
[[182   0]
 [ 14   4]]

Confusion Matrix for Comfort:
[[175   0]
 [ 22   3]]

Confusion Matrix for Staff Behavior:
[[100   6]
 [  3  91]]

Classification Report for Fold:
                 precision    recall  f1-score   support

       Location       1.00      0.38      0.56        13
   Food Quality       0.78      0.28      0.41        50
Value for Money       1.00      0.22      0.36        18
        Comfort       1.00      0.12      0.21        25
 Staff Behavior       0.94      0.97      0.95        94

      micro avg       0.92      0.58      0.72       200
      macro avg       0.94      0.39      0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.0359
Food Quality: 0.2280
Value for Money: 0.0528
Comfort: 0.0603
Staff Behavior: 0.5989

Highest Probability Label: Staff Behavior with probability: 0.5989

Confusion Matrix for Location:
[[187   0]
 [  6   7]]

Confusion Matrix for Food Quality:
[[150   0]
 [ 32  18]]

Confusion Matrix for Value for Money:
[[182   0]
 [ 13   5]]

Confusion Matrix for Comfort:
[[175   0]
 [ 23   2]]

Confusion Matrix for Staff Behavior:
[[101   5]
 [  5  89]]

Classification Report for Fold:
                 precision    recall  f1-score   support

       Location       1.00      0.54      0.70        13
   Food Quality       1.00      0.36      0.53        50
Value for Money       1.00      0.28      0.43        18
        Comfort       1.00      0.08      0.15        25
 Staff Behavior       0.95      0.95      0.95        94

      micro avg       0.96      0.60      0.74       200
      macro avg       0.99      0.44      0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.0348
Food Quality: 0.2392
Value for Money: 0.0442
Comfort: 0.0588
Staff Behavior: 0.6330

Highest Probability Label: Staff Behavior with probability: 0.6330

Confusion Matrix for Location:
[[187   0]
 [  8   4]]

Confusion Matrix for Food Quality:
[[148   1]
 [ 38  12]]

Confusion Matrix for Value for Money:
[[180   0]
 [ 16   3]]

Confusion Matrix for Comfort:
[[174   0]
 [ 22   3]]

Confusion Matrix for Staff Behavior:
[[98  8]
 [ 8 85]]

Classification Report for Fold:
                 precision    recall  f1-score   support

       Location       1.00      0.33      0.50        12
   Food Quality       0.92      0.24      0.38        50
Value for Money       1.00      0.16      0.27        19
        Comfort       1.00      0.12      0.21        25
 Staff Behavior       0.91      0.91      0.91        93

      micro avg       0.92      0.54      0.68       199
      macro avg       0.97      0.35      0.46  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Decision Tree

In [7]:
import pandas as pd
import nltk
import re
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import accuracy_score, f1_score, multilabel_confusion_matrix, classification_report
from sklearn.multiclass import OneVsRestClassifier
from sklearn.cluster import KMeans
import numpy as np

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Preprocess the text
def preprocess_text(text):
    """Normalise a raw review into a space-joined string of lemmatised tokens.

    The text is lower-cased, stripped of punctuation, tokenised, filtered
    against the English stop-word list and lemmatised with WordNet.

    Args:
        text: The raw review text. Any value that is not a ``str`` (for
            example a missing value) is treated as empty.

    Returns:
        The cleaned text, or ``''`` when ``text`` is not a string.
    """
    if not isinstance(text, str):
        return ''

    # Build the stop-word set once per call. Re-reading the stop-word list
    # for every token (and testing membership against a list) made the
    # filter needlessly slow on long reviews.
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # Lower-case first, then drop everything that is not a word character
    # or whitespace.
    cleaned = re.sub(r'[^\w\s]', '', text.lower())

    return ' '.join(
        lemmatizer.lemmatize(word)
        for word in word_tokenize(cleaned)
        if word not in stop_words
    )

# Aspect names used when reporting results; cluster i is displayed as labels[i]
labels = ["Location", "Food Quality", "Value for Money", "Comfort", "Staff Behavior"]
num_clusters = len(labels)  # one cluster per aspect name

# MultiLabelBinarizer instance (not used further in this cell)
mlb = MultiLabelBinarizer()

# Read the reviews and store a cleaned copy of the review text
df = pd.read_csv('Sample dataset II.csv')
df['preprocessed_text'] = df['reviews.text'].apply(preprocess_text)

# TF-IDF features over the cleaned text, capped at 5000 terms
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(df['preprocessed_text'])

# Cluster the reviews with K-means; the cluster ids act as dummy labels
# for demonstration
kmeans = KMeans(n_clusters=num_clusters, random_state=42).fit(X)
df['cluster_label'] = kmeans.labels_

# One indicator column per cluster id (multi-label format)
y = pd.get_dummies(df['cluster_label']).to_numpy()

# Use StratifiedKFold so every fold keeps the same proportion of each cluster
stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores, plus a running sum of the new record's class probabilities
accuracies = []
f1_scores = []
mean_probabilities = np.zeros(num_clusters)  # summed per fold, averaged after the loop

# New record to predict. It does not depend on the fold, so it is cleaned and
# vectorised once here instead of being recomputed on every iteration.
new_record = ["Great place with unique room structure with wooden top. It helped spending our own time without any disturbance. We can see Beautiful Sun rise from the room by opening the curtain."]
new_record_preprocessed = [preprocess_text(text) for text in new_record]
new_record_tfidf = vectorizer.transform(new_record_preprocessed)

# The stratification target is the index of the single active indicator column
for train_index, test_index in stratified_kfold.split(X, np.argmax(y, axis=1)):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Create the Decision Tree model
    base_model = DecisionTreeClassifier(random_state=42)

    # Wrap the Decision Tree model with OneVsRestClassifier to handle multi-label classification
    model = OneVsRestClassifier(base_model)

    # Fit the model to the training data
    model.fit(X_train, y_train)

    # Predict the label for the new record
    predicted_labels_numeric = model.predict(new_record_tfidf)

    # Get the probabilities for each label category
    probabilities = model.predict_proba(new_record_tfidf)

    # Accumulate the probabilities; they are averaged once all folds are done
    mean_probabilities += probabilities[0]

    # Map each label name to the probability of its indicator column
    probability_dict = dict(zip(labels, probabilities[0]))

    # Find the label with the highest probability
    highest_label = max(probability_dict, key=probability_dict.get)
    highest_probability = probability_dict[highest_label]

    # Display results for the current fold
    print(f"Predicted Labels: {predicted_labels_numeric}")
    print("Probabilities for each class:")
    for label, prob in probability_dict.items():
        print(f"{label}: {prob:.4f}")  # Format to 4 decimal places

    print(f"\nHighest Probability Label: {highest_label} with probability: {highest_probability:.4f}")

    # Evaluate the model on the held-out fold
    y_pred = model.predict(X_test)

    # Calculate the confusion matrix for each label
    conf_matrix = multilabel_confusion_matrix(y_test, y_pred)

    # Display confusion matrices for each label
    for i, label in enumerate(labels):
        print(f"\nConfusion Matrix for {label}:")
        print(conf_matrix[i])

    # Generate and print precision, recall, and F1 score for each label.
    # zero_division=0 yields the same scores as the default but without the
    # undefined-metric warning when a row or label has no predictions.
    print("\nClassification Report for Fold:")
    print(classification_report(y_test, y_pred, target_names=labels, zero_division=0))

    # Calculate accuracy and F1 score using binary arrays
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='micro', zero_division=0)

    # Store scores for the current fold
    accuracies.append(accuracy)
    f1_scores.append(f1)

# Turn the per-fold probability sums into averages over the folds
mean_probabilities /= stratified_kfold.get_n_splits()

# Pick the label whose averaged probability is largest
best_index = np.argmax(mean_probabilities)
highest_mean_label = labels[best_index]
highest_mean_probability = mean_probabilities[best_index]

# Report the cross-validated scores and the averaged probabilities
print(f'\nMean Accuracy: {np.mean(accuracies)}')
print(f'Mean F1 Score: {np.mean(f1_scores)}')
print("\nMean Probabilities for each class:")
for index, label in enumerate(labels):
    print(f"{label}: {mean_probabilities[index]:.4f}")

print(f"\nHighest Mean Probability Label: {highest_mean_label} with probability: {highest_mean_probability:.4f}")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.0000
Food Quality: 0.0000
Value for Money: 0.0000
Comfort: 0.0000
Staff Behavior: 1.0000

Highest Probability Label: Staff Behavior with probability: 1.0000

Confusion Matrix for Location:
[[187   0]
 [  0  13]]

Confusion Matrix for Food Quality:
[[139  11]
 [ 17  33]]

Confusion Matrix for Value for Money:
[[178   4]
 [  5  13]]

Confusion Matrix for Comfort:
[[168   7]
 [ 10  15]]

Confusion Matrix for Staff Behavior:
[[86 20]
 [18 76]]

Classification Report for Fold:
                 precision    recall  f1-score   support

       Location       1.00      1.00      1.00        13
   Food Quality       0.75      0.66      0.70        50
Value for Money       0.76      0.72      0.74        18
        Comfort       0.68      0.60      0.64        25
 Staff Behavior       0.79      0.81      0.80        94

      micro avg       0.78      0.75      0.77       200
      macro avg       0.80      0.76      0.78  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.0000
Food Quality: 0.0000
Value for Money: 0.0000
Comfort: 0.0000
Staff Behavior: 1.0000

Highest Probability Label: Staff Behavior with probability: 1.0000

Confusion Matrix for Location:
[[187   0]
 [  2  11]]

Confusion Matrix for Food Quality:
[[138  12]
 [ 13  37]]

Confusion Matrix for Value for Money:
[[170  12]
 [  4  14]]

Confusion Matrix for Comfort:
[[170   5]
 [  5  20]]

Confusion Matrix for Staff Behavior:
[[95 11]
 [23 71]]

Classification Report for Fold:
                 precision    recall  f1-score   support

       Location       1.00      0.85      0.92        13
   Food Quality       0.76      0.74      0.75        50
Value for Money       0.54      0.78      0.64        18
        Comfort       0.80      0.80      0.80        25
 Staff Behavior       0.87      0.76      0.81        94

      micro avg       0.79      0.77      0.78       200
      macro avg       0.79      0.78      0.78  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.0000
Food Quality: 0.0000
Value for Money: 0.0000
Comfort: 0.0000
Staff Behavior: 1.0000

Highest Probability Label: Staff Behavior with probability: 1.0000

Confusion Matrix for Location:
[[187   0]
 [  0  13]]

Confusion Matrix for Food Quality:
[[134  16]
 [ 17  33]]

Confusion Matrix for Value for Money:
[[178   4]
 [  6  12]]

Confusion Matrix for Comfort:
[[167   8]
 [  7  18]]

Confusion Matrix for Staff Behavior:
[[94 12]
 [16 78]]

Classification Report for Fold:
                 precision    recall  f1-score   support

       Location       1.00      1.00      1.00        13
   Food Quality       0.67      0.66      0.67        50
Value for Money       0.75      0.67      0.71        18
        Comfort       0.69      0.72      0.71        25
 Staff Behavior       0.87      0.83      0.85        94

      micro avg       0.79      0.77      0.78       200
      macro avg       0.80      0.78      0.79  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.0000
Food Quality: 0.0000
Value for Money: 0.0000
Comfort: 0.0000
Staff Behavior: 1.0000

Highest Probability Label: Staff Behavior with probability: 1.0000

Confusion Matrix for Location:
[[187   0]
 [  0  13]]

Confusion Matrix for Food Quality:
[[140  10]
 [ 17  33]]

Confusion Matrix for Value for Money:
[[179   3]
 [  4  14]]

Confusion Matrix for Comfort:
[[167   8]
 [  5  20]]

Confusion Matrix for Staff Behavior:
[[92 14]
 [13 81]]

Classification Report for Fold:
                 precision    recall  f1-score   support

       Location       1.00      1.00      1.00        13
   Food Quality       0.77      0.66      0.71        50
Value for Money       0.82      0.78      0.80        18
        Comfort       0.71      0.80      0.75        25
 Staff Behavior       0.85      0.86      0.86        94

      micro avg       0.82      0.81      0.81       200
      macro avg       0.83      0.82      0.82  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Predicted Labels: [[0 0 0 0 1]]
Probabilities for each class:
Location: 0.0000
Food Quality: 0.0000
Value for Money: 0.0000
Comfort: 0.0000
Staff Behavior: 1.0000

Highest Probability Label: Staff Behavior with probability: 1.0000

Confusion Matrix for Location:
[[187   0]
 [  1  11]]

Confusion Matrix for Food Quality:
[[134  15]
 [ 12  38]]

Confusion Matrix for Value for Money:
[[174   6]
 [  8  11]]

Confusion Matrix for Comfort:
[[163  11]
 [  5  20]]

Confusion Matrix for Staff Behavior:
[[93 13]
 [11 82]]

Classification Report for Fold:
                 precision    recall  f1-score   support

       Location       1.00      0.92      0.96        12
   Food Quality       0.72      0.76      0.74        50
Value for Money       0.65      0.58      0.61        19
        Comfort       0.65      0.80      0.71        25
 Staff Behavior       0.86      0.88      0.87        93

      micro avg       0.78      0.81      0.80       199
      macro avg       0.77      0.79      0.78  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Deep Neural Network

In [8]:
import pandas as pd
import nltk
import re
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.cluster import KMeans
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Preprocess the text
def preprocess_text(text):
    """Normalise a raw review into a space-joined string of lemmatised tokens.

    The text is lower-cased, stripped of punctuation, tokenised, filtered
    against the English stop-word list and lemmatised with WordNet.

    Args:
        text: The raw review text. Any value that is not a ``str`` (for
            example a missing value) is treated as empty.

    Returns:
        The cleaned text, or ``''`` when ``text`` is not a string.
    """
    if not isinstance(text, str):
        return ''

    # Build the stop-word set once per call. Re-reading the stop-word list
    # for every token (and testing membership against a list) made the
    # filter needlessly slow on long reviews.
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # Lower-case first, then drop everything that is not a word character
    # or whitespace.
    cleaned = re.sub(r'[^\w\s]', '', text.lower())

    return ' '.join(
        lemmatizer.lemmatize(word)
        for word in word_tokenize(cleaned)
        if word not in stop_words
    )

# Aspect names used when reporting results, one per cluster
labels = ["Location", "Food Quality", "Value for Money", "Comfort", "Staff Behavior"]
num_clusters = len(labels)

# MultiLabelBinarizer instance (not used further in this cell)
mlb = MultiLabelBinarizer()

# Read the reviews and store a cleaned copy of the review text
df = pd.read_csv('Sample dataset II.csv')
df['preprocessed_text'] = df['reviews.text'].apply(preprocess_text)

# TF-IDF features over the cleaned text, capped at 5000 terms
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(df['preprocessed_text'])

# Cluster the reviews with K-means; the cluster ids act as dummy labels
# for demonstration
kmeans = KMeans(n_clusters=num_clusters, random_state=42).fit(X)
df['cluster_label'] = kmeans.labels_

# One indicator column per cluster id (multi-label format)
y = pd.get_dummies(df['cluster_label']).to_numpy()

# Define a function to create a DNN model
def create_dnn_model(input_shape, num_classes):
    """Build and compile a small feed-forward network with sigmoid outputs.

    Architecture: Dense(128, relu) -> Dropout(0.5) -> Dense(64, relu)
    -> Dropout(0.3) -> Dense(num_classes, sigmoid).

    Args:
        input_shape: Number of input features per sample, given as an int;
            it is wrapped into a 1-tuple for Keras.
        num_classes: Number of output units, one per label.

    Returns:
        The compiled ``Sequential`` model (Adam with learning rate 0.001,
        binary cross-entropy loss, ``accuracy`` metric).
    """
    model = Sequential([
        # Declare the input with an explicit Input object. Passing
        # `input_shape=` to the first Dense layer makes recent Keras
        # versions emit a warning for Sequential models.
        tf.keras.Input(shape=(input_shape,)),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(num_classes, activation='sigmoid'),  # Sigmoid for multi-label output
    ])
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Use StratifiedKFold so every fold keeps the same proportion of each cluster
stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Store accuracies and reports for each fold
accuracies = []
classification_reports = []

# The stratification target is the index of the single active indicator column
for train_index, test_index in stratified_kfold.split(X, np.argmax(y, axis=1)):
    # A fresh model is built for every fold; clear the Keras global state
    # first so models from earlier folds do not pile up in memory.
    tf.keras.backend.clear_session()

    # Convert the sparse TF-IDF rows to dense arrays before feeding Keras
    X_train, X_test = X[train_index].toarray(), X[test_index].toarray()
    y_train, y_test = y[train_index], y[test_index]

    # Create the DNN model
    model = create_dnn_model(X_train.shape[1], num_clusters)

    # Stop when validation loss has not improved for 3 epochs and roll back
    # to the best weights seen
    early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    # Train the model, holding out 20% of the training fold for validation
    history = model.fit(X_train, y_train,
                        epochs=20,
                        batch_size=32,
                        validation_split=0.2,
                        callbacks=[early_stopping],
                        verbose=1)

    # Evaluate the model
    y_pred_prob = model.predict(X_test)
    y_pred = (y_pred_prob > 0.5).astype(int)  # Convert probabilities to binary values

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

    # Generate and store classification report.
    # zero_division=0 yields the same scores as the default but without the
    # undefined-metric warning when a row or label has no predictions.
    report = classification_report(y_test, y_pred, target_names=labels,
                                   output_dict=True, zero_division=0)
    classification_reports.append(report)

    # Display results for the current fold
    print(f"\nFold Accuracy: {accuracy}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=labels, zero_division=0))

# Display overall results
print(f'\nMean Accuracy across folds: {np.mean(accuracies)}')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - accuracy: 0.3114 - loss: 0.6826 - val_accuracy: 0.5188 - val_loss: 0.6281
Epoch 2/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.4732 - loss: 0.5935 - val_accuracy: 0.5188 - val_loss: 0.4769
Epoch 3/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.4724 - loss: 0.4545 - val_accuracy: 0.5188 - val_loss: 0.4088
Epoch 4/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.5079 - loss: 0.4020 - val_accuracy: 0.5188 - val_loss: 0.3877
Epoch 5/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.5657 - loss: 0.3606 - val_accuracy: 0.5625 - val_loss: 0.3613
Epoch 6/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.7054 - loss: 0.3090 - val_accuracy: 0.6438 - val_loss: 0.3273
Epoch 7/20
[1m20/20[0m [32m━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - accuracy: 0.3087 - loss: 0.6857 - val_accuracy: 0.5312 - val_loss: 0.6393
Epoch 2/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4679 - loss: 0.6109 - val_accuracy: 0.5312 - val_loss: 0.5096
Epoch 3/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4607 - loss: 0.4889 - val_accuracy: 0.5312 - val_loss: 0.4202
Epoch 4/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.4671 - loss: 0.4176 - val_accuracy: 0.5312 - val_loss: 0.3817
Epoch 5/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5424 - loss: 0.3768 - val_accuracy: 0.5875 - val_loss: 0.3524
Epoch 6/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6956 - loss: 0.3083 - val_accuracy: 0.6625 - val_loss: 0.3180
Epoch 7/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - accuracy: 0.2927 - loss: 0.6809 - val_accuracy: 0.5500 - val_loss: 0.6250
Epoch 2/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.4748 - loss: 0.5870 - val_accuracy: 0.4812 - val_loss: 0.4725
Epoch 3/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.4922 - loss: 0.4497 - val_accuracy: 0.4812 - val_loss: 0.4108
Epoch 4/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.4994 - loss: 0.4016 - val_accuracy: 0.5188 - val_loss: 0.3837
Epoch 5/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5965 - loss: 0.3466 - val_accuracy: 0.5875 - val_loss: 0.3504
Epoch 6/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.7265 - loss: 0.2771 - val_accuracy: 0.6313 - val_loss: 0.3184
Epoch 7/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━



[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Fold Accuracy: 0.77

Classification Report:
                 precision    recall  f1-score   support

       Location       1.00      1.00      1.00        13
   Food Quality       0.91      0.62      0.74        50
Value for Money       0.90      0.50      0.64        18
        Comfort       0.93      0.52      0.67        25
 Staff Behavior       0.85      0.94      0.89        94

      micro avg       0.89      0.77      0.82       200
      macro avg       0.92      0.72      0.79       200
   weighted avg       0.89      0.77      0.81       200
    samples avg       0.77      0.77      0.77       200

Epoch 1/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - accuracy: 0.3694 - loss: 0.6814 - val_accuracy: 0.5188 - val_loss: 0.6251
Epoch 2/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4387 - loss: 0.5901 - val_accuracy: 0.5188 - val_loss: 0.4843
Epoch 3/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m



[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Fold Accuracy: 0.715

Classification Report:
                 precision    recall  f1-score   support

       Location       1.00      1.00      1.00        13
   Food Quality       0.83      0.70      0.76        50
Value for Money       1.00      0.56      0.71        18
        Comfort       1.00      0.44      0.61        25
 Staff Behavior       0.81      0.79      0.80        94

      micro avg       0.86      0.71      0.78       200
      macro avg       0.93      0.70      0.78       200
   weighted avg       0.87      0.71      0.77       200
    samples avg       0.71      0.71      0.71       200

Epoch 1/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - accuracy: 0.4049 - loss: 0.6745 - val_accuracy: 0.5250 - val_loss: 0.6006
Epoch 2/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4694 - loss: 0.5619 - val_accuracy: 0.5250 - val_loss: 0.4527
Epoch 3/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
