### Text Preprocessing

1. Text Cleaning
2. Case Folding
3. Tokenizing
4. Stopword removal
5. Stemming

In [3]:
import pandas as pd
import regex as re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from spellchecker import SpellChecker

In [4]:
df = pd.read_csv('dataset/2505_tripadvisor.csv')
# drop duplicates
df = df.drop_duplicates(subset=['review'])
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4146 entries, 0 to 4163
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   reviewer_link         4139 non-null   object
 1   reviewer_name         4139 non-null   object
 2   reviewer_location     4130 non-null   object
 3   reviews_contribution  3499 non-null   object
 4   rating                4146 non-null   object
 5   review_link           4146 non-null   object
 6   review_title          4146 non-null   object
 7   visit_date            4018 non-null   object
 8   review                4146 non-null   object
 9   review_date           4146 non-null   object
dtypes: object(10)
memory usage: 356.3+ KB


In [5]:
# get the column review only
df_temp = pd.DataFrame(columns=['text'])
df_temp['text'] = df['review']

df_temp.head(3)

Unnamed: 0,text
0,We came to Borobudur Temple accompanied by our...
1,Good place for Buddhist pilgrimage place. The ...
2,Fantastic experience as it was super well orga...


In [6]:
df_temp['text'][0]

"We came to Borobudur Temple accompanied by our driver from Yogyakarta. Came early in the morning after visiting Setumbu Hill. Before coming here, make sure you have a ticket to go up to the temple grounds to get the best experience, because if you only buy a temple ground ticket, you can only tour the temple grounds without guidance from a local guide. It's best to plan your trip carefully because Borobudur Temple is a destination that you must visit when you come to Indonesia."

In [7]:
# cleaning text
def clean_text(text):
    text = re.sub(r"’", "'", text)
    text = re.sub(r"‘", "'", text)
    text = re.sub(r"`", "'", text)
    text = re.sub(r"\\", "", text)

    # remove white space
    text = re.sub(r'\xa0', '', text)
    text = re.sub(r'\t', '', text)
    text = re.sub(r'\r', '', text)
    text = re.sub(r'\n', '', text)

    text = re.sub(r"\'d", "do", text)
    # change if text contains "n't" to "not"
    text = re.sub(r"n't", " not", text)
    # Remove punctuation
    text = re.sub(r'[^\w\s]', ' ', text)
    # Removing numbers
    text = re.sub(r'\d+', '', text)
    # Remove whitespace
    text = re.sub(r'\s+', ' ', text)
    # # Remove repeated words
    text = re.sub(r'\b(\w+)( \1\b)+', r'\1', text)
    # remove extra space at the end of the text
    text = re.sub(r'\s+$', '', text)

    return text

In [8]:
df_temp['text_clean'] = df_temp['text'].apply(clean_text)
df_temp.head(3)

Unnamed: 0,text,text_clean
0,We came to Borobudur Temple accompanied by our...,We came to Borobudur Temple accompanied by our...
1,Good place for Buddhist pilgrimage place. The ...,Good place for Buddhist pilgrimage place The t...
2,Fantastic experience as it was super well orga...,Fantastic experience as it was super well orga...


In [9]:
# case folding
def casefolding(text):
    return text.lower()

In [10]:
df_temp['text_cf'] = df_temp['text_clean'].apply(casefolding)
df_temp.head(3)

Unnamed: 0,text,text_clean,text_cf
0,We came to Borobudur Temple accompanied by our...,We came to Borobudur Temple accompanied by our...,we came to borobudur temple accompanied by our...
1,Good place for Buddhist pilgrimage place. The ...,Good place for Buddhist pilgrimage place The t...,good place for buddhist pilgrimage place the t...
2,Fantastic experience as it was super well orga...,Fantastic experience as it was super well orga...,fantastic experience as it was super well orga...


In [11]:
# tokenization
def tokenization(text):
    return text.split()

In [12]:
df_temp['text_token'] = df_temp['text_cf'].apply(tokenization)
df_temp.head(3)

Unnamed: 0,text,text_clean,text_cf,text_token
0,We came to Borobudur Temple accompanied by our...,We came to Borobudur Temple accompanied by our...,we came to borobudur temple accompanied by our...,"[we, came, to, borobudur, temple, accompanied,..."
1,Good place for Buddhist pilgrimage place. The ...,Good place for Buddhist pilgrimage place The t...,good place for buddhist pilgrimage place the t...,"[good, place, for, buddhist, pilgrimage, place..."
2,Fantastic experience as it was super well orga...,Fantastic experience as it was super well orga...,fantastic experience as it was super well orga...,"[fantastic, experience, as, it, was, super, we..."


In [13]:
# remove stopword

# using nltk english stopwords
stopwords = stopwords.words('english')


def remove_stopword(text):
    return [word for word in text if word not in stopwords]

In [14]:
df_temp['text_stopword'] = df_temp['text_token'].apply(remove_stopword)
df_temp.head(3)

Unnamed: 0,text,text_clean,text_cf,text_token,text_stopword
0,We came to Borobudur Temple accompanied by our...,We came to Borobudur Temple accompanied by our...,we came to borobudur temple accompanied by our...,"[we, came, to, borobudur, temple, accompanied,...","[came, borobudur, temple, accompanied, driver,..."
1,Good place for Buddhist pilgrimage place. The ...,Good place for Buddhist pilgrimage place The t...,good place for buddhist pilgrimage place the t...,"[good, place, for, buddhist, pilgrimage, place...","[good, place, buddhist, pilgrimage, place, tic..."
2,Fantastic experience as it was super well orga...,Fantastic experience as it was super well orga...,fantastic experience as it was super well orga...,"[fantastic, experience, as, it, was, super, we...","[fantastic, experience, super, well, organised..."


In [15]:
# stemming

# using NLTK stemmer
stemmer = PorterStemmer()

def stemming(text):
    temp = [stemmer.stem(word) for word in text]
    return temp

In [16]:
df_temp['text_stem'] = df_temp['text_stopword'].apply(stemming)
df_temp.head(3)

Unnamed: 0,text,text_clean,text_cf,text_token,text_stopword,text_stem
0,We came to Borobudur Temple accompanied by our...,We came to Borobudur Temple accompanied by our...,we came to borobudur temple accompanied by our...,"[we, came, to, borobudur, temple, accompanied,...","[came, borobudur, temple, accompanied, driver,...","[came, borobudur, templ, accompani, driver, yo..."
1,Good place for Buddhist pilgrimage place. The ...,Good place for Buddhist pilgrimage place The t...,good place for buddhist pilgrimage place the t...,"[good, place, for, buddhist, pilgrimage, place...","[good, place, buddhist, pilgrimage, place, tic...","[good, place, buddhist, pilgrimag, place, tick..."
2,Fantastic experience as it was super well orga...,Fantastic experience as it was super well orga...,fantastic experience as it was super well orga...,"[fantastic, experience, as, it, was, super, we...","[fantastic, experience, super, well, organised...","[fantast, experi, super, well, organis, organi..."


In [17]:
df_temp.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4146 entries, 0 to 4163
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   text           4146 non-null   object
 1   text_clean     4146 non-null   object
 2   text_cf        4146 non-null   object
 3   text_token     4146 non-null   object
 4   text_stopword  4146 non-null   object
 5   text_stem      4146 non-null   object
dtypes: object(6)
memory usage: 355.8+ KB


In [18]:
# save to csv
# df_temp.to_csv('cleaned_review.csv', index=False)

### Labelled

In [20]:
df_labelled_temp = pd.read_csv("dataset/final_sentiment.csv")
df_labelled_temp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4146 entries, 0 to 4145
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    4146 non-null   object
 1   label   4146 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 64.9+ KB


In [21]:
df_labelled_temp['label'].value_counts()

 1    3341
 0     403
-1     402
Name: label, dtype: int64

In [22]:
df_labelled = pd.merge(df_labelled_temp, df_temp, how='inner', on=['text'])
df_labelled.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4146 entries, 0 to 4145
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   text           4146 non-null   object
 1   label          4146 non-null   int64 
 2   text_clean     4146 non-null   object
 3   text_cf        4146 non-null   object
 4   text_token     4146 non-null   object
 5   text_stopword  4146 non-null   object
 6   text_stem      4146 non-null   object
dtypes: int64(1), object(6)
memory usage: 259.1+ KB


### SPLIT

In [23]:
from sklearn.model_selection import train_test_split

In [24]:
X_train, X_test, y_train, y_test = train_test_split(df_labelled['text_stem'], df_labelled['label'], test_size=0.2, random_state=42)

### TF-IDF

In [25]:
import numpy as np
import math
from collections import defaultdict

In [26]:
# Calculate Term Frequency (TF)
def compute_tf(doc):
    tf = defaultdict(int)
    for word in doc:
        tf[word] += 1
    for word in tf:
        tf[word] /= len(doc)
    return tf

# Calculate Document Frequency (DF)
def compute_df(docs):
    df = defaultdict(int)
    for doc in docs:
        unique_terms = set(doc)
        for term in unique_terms:
            df[term] += 1
    return df

# Calculate Inverse Document Frequency (IDF)
def compute_idf(df, n_docs):
    idf = {}
    for term, count in df.items():
        idf[term] = math.log(n_docs / count)
    return idf

# Calculate TF
def compute_tfidf(tf, idf):
    tfidf = {}
    for term, tf_val in tf.items():
        tfidf[term] = tf_val * idf.get(term, 0)
    return tfidf

# Compute TF-IDF for training data
n_docs_train = len(X_train)
tf_docs_train = [compute_tf(doc) for doc in X_train]
df_train = compute_df(X_train)
idf_train = compute_idf(df_train, n_docs_train)
tfidf_docs_train = [compute_tfidf(tf, idf_train) for tf in tf_docs_train]

# Build the TF-IDF matrix for training data
vocabulary = list(idf_train.keys())
tfidf_matrix_train = np.zeros((n_docs_train, len(vocabulary)))

for i, tfidf in enumerate(tfidf_docs_train):
    for term, val in tfidf.items():
        if term in vocabulary:
            term_index = vocabulary.index(term)
            tfidf_matrix_train[i, term_index] = val

# Compute TF-IDF for test data using the training vocabulary and IDF
n_docs_test = len(X_test)
tf_docs_test = [compute_tf(doc) for doc in X_test]
tfidf_docs_test = [compute_tfidf(tf, idf_train) for tf in tf_docs_test]

# Build the TF-IDF matrix for test data
tfidf_matrix_test = np.zeros((n_docs_test, len(vocabulary)))

for i, tfidf in enumerate(tfidf_docs_test):
    for term, val in tfidf.items():
        if term in vocabulary:
            term_index = vocabulary.index(term)
            tfidf_matrix_test[i, term_index] = val

### SVM lib

In [27]:
from sklearn import svm
from sklearn.metrics import accuracy_score, classification_report
# Initialize and train the SVM
clf = svm.SVC(kernel='rbf')
clf.fit(tfidf_matrix_train, y_train)

# Predict on the test set
y_pred = clf.predict(tfidf_matrix_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)

Accuracy: 0.8024096385542169
Classification Report:
              precision    recall  f1-score   support

          -1       1.00      0.07      0.12        92
           0       0.00      0.00      0.00        78
           1       0.80      1.00      0.89       660

    accuracy                           0.80       830
   macro avg       0.60      0.36      0.34       830
weighted avg       0.75      0.80      0.72       830



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [28]:
from sklearn.metrics import f1_score

# show the micro average f1 score per label
f1 = f1_score(y_test, y_pred, average='macro')
print(f"F1 Score: {f1:.4f}")


F1 Score: 0.3373


### Support Vector Machine

In [29]:
import time
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
import numpy as np
from cvxopt import matrix, solvers

In [30]:
# Function to calculate the gamma value for the RBF kernel
def calculate_gamma(X, gamma):
    if gamma == 'scale':
        # Use the 'scale' heuristic for gamma
        return 1.0 / (X.shape[1] * X.var())
    elif gamma == 'auto':
        # Use the 'auto' heuristic for gamma
        return 1.0 / X.shape[1]
    else:
        # Return the provided gamma value directly if it's numeric
        return gamma

# Function to compute the RBF (Radial Basis Function) kernel
def rbf_kernel(X1, X2, gamma):
    gamma_value = calculate_gamma(X1, gamma)
    X1_square = np.sum(X1 ** 2, axis=1).reshape(-1, 1)  # Compute squared norms of X1
    X2_square = np.sum(X2 ** 2, axis=1).reshape(1, -1)  # Compute squared norms of X2
    # Compute the RBF kernel matrix
    return np.exp(-gamma_value * (X1_square + X2_square - 2 * np.dot(X1, X2.T)))

In [31]:
class SVM:
    def __init__(self, C=1.0, gamma='scale'):
        self.C = C  # Regularization parameter
        self.gamma = gamma  # Kernel coefficient
        self.models = []  # List to hold models for each class

    # Fit the SVM model using One-vs-Rest approach for multiclass classification
    # X : 2D array (n_samples, n_features) - training data
    # y : 1D array (n_samples,) - target values
    def fit(self, X, y):
        self.classes = np.unique(y)  # Get unique class labels
        for cls in self.classes:
            y_binary = np.where(y == cls, 1, -1)  # Convert to binary labels (One-vs-Rest)
            model = self.train_svm(X, y_binary)  # Train SVM for current class
            self.models.append((cls, model))  # Store the model

    # Train an SVM for binary classification
    # X : 2D array (n_samples, n_features) - training data
    # y : 1D array (n_samples,) - target values
    # - model: dict - Contains trained model parameters:
    #       - 'alphas': 1D array-like, shape (n_samples,) - Lagrange multipliers
    #       - 'support_vectors': 2D array-like, shape (n_support_vectors, n_features) - Support vectors
    #       - 'support_vector_labels': 1D array-like, shape (n_support_vectors,) - Labels of support vectors
    #       - 'b': float - Bias term
    def train_svm(self, X, y):
        n_samples, n_features = X.shape
        K = rbf_kernel(X, X, self.gamma)  # Compute the kernel matrix

        # Formulate the quadratic programming problem for SVM
        P = matrix(np.outer(y, y) * K)  # Quadratic term
        q = matrix(np.ones(n_samples) * -1)  # Linear term
        G = matrix(np.vstack((np.diag(-np.ones(n_samples)), np.diag(np.ones(n_samples)))))  # Constraints
        h = matrix(np.hstack((np.zeros(n_samples), np.ones(n_samples) * self.C)))  # Constraints bounds
        A = matrix(y, (1, n_samples), 'd')  # Equality constraint matrix
        b = matrix(0.0)  # Equality constraint value

        solvers.options['show_progress'] = False  # Disable solver progress display
        solution = solvers.qp(P, q, G, h, A, b)  # Solve the quadratic programming problem
        alphas = np.ravel(solution['x'])  # Extract Lagrange multipliers

        # Identify support vectors
        support_vector_indices = alphas > 1e-5
        support_vectors = X[support_vector_indices]
        support_vector_labels = y[support_vector_indices]
        support_vector_alphas = alphas[support_vector_indices]

        # Compute the bias term (intercept)
        b = np.mean(
            support_vector_labels - 
            np.sum(support_vector_alphas * support_vector_labels * K[support_vector_indices][:, support_vector_indices], axis=1)
        )

        return {'alphas': support_vector_alphas, 'support_vectors': support_vectors, 'support_vector_labels': support_vector_labels, 'b': b}

    # Predict class labels for input data
    # X : 2D array (n_samples, n_features) - input data
    # Returns: 1D array (n_samples,) - Predicted class labels
    def predict(self, X):
        predictions = np.zeros((X.shape[0], len(self.models)))  # Initialize prediction matrix
        for idx, (cls, model) in enumerate(self.models):
            K = rbf_kernel(X, model['support_vectors'], self.gamma)  # Compute kernel matrix for current model
            # Compute decision function for each model
            decision_function = np.sum(model['alphas'] * model['support_vector_labels'] * K, axis=1) + model['b']
            predictions[:, idx] = decision_function  # Store predictions for the current class

        # Return the class with the highest decision function value
        return self.classes[np.argmax(predictions, axis=1)]


In [33]:
classifier = SVM(C=1.0, gamma='scale')
classifier.fit(tfidf_matrix_train, y_train)
y_pred = classifier.predict(tfidf_matrix_test)

In [34]:
def evaluate_model(y_test, y_pred):
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    conf_matrix = confusion_matrix(y_test, y_pred)
    return accuracy, f1, precision, recall, conf_matrix

In [35]:
accuracy, f1, precision, recall, conf_matrix = evaluate_model(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print(f"F1 Score: {f1}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")

Accuracy: 0.7975903614457831
F1 Score: 0.7101189934756182
Precision: 0.7446830801466736
Recall: 0.7975903614457831


In [36]:
start_time = time.time()

# Create SVM classifier with custom kernel
classifier = svm.SVC(kernel=lambda X, Y: rbf_kernel(X, Y, gamma='scale'), C=1.0)

# Train the model
classifier.fit(tfidf_matrix_train, y_train)

# Make predictions
y_pred = classifier.predict(tfidf_matrix_test)

# time for the model running
custom_time = time.time() - start_time

# evaluate model
accuracy, f1, precision, recall, conf_matrix = evaluate_model(y_test, y_pred)

print(f"Time: {custom_time:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")

Time: 1.1804
Accuracy: 0.7976
F1 Score: 0.7101
Precision: 0.7447
Recall: 0.7976


In [37]:
# count the f1 score for the negative label only
f1_score(y_test, y_pred, average=None)
# f1_score(y_test, y_pred, average='macro')

array([0.04255319, 0.        , 0.88709677])

In [38]:
cm = confusion_matrix(y_test, y_pred)

print(cm)

[[  2   0  90]
 [  0   0  78]
 [  0   0 660]]


In [39]:
# SVM library different kernel
start_time = time.time()
kernels = ['linear', 'poly', 'rbf', 'sigmoid']

kernels_comparison = []
for kernel in kernels:

    time_start = time.time()

    clf = svm.SVC(kernel=kernel, C=1.0, gamma='scale')
    clf.fit(tfidf_matrix_train, y_train)
    y_pred = clf.predict(tfidf_matrix_test)

    time_end = time.time() - time_start

    accuracy, f1, precision, recall, conf_matrix = evaluate_model(y_test, y_pred)
    print(f"Kernel: {kernel}")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(conf_matrix)

    kernels_comparison.append({
        'kernel': kernel,
        'accuracy': accuracy,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'time': time_end,
        'confusion_matrix': conf_matrix
    })

Kernel: linear
Accuracy: 0.8036
F1 Score: 0.7235
Precision: 0.7485
Recall: 0.8036
[[  7   0  85]
 [  0   0  78]
 [  0   0 660]]
Kernel: poly
Accuracy: 0.7940
F1 Score: 0.7039
Precision: 0.6321
Recall: 0.7940
[[  0   0  92]
 [  0   0  78]
 [  0   1 659]]
Kernel: rbf
Accuracy: 0.8024
F1 Score: 0.7209
Precision: 0.7478
Recall: 0.8024
[[  6   0  86]
 [  0   0  78]
 [  0   0 660]]
Kernel: sigmoid
Accuracy: 0.8193
F1 Score: 0.7567
Precision: 0.7560
Recall: 0.8193
[[ 22   0  70]
 [  1   0  77]
 [  0   2 658]]


In [40]:
# save to csv
df_comparison = pd.DataFrame(kernels_comparison)
df_comparison
# df_comparison.to_csv('kernels_comparison.csv', index=False)

Unnamed: 0,kernel,accuracy,f1,precision,recall,time,confusion_matrix
0,linear,0.803614,0.723455,0.748534,0.803614,29.515241,"[[7, 0, 85], [0, 0, 78], [0, 0, 660]]"
1,poly,0.793976,0.70386,0.632116,0.793976,64.627163,"[[0, 0, 92], [0, 0, 78], [0, 1, 659]]"
2,rbf,0.80241,0.720876,0.74776,0.80241,50.049158,"[[6, 0, 86], [0, 0, 78], [0, 0, 660]]"
3,sigmoid,0.819277,0.756715,0.755998,0.819277,23.867402,"[[22, 0, 70], [1, 0, 77], [0, 2, 658]]"


In [41]:
# Define the parameter grid
C_values = [0.01, 0.1, 1, 10, 100]
gamma_values = ['scale', 'auto']

results = []

for C in C_values:
    for gamma in gamma_values:
        print(f"Training with C={C}, gamma={gamma}")
        
        start_time = time.time()
        
        # Create SVM classifier with custom kernel
        clf = svm.SVC(kernel=lambda X, Y: rbf_kernel(X, Y, gamma), C=C)
        
        # Train the model
        clf.fit(tfidf_matrix_train, y_train)
        
        # Make predictions
        y_pred = clf.predict(tfidf_matrix_test)
        
        # time for the model running
        custom_time = time.time() - start_time

        # Evaluate the model
        accuracy, f1, precision, recall, conf_matrix = evaluate_model(y_test, y_pred)

        results.append({
            'C': C,
            'gamma': gamma,
            'accuracy': accuracy,
            'f1_score': f1,
            'precision': precision,
            'recall': recall,
            'time': custom_time,
            'confusion_matrix': conf_matrix
        })

        print(f"Accuracy: {accuracy:.4f}")
        print(f"F1 Score: {f1:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print(f"Time: {custom_time:.4f}")

Training with C=0.01, gamma=scale
Accuracy: 0.7952
F1 Score: 0.7045
Precision: 0.6323
Recall: 0.7952
Time: 1.1400
Training with C=0.01, gamma=auto
Accuracy: 0.7952
F1 Score: 0.7045
Precision: 0.6323
Recall: 0.7952
Time: 0.8815
Training with C=0.1, gamma=scale
Accuracy: 0.7952
F1 Score: 0.7045
Precision: 0.6323
Recall: 0.7952
Time: 1.1542
Training with C=0.1, gamma=auto
Accuracy: 0.7952
F1 Score: 0.7045
Precision: 0.6323
Recall: 0.7952
Time: 0.9216
Training with C=1, gamma=scale
Accuracy: 0.7976
F1 Score: 0.7101
Precision: 0.7447
Recall: 0.7976
Time: 1.1719
Training with C=1, gamma=auto
Accuracy: 0.7952
F1 Score: 0.7045
Precision: 0.6323
Recall: 0.7952
Time: 0.9503
Training with C=10, gamma=scale
Accuracy: 0.8096
F1 Score: 0.7358
Precision: 0.7524
Recall: 0.8096
Time: 1.1540
Training with C=10, gamma=auto
Accuracy: 0.7952
F1 Score: 0.7045
Precision: 0.6323
Recall: 0.7952
Time: 0.9162
Training with C=100, gamma=scale
Accuracy: 0.8096
F1 Score: 0.7358
Precision: 0.7524
Recall: 0.8096
Time

In [42]:
# Print all results
for result in results:
    print(f"C: {result['C']}, gamma: {result['gamma']}, Accuracy: {result['accuracy']:.4f}, F1 Score: {result['f1_score']:.4f}, Precision: {result['precision']:.4f}, Recall: {result['recall']:.4f}, Time: {result['time']:.4f}")

C: 0.01, gamma: scale, Accuracy: 0.7952, F1 Score: 0.7045, Precision: 0.6323, Recall: 0.7952, Time: 1.1400
C: 0.01, gamma: auto, Accuracy: 0.7952, F1 Score: 0.7045, Precision: 0.6323, Recall: 0.7952, Time: 0.8815
C: 0.1, gamma: scale, Accuracy: 0.7952, F1 Score: 0.7045, Precision: 0.6323, Recall: 0.7952, Time: 1.1542
C: 0.1, gamma: auto, Accuracy: 0.7952, F1 Score: 0.7045, Precision: 0.6323, Recall: 0.7952, Time: 0.9216
C: 1, gamma: scale, Accuracy: 0.7976, F1 Score: 0.7101, Precision: 0.7447, Recall: 0.7976, Time: 1.1719
C: 1, gamma: auto, Accuracy: 0.7952, F1 Score: 0.7045, Precision: 0.6323, Recall: 0.7952, Time: 0.9503
C: 10, gamma: scale, Accuracy: 0.8096, F1 Score: 0.7358, Precision: 0.7524, Recall: 0.8096, Time: 1.1540
C: 10, gamma: auto, Accuracy: 0.7952, F1 Score: 0.7045, Precision: 0.6323, Recall: 0.7952, Time: 0.9162
C: 100, gamma: scale, Accuracy: 0.8096, F1 Score: 0.7358, Precision: 0.7524, Recall: 0.8096, Time: 1.2033
C: 100, gamma: auto, Accuracy: 0.7952, F1 Score: 0.704

In [43]:
results[7]

{'C': 10,
 'gamma': 'auto',
 'accuracy': 0.7951807228915663,
 'f1_score': 0.7044554055146761,
 'precision': 0.6323123820583539,
 'recall': 0.7951807228915663,
 'time': 0.9162280559539795,
 'confusion_matrix': array([[  0,   0,  92],
        [  0,   0,  78],
        [  0,   0, 660]], dtype=int64)}

In [44]:
# Save results to CSV
results_df = pd.DataFrame(results)
results_df
# results_df.to_csv('output_svm_0907.csv', index=False)

Unnamed: 0,C,gamma,accuracy,f1_score,precision,recall,time,confusion_matrix
0,0.01,scale,0.795181,0.704455,0.632312,0.795181,1.140023,"[[0, 0, 92], [0, 0, 78], [0, 0, 660]]"
1,0.01,auto,0.795181,0.704455,0.632312,0.795181,0.881516,"[[0, 0, 92], [0, 0, 78], [0, 0, 660]]"
2,0.1,scale,0.795181,0.704455,0.632312,0.795181,1.154236,"[[0, 0, 92], [0, 0, 78], [0, 0, 660]]"
3,0.1,auto,0.795181,0.704455,0.632312,0.795181,0.921607,"[[0, 0, 92], [0, 0, 78], [0, 0, 660]]"
4,1.0,scale,0.79759,0.710119,0.744683,0.79759,1.171909,"[[2, 0, 90], [0, 0, 78], [0, 0, 660]]"
5,1.0,auto,0.795181,0.704455,0.632312,0.795181,0.950321,"[[0, 0, 92], [0, 0, 78], [0, 0, 660]]"
6,10.0,scale,0.809639,0.735754,0.752432,0.809639,1.153955,"[[12, 0, 80], [0, 0, 78], [0, 0, 660]]"
7,10.0,auto,0.795181,0.704455,0.632312,0.795181,0.916228,"[[0, 0, 92], [0, 0, 78], [0, 0, 660]]"
8,100.0,scale,0.809639,0.735754,0.752432,0.809639,1.203256,"[[12, 0, 80], [0, 0, 78], [0, 0, 660]]"
9,100.0,auto,0.795181,0.704455,0.632312,0.795181,1.022688,"[[0, 0, 92], [0, 0, 78], [0, 0, 660]]"
