In [None]:
# SVM with RBF kernel: grid search inside stratified cross-validation
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, GridSearchCV, train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_score
import joblib

# Read the CSV into a DataFrame.
filename = '/content/ReducedTweets.csv'
dataset = pd.read_csv(filename)

# Positional columns 1..15 become the input matrix, column 16 the target.
# NOTE(review): column 0 is skipped here -- presumably an id/index column; confirm.
feature_frame = dataset.iloc[:, 1:16]
label_series = dataset.iloc[:, 16]
X = feature_frame.values
y = label_series.values

# Hyperparameter grid for the RBF SVM: C and gamma each range over the powers
# of ten 10**-3, 10**-2 and 10**-1.
# The C range used to be range(-3, 0, 10); a step of 10 yields only the
# exponent -3, which pinned C at 0.001 and meant C was never actually tuned.
# NOTE(review): the upper bound mirrors the gamma range; widen it (e.g.
# range(-3, 3)) if larger C values were intended.
svm_rbf_parameters = {
    'C': [10**i for i in range(-3, 0)],
    'gamma': [10**i for i in range(-3, 0)],
}

# Hold out 20% of the rows (X_temp / y_temp); the remaining 80% is the
# training portion.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Outer and inner splitters share the same configuration: 10 stratified,
# shuffled folds with a fixed seed.
cv_settings = dict(n_splits=10, shuffle=True, random_state=42)
outer_cv = StratifiedKFold(**cv_settings)
inner_cv = StratifiedKFold(**cv_settings)

# Per-fold metric accumulators.
svm_rbf_accuracy_scores, svm_rbf_precision_scores = [], []
svm_rbf_recall_scores, svm_rbf_f1_scores = [], []

# Best model seen so far and the score it achieved.
best_svm_rbf_model, best_svm_rbf_score = None, 0.0

# Nested cross-validation: each outer fold tunes the hyperparameters with an
# inner grid search on its training part, then scores the tuned model on its
# held-out part.
for train_index, test_index in outer_cv.split(X_train, y_train):
    X_train_fold, X_test_fold = X_train[train_index], X_train[test_index]
    y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]

    # Grid search over svm_rbf_parameters using the inner CV splitter.
    svm_rbf_classifier = SVC(kernel='rbf', random_state=42)
    svm_rbf_grid_search = GridSearchCV(svm_rbf_classifier, svm_rbf_parameters, cv=inner_cv)
    svm_rbf_grid_search.fit(X_train_fold, y_train_fold)
    svm_rbf_best_params = svm_rbf_grid_search.best_params_

    # GridSearchCV refits the winning configuration on the whole of
    # X_train_fold by default (refit=True), so reuse that estimator instead of
    # building and training an identical SVC a second time.
    svm_rbf_best_model = svm_rbf_grid_search.best_estimator_

    # Predict on the outer fold's held-out part.
    svm_rbf_predictions = svm_rbf_best_model.predict(X_test_fold)

    # Accuracy is computed once and reused for the best-model comparison below.
    current_score = accuracy_score(y_test_fold, svm_rbf_predictions)
    svm_rbf_accuracy_scores.append(current_score)
    svm_rbf_precision_scores.append(precision_score(y_test_fold, svm_rbf_predictions))
    svm_rbf_recall_scores.append(recall_score(y_test_fold, svm_rbf_predictions))
    svm_rbf_f1_scores.append(f1_score(y_test_fold, svm_rbf_predictions))

    # Keep the fold model with the highest held-out accuracy.
    # NOTE(review): choosing a model by its outer-fold score makes that score
    # an optimistic estimate for the saved model -- confirm this is intended.
    if current_score > best_svm_rbf_score:
        best_svm_rbf_score = current_score
        best_svm_rbf_model = svm_rbf_best_model

# Persist the retained model, if any fold produced one.
if best_svm_rbf_model is not None:
    joblib.dump(best_svm_rbf_model, 'best_svm_rbf_model.pkl')

# Mean of each metric across the folds.
(
    svm_rbf_average_accuracy,
    svm_rbf_average_precision,
    svm_rbf_average_recall,
    svm_rbf_average_f1,
) = (
    np.mean(fold_scores)
    for fold_scores in (
        svm_rbf_accuracy_scores,
        svm_rbf_precision_scores,
        svm_rbf_recall_scores,
        svm_rbf_f1_scores,
    )
)

# Report the averaged metrics.
print("\nSVM RBF Test Set:")
for metric_label, metric_mean in (
    ("Average Accuracy:", svm_rbf_average_accuracy),
    ("Average Precision:", svm_rbf_average_precision),
    ("Average Recall:", svm_rbf_average_recall),
    ("Average F1-score:", svm_rbf_average_f1),
):
    print(metric_label, metric_mean)


SVM RBF Test Set:
Average Accuracy: 0.7411666666666666
Average Precision: 0.7549917739213529
Average Recall: 0.7138333333333333
Average F1-score: 0.7336976022532193
