I want to show you how to get results faster without changing the code. To do this, we will use another Python library, **[scikit-learn-intelex](https://github.com/intel/scikit-learn-intelex)**. It accelerates scikit-learn and does not require you to change any code written for scikit-learn.

In [1]:
# Install the Intel extension for scikit-learn. The leading "!" is a notebook
# shell escape; -q and --progress-bar off keep the cell output short.
!pip install scikit-learn-intelex -q --progress-bar off
from sklearnex import patch_sklearn
# Apply the patch before the scikit-learn imports further down.
# NOTE(review): the sklearnex docs say estimators imported before patching are
# not accelerated - confirm if the cell order is ever changed.
patch_sklearn()

In [1]:
import pandas as pd

# Location of the preprocessed dataset; it is parsed as JSON Lines
# (one record per line) because of lines=True.
data_url = 'https://github.com/lahdjirayhan/svm-dhany/blob/master/preprocessed_data.json?raw=true'
df = pd.read_json(data_url, lines=True)

# Preview the first rows of the loaded frame.
df.head()

In [1]:
def collapse_list_to_string(lst):
    """Join the strings in *lst* into one string separated by single spaces.

    An empty iterable yields the empty string.
    """
    return " ".join(lst)

# Build the "ready" column: each row's token list from "list_clean" is joined
# into a single space-separated string.
df["ready"] = df["list_clean"].apply(collapse_list_to_string)

# Preview the frame with the new column.
df.head()

In [1]:
# Drop the rows labelled 'neutral' and keep only the text and label columns.
# A single .loc selection followed by .copy() makes df_ready an independent
# frame, so the later assignment to its "label" column does not write into a
# slice of df (which would raise SettingWithCopyWarning).
df_ready = df.loc[df['label'] != 'neutral', ['ready', 'label']].copy()

# Show (rows, columns) of the filtered frame.
df_ready.shape

In [1]:
def classify_sentiment(s):
    """Encode a sentiment label as an integer.

    Returns 1 when *s* equals "positive" and 0 for any other value.
    """
    return 1 if s == "positive" else 0

# Encode the labels as integers (1 = "positive", 0 = anything else).
# The column is replaced via assign() with an explicit int64 dtype rather than
# written through df_ready.loc[:, "label"]: since pandas 2.0 that form sets the
# values in place, leaving an object-dtype column of Python ints, which
# scikit-learn classifiers reject as an unknown target type.
df_ready = df_ready.assign(
    label=df_ready["label"].apply(classify_sentiment).astype("int64")
)

In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV

In [1]:
# Pipeline for the SVM with an RBF kernel.
svm_rbf_pipeline = Pipeline(steps=[
    ('FE', TfidfVectorizer()),  # feature extractor (TF-IDF)
    ('CLF', SVC()),             # classifier (support vector classifier)
])

# Settings to tune, to see how each one changes the model's accuracy/evaluation.
# Keys use the "<step name>__<parameter>" form to address a pipeline step.
svm_rbf_hyperparameters = {
    "CLF__kernel": ["rbf"],
    "FE__ngram_range": [(1, 1), (2, 2), (3, 3), (1, 3)],
    "CLF__C": [0.1],
    # Powers of two: 2**-11, 2**-9, 2**-7, 2**-5, 2**-3.
    "CLF__gamma": [2 ** exponent for exponent in (-11, -9, -7, -5, -3)],
}

# Metrics evaluated for every parameter combination.
scores = ['accuracy', 'precision', 'recall', 'f1']

In [1]:
# Split texts and labels into training and testing parts, with 20% held out
# for testing.
(tweet_training, tweet_testing,
 label_training, label_testing) = train_test_split(
    df_ready['ready'],
    df_ready['label'],
    test_size=0.2,
    random_state=42,  # fixed seed so the split is reproducible (like set.seed)
)

In [1]:
# Grid search over the RBF-SVM pipeline with 5-fold cross-validation.
svm_rbf_grid_search = GridSearchCV(
    estimator=svm_rbf_pipeline,
    param_grid=svm_rbf_hyperparameters,
    scoring=scores,           # several metrics are evaluated at once
    refit=False,              # no final model is refit; only cv_results_ is used
    cv=5,
    verbose=3,
    return_train_score=True,  # needed for the mean_train_* result columns
)

In [1]:
# Run the cross-validated grid search on the training split only; the test
# split is not touched here.
svm_rbf_grid_search.fit(tweet_training, label_training)

In [1]:
# Show the raw cross-validation results recorded by the grid search.
svm_rbf_grid_search.cv_results_

In [1]:
# Report the classification-quality results for each tuned parameter combination.
def print_results_from_cv(cv):
    """Print mean train and test metrics for every parameter combination.

    *cv* is a mapping shaped like ``GridSearchCV.cv_results_``: it must hold
    a "params" sequence plus "mean_train_<metric>" and "mean_test_<metric>"
    sequences for accuracy, precision, recall and f1, all indexed in the
    same order as "params". Nothing is returned.
    """
    # Each section is printed after a blank line; rows are (caption, result key).
    sections = (
        (
            ("Training accuracy:", "mean_train_accuracy"),
            ("Training precision:", "mean_train_precision"),
            ("Training recall:", "mean_train_recall"),
            ("Training F1:", "mean_train_f1"),
        ),
        (
            ("Test accuracy:", "mean_test_accuracy"),
            ("Test precision:", "mean_test_precision"),
            ("Test recall:", "mean_test_recall"),
            ("Test F1:", "mean_test_f1"),
        ),
    )

    for position, combination in enumerate(cv["params"]):
        print("PARAMETER COMBINATION:", combination)
        for rows in sections:
            print()
            for caption, result_key in rows:
                print(caption, cv[result_key][position])
        print("=====================================================")

# Print the RBF-kernel SVM results for the various combinations of n-grams, gamma and C
print_results_from_cv(svm_rbf_grid_search.cv_results_)