In [8]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from collections import Counter 
from sklearn.feature_extraction.text import CountVectorizer
from scipy.sparse import csr_matrix, hstack
import itertools
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import joblib

In [2]:
# Read the pre-extracted feature archives for the training and validation splits
# and pull the 'features' / 'label' arrays out of each one.
# NOTE(review): allow_pickle=True permits unpickling object arrays, which can
# execute arbitrary code -- only use it on archives from a trusted source.
train_feat = np.load("datasets/train/train_feature.npz", allow_pickle=True)
train_feat_X, train_feat_Y = train_feat['features'], train_feat['label']

val_feat = np.load("datasets/valid/valid_feature.npz", allow_pickle=True)
val_feat_X, val_feat_Y = val_feat['features'], val_feat['label']

In [3]:
# Collapse every trailing axis so each sample becomes a single 1-D feature
# vector: the result has shape (n_samples, product of the remaining dims).
n_samples_train = train_feat_X.shape[0]
X_train_flattened = np.reshape(train_feat_X, (n_samples_train, -1))

n_samples_val = val_feat_X.shape[0]
X_val_flattened = np.reshape(val_feat_X, (n_samples_val, -1))

In [4]:
# Reduce the flattened features to 100 principal components.
# random_state is pinned because PCA's default 'auto' solver may select the
# randomized SVD for large inputs; without a seed the projection (and therefore
# the downstream accuracy and the saved model) can differ between runs.
pca = PCA(n_components=100, random_state=42)

# Fit on the training split only, so the validation data has no influence on
# the learned projection.
pca.fit(X_train_flattened)

# Project both splits with the same fitted transform.
X_train_pca = pca.transform(X_train_flattened)
X_val_pca = pca.transform(X_val_flattened)

In [6]:
# Train an RBF-kernel support vector classifier on the PCA-reduced training
# features (fit() returns the estimator itself, so the call can be chained).
model = SVC(kernel='rbf', C=100, gamma=0.01).fit(X_train_pca, train_feat_Y)

# Evaluate on the validation split: overall accuracy plus the per-class
# precision / recall / F1 report.
y_pred = model.predict(X_val_pca)
accuracy = accuracy_score(val_feat_Y, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print(classification_report(val_feat_Y, y_pred))

Accuracy: 0.99
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       252
           1       0.99      0.99      0.99       237

    accuracy                           0.99       489
   macro avg       0.99      0.99      0.99       489
weighted avg       0.99      0.99      0.99       489



In [10]:
# Persist the fitted PCA transform and the trained classifier; both files are
# needed together to run inference on new feature arrays.
joblib.dump(value=pca, filename='PCA_for_features_model.pkl')
joblib.dump(value=model, filename='Trained_features_model.pkl')

['Trained_features_model.pkl']