### SVC Model

In [1]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import fbeta_score,make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from sklearn.metrics import (
    confusion_matrix,
    precision_score,
    recall_score,
    accuracy_score,
)

In [2]:
# Load the pre-split feature and label tables from CSV (training pair first,
# then the held-out test pair).
X_train, y_train = (
    pd.read_csv('train_X_In-Car-Rec.csv'),
    pd.read_csv('train_y_In-Car-Rec.csv'),
)
X_test, y_test = (
    pd.read_csv('test_X_In-Car-Rec.csv'),
    pd.read_csv('test_y_In-Car-Rec.csv'),
)

In [3]:
# Scoring callable for model selection: F-beta with beta=2, which weights
# recall more heavily than precision.
scorer = make_scorer(score_func=fbeta_score, beta=2)

### Building a Pipeline with Parameter Grid for Hyperparameter Tuning

Now that we have loaded our dataset, we can proceed to build a pipeline. A pipeline chains the routine preprocessing and modelling steps into a single estimator, making it easier to manage complex workflows. In this example, our pipeline will consist of the following steps:
1. **Dimensionality Reduction**: Using `PCA` to project the features onto 10 principal components.
2. **Model Training**: Using `SVC` (Support Vector Classifier) for classification.

Let's go ahead and build this pipeline.

In [4]:
# Hyperparameter grid for the 'svc' step of the pipeline.
#
# The grid is split per kernel so that only parameters the kernel actually
# uses are searched:
#   * 'degree' applies only to the 'poly' kernel and 'coef0' only to
#     'poly'/'sigmoid'; neither kernel is searched here, so both are omitted.
#   * 'gamma' is ignored by the 'linear' kernel, so it is searched for 'rbf'
#     only.
# Crossing those ignored parameters with every other setting would only refit
# identical models many times over (864 candidates instead of 60).
param_grid_svc = [
    {
        'svc__kernel': ['linear'],
        'svc__C': [0.1, 1, 10],
        'svc__shrinking': [True, False],
        'svc__class_weight': [None, 'balanced'],
    },
    {
        'svc__kernel': ['rbf'],
        'svc__C': [0.1, 1, 10],
        'svc__gamma': ['scale', 'auto', 0.1, 1],
        'svc__shrinking': [True, False],
        'svc__class_weight': [None, 'balanced'],
    },
]

In [5]:
# Pipeline used for the hyperparameter search: PCA down to 10 components,
# followed by an SVC whose solver is capped at 1000 iterations.
pipeline = Pipeline(
    steps=[
        ('pca', PCA(n_components=10)),
        ('svc', SVC(max_iter=1000)),
    ]
)

In [None]:
# Exhaustive search over param_grid_svc, scored with the F2 scorer and
# evaluated with 5-fold cross-validation.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid_svc,
    scoring=scorer,
    cv=5,
)

# Run the search; the label frame is flattened to the 1-D array fit() expects.
grid_search.fit(X_train, y_train.values.ravel())

In [None]:
# Pull the winning estimator and its search summary off the fitted search
# object, then display the parameters alongside the cross-validated score.
best_svc = grid_search.best_estimator_
best_params, best_score = grid_search.best_params_, grid_search.best_score_
best_params, best_score

### Final pipeline

In [8]:
# Final model: 10-component PCA feeding an RBF-kernel SVC with C=0.1.
svc_pipeline = Pipeline(
    steps=[
        ('pca', PCA(n_components=10)),
        ('svc', SVC(C=0.1, kernel='rbf')),
    ]
)

In [9]:
# Train the final pipeline on the training data. The label DataFrame is
# flattened with .values.ravel() so fit() receives a 1-D array of targets.
svc_pipeline.fit(X_train, y_train.values.ravel())

In [10]:
# Predict class labels for the held-out test features with the fitted
# pipeline; y_pred is reused by the evaluation cells below.
y_pred = svc_pipeline.predict(X_test)

In [11]:
# Evaluate the pipeline on the test data (the classifier's default score,
# i.e. mean accuracy).
score = svc_pipeline.score(X_test, y_test)

# Calculate the F2 score (F-beta with beta=2, favouring recall) on the test
# predictions, averaged across classes weighted by their support.
# NOTE(review): the grid-search scorer uses fbeta_score's default averaging,
# whereas this uses average='weighted' — confirm which one is intended.
f2_score = fbeta_score(y_test, y_pred, average='weighted', beta=2)
print(f'F2Score for the SVC Model is: {f2_score}')

F2Score for the SVC Model is: 0.5010476501580502


In [12]:
# Confusion matrix of true labels (rows) against predicted labels (columns).
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Precision, support-weighted across classes. A class that is never predicted
# has an undefined precision (0/0); zero_division=0 scores it as 0 explicitly,
# which matches the default value but avoids the UndefinedMetricWarning.
precision = precision_score(
    y_test, y_pred, average="weighted", zero_division=0
)
print(f"\nPrecision (weighted): {precision:.4f}")

# Recall, support-weighted across classes.
recall = recall_score(y_test, y_pred, average="weighted")
print(f"Recall (weighted): {recall:.4f}")

# Accuracy: fraction of test samples classified correctly.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

Confusion Matrix:
[[   0 1078]
 [   0 1459]]

Precision (weighted): 0.3307
Recall (weighted): 0.5751
Accuracy: 0.5751


  _warn_prf(average, modifier, msg_start, len(result))
