In [49]:
# import libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, mean_squared_error
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score, KFold
from sklearn.feature_selection import f_classif
from sklearn.feature_selection import SelectKBest
from sklearn.model_selection import GridSearchCV

In [2]:
# read the test and train data files
train_df = pd.read_csv("train.csv")
valid_df = pd.read_csv("valid.csv")
test_df = pd.read_csv("test.csv")

label 01

In [16]:
train1_df = train_df.iloc[:,:-3]
valid1_df = valid_df.iloc[:, :-3]
test1_df = test_df.iloc[:, 1:]

train1_df.dropna(inplace=True)
valid1_df.dropna(inplace=True)
test1_df.dropna(inplace=True)

In [18]:
# splitting the test and train datasets into X and Y values
X1_train= train1_df.iloc[:,0:-1].values
Y1_train = train1_df.iloc[:,-1].values
X1_valid = valid1_df.iloc[:,0:-1].values
Y1_valid = valid1_df.iloc[:,-1].values
X1_test = test1_df.iloc[:,:].values

In [19]:
# scalling and fitting data
scaler = StandardScaler()
scaler.fit(X1_train)

X1_train = scaler.transform(X1_train)
X1_valid = scaler.transform(X1_valid)
X1_test = scaler.transform(X1_test)

In [15]:
classifiers = [
    ("Random Forest", RandomForestClassifier()),
    ("K-Nearest Neighbors", KNeighborsClassifier()),
    ("Support Vector Machine", SVC())
]

kf = KFold(n_splits=5, shuffle=True, random_state=42)  # You can change the shuffling and random state

# Iterate over each classifier and perform cross-validation
for clf_name, clf in classifiers:
    cross_val_scores = cross_val_score(clf, X_1_train, Y_1_train, cv=kf, scoring='accuracy')
    
    # Print the cross-validation scores for each classifier
    print(f"{clf_name} Cross-validation scores:", cross_val_scores)
    
    # Calculate and print the mean and standard deviation of the scores
    print(f"{clf_name} Mean accuracy:", cross_val_scores.mean())
    print(f"{clf_name} Standard deviation:", cross_val_scores.std())
    print("\n")

Random Forest Cross-validation scores: [0.88253857 0.88078541 0.88569425 0.8828892  0.87903226]
Random Forest Mean accuracy: 0.8821879382889201
Random Forest Standard deviation: 0.002228646364946794


K-Nearest Neighbors Cross-validation scores: [0.85396213 0.86062412 0.86360449 0.85326087 0.86062412]
K-Nearest Neighbors Mean accuracy: 0.8584151472650771
K-Nearest Neighbors Standard deviation: 0.004076379138663002


Support Vector Machine Cross-validation scores: [0.95143759 0.94810659 0.94758065 0.94810659 0.94740533]
Support Vector Machine Mean accuracy: 0.9485273492286115
Support Vector Machine Standard deviation: 0.0014818055125606759




In [23]:
# Initialize and train a Support Vector Machine classifier
svm_classifier = SVC(kernel='linear', C=1.0)
svm_classifier.fit(X1_train, Y1_train)

# Make predictions on the test set
y_pred = svm_classifier.predict(X1_valid)

print(classification_report(Y1_valid, y_pred))

              precision    recall  f1-score   support

           1       0.92      0.92      0.92        13
           2       1.00      0.78      0.88         9
           3       0.86      1.00      0.92        12
           4       0.89      1.00      0.94        16
           5       1.00      0.94      0.97        18
           6       0.90      1.00      0.95         9
           7       1.00      0.94      0.97        17
           8       0.93      1.00      0.97        14
           9       0.91      0.91      0.91        11
          10       1.00      1.00      1.00         8
          11       1.00      1.00      1.00        19
          12       1.00      1.00      1.00         7
          13       1.00      0.91      0.95        11
          14       0.92      0.73      0.81        15
          15       1.00      1.00      1.00        17
          16       1.00      1.00      1.00        14
          17       1.00      1.00      1.00        14
          18       0.89    

In [43]:
# Create a SelectKBest instance with a scoring function (e.g., chi-squared)
selector = SelectKBest(score_func=f_classif, k=250)  # Select the top 2 features

# Fit and transform your data to select the best k features
X1_best_train = selector.fit_transform(X1_train, Y1_train)
X1_best_valid = selector.transform(X1_valid)
X1_best_test = selector.transform(X1_test)

In [44]:
svm_classifier.fit(X1_best_train, Y1_train)

# Make predictions on the test set
y_pred = svm_classifier.predict(X1_best_valid)

print(classification_report(Y1_valid, y_pred))

              precision    recall  f1-score   support

           1       1.00      0.92      0.96        13
           2       1.00      0.89      0.94         9
           3       0.86      1.00      0.92        12
           4       0.83      0.94      0.88        16
           5       0.85      0.94      0.89        18
           6       0.89      0.89      0.89         9
           7       1.00      0.94      0.97        17
           8       0.88      1.00      0.93        14
           9       1.00      0.82      0.90        11
          10       0.80      1.00      0.89         8
          11       1.00      1.00      1.00        19
          12       1.00      0.86      0.92         7
          13       0.91      0.91      0.91        11
          14       0.92      0.73      0.81        15
          15       1.00      1.00      1.00        17
          16       1.00      1.00      1.00        14
          17       0.87      0.93      0.90        14
          18       0.90    

In [45]:
pca=PCA(0.9)
pca = pca.fit(X1_best_train)

x_1_train_pca=pca.fit_transform(X1_best_train)
x_1_valid_pca = pca.transform(X1_best_valid)
x_1_test_pca = pca.transform(X1_best_test)

In [46]:
svm_classifier.fit(x_1_train_pca, Y1_train)

# Make predictions on the test set
y_pred = svm_classifier.predict(x_1_valid_pca)

print(classification_report(Y1_valid, y_pred))

              precision    recall  f1-score   support

           1       0.71      0.92      0.80        13
           2       0.89      0.89      0.89         9
           3       0.86      1.00      0.92        12
           4       0.94      0.94      0.94        16
           5       0.80      0.89      0.84        18
           6       1.00      0.89      0.94         9
           7       0.94      0.88      0.91        17
           8       0.93      0.93      0.93        14
           9       1.00      0.91      0.95        11
          10       0.80      1.00      0.89         8
          11       1.00      0.95      0.97        19
          12       1.00      1.00      1.00         7
          13       0.91      0.91      0.91        11
          14       0.92      0.73      0.81        15
          15       1.00      1.00      1.00        17
          16       1.00      1.00      1.00        14
          17       1.00      0.79      0.88        14
          18       0.94    

In [47]:
x_1_train_pca.shape

(28520, 108)

In [51]:
param_grid = {
    'C': [0.1, 1, 10],  # Regularization parameter
    'kernel': ['linear', 'rbf', 'poly'],  # Kernel type
    'gamma': ['scale', 'auto', 0.1, 1]  # Kernel coefficient for 'rbf' and 'poly'
}

# Create a GridSearchCV object with cross-validation
grid_search = GridSearchCV(svm_classifier, param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)

# Fit the GridSearchCV object to the data to find the best hyperparameters
grid_search.fit(x_1_train_pca, Y1_train)

# Print the best hyperparameters and corresponding accuracy
best_params = grid_search.best_params_
best_accuracy = grid_search.best_score_
print("Best Hyperparameters:", best_params)
print("Best Accuracy:", best_accuracy)

Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best Hyperparameters: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
Best Accuracy: 0.9652173913043478


In [53]:
best_model = grid_search.best_estimator_
y_pred = best_model.predict(x_1_valid_pca)

print(classification_report(Y1_valid, y_pred))

              precision    recall  f1-score   support

           1       1.00      0.92      0.96        13
           2       1.00      0.89      0.94         9
           3       0.86      1.00      0.92        12
           4       0.89      1.00      0.94        16
           5       1.00      1.00      1.00        18
           6       1.00      1.00      1.00         9
           7       1.00      1.00      1.00        17
           8       1.00      1.00      1.00        14
           9       1.00      0.91      0.95        11
          10       1.00      1.00      1.00         8
          11       1.00      1.00      1.00        19
          12       1.00      1.00      1.00         7
          13       1.00      0.91      0.95        11
          14       0.93      0.93      0.93        15
          15       0.94      0.94      0.94        17
          16       1.00      1.00      1.00        14
          17       1.00      1.00      1.00        14
          18       1.00    

label 02