In [2]:
import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn import svm


data = pd.read_csv("data.csv")
labels = pd.read_csv("labels.csv")

# Extract features (data) and target labels (classes)
X = data.values[:, 1:]  # Assuming the first column is an identifier, not a feature
y = labels.values[:, 1]  # Assuming the first column is an identifier, not a label

X, y = make_classification(n_samples=100, n_features=50, n_informative=10, n_classes=2, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize SelectKBest with the desired score function (f_classif for classification)
k_best = SelectKBest(score_func=f_classif, k=10)  

# Fit and transform the feature matrix
X_train_k_best = k_best.fit_transform(X_train, y_train)
X_test_k_best = k_best.transform(X_test)

# Print the indices of the selected features
selected_feature_indices = np.where(k_best.get_support())[0]
print("Selected Feature Indices:", selected_feature_indices)

clf = SVC(kernel='linear', random_state=42)
clf.fit(X_train_k_best, y_train)

# Make predictions on the test set
svm_classifier = svm.SVC(kernel='linear')

# Fit the SVM model on the training data with selected features
svm_classifier.fit(X_train_k_best, y_train)

# Predict on the test data with selected features
y_pred = svm_classifier.predict(X_test_k_best)

# Calculate and print accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy is :",accuracy*100,"%")

# Print the selected feature names or indices
selected_feature_names = data.columns[1:][selected_feature_indices]

# Make predictions on the test set
svm_classifier = svm.SVC(kernel='linear')

# Fit the SVM model on the training data with selected features
svm_classifier.fit(X_train_k_best, y_train)

# Predict on the test data with selected features
y_pred = svm_classifier.predict(X_test_k_best)

# Calculate and print precision, recall, and F1 score
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1 Score: {f1:.2f}')



Selected Feature Indices: [ 8  9 13 20 25 28 30 31 39 40]
0.85
Precision: 0.86
Recall: 0.85
F1 Score: 0.85
