In [3]:
import pandas as pd

# Read the car dataset. header=None tells pandas not to treat the first row
# as column names, so the columns are labelled with integers instead.
file = 'car-info.csv'
data = pd.read_csv(file, header=None)

# Preview the leading rows, then report the (rows, columns) shape.
print(data.head(), data.shape, sep='\n')


       0      1  2  3      4     5      6
0  vhigh  vhigh  2  2  small   low  unacc
1  vhigh  vhigh  2  2  small   med  unacc
2  vhigh  vhigh  2  2  small  high  unacc
3  vhigh  vhigh  2  2    med   low  unacc
4  vhigh  vhigh  2  2    med   med  unacc
(1728, 7)


In [4]:
# Replace the default integer column labels with descriptive names
# (six attribute columns followed by the 'class' target).
data.columns = [
    'buying',
    'maint',
    'doors',
    'persons',
    'lug_boot',
    'safety',
    'class',
]

# Print the leading rows again to confirm the new labels are in place.
print(data.head(n=5))


  buying  maint doors persons lug_boot safety  class
0  vhigh  vhigh     2       2    small    low  unacc
1  vhigh  vhigh     2       2    small    med  unacc
2  vhigh  vhigh     2       2    small   high  unacc
3  vhigh  vhigh     2       2      med    low  unacc
4  vhigh  vhigh     2       2      med    med  unacc


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Integer-encode every column with its own LabelEncoder.
# The encoded values are written to a copy so the raw `data` frame keeps its
# original strings: re-running this cell then always fits the encoders on the
# real labels instead of on already-encoded integers.
# NOTE(review): LabelEncoder assigns codes in sorted label order, which is an
# arbitrary order for the feature columns; scikit-learn documents it as a
# target encoder. Kept here so the stored encoders and results are unchanged.
label_encoders = {}
data_encoded = data.copy()
for column in data.columns:
    le = LabelEncoder()
    data_encoded[column] = le.fit_transform(data[column])
    label_encoders[column] = le  # Kept so codes can be mapped back to labels

# Split the encoded frame into features (X) and target (y)
X = data_encoded.drop('class', axis=1)  # Every column except 'class'
y = data_encoded['class']  # The encoded 'class' column

# Hold out 10% of the rows for testing; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified; consider stratify=y if the rare
# classes should be represented proportionally in the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Train the SVM classifier with scikit-learn's default settings
svm_classifier = SVC()
svm_classifier.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = svm_classifier.predict(X_test)

# Evaluate the model. The report is labelled with the original class names
# instead of the opaque integer codes; passing `labels` explicitly keeps the
# names aligned even if a class happens to be absent from the test split.
class_names = [str(name) for name in label_encoders['class'].classes_]
print("Accuracy:", accuracy_score(y_test, y_pred))
print(
    "\nClassification Report:\n",
    classification_report(
        y_test,
        y_pred,
        labels=list(range(len(class_names))),
        target_names=class_names,
    ),
)


Accuracy: 0.9479768786127167

Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.89      0.91        44
           1       1.00      0.71      0.83         7
           2       0.96      1.00      0.98       112
           3       0.89      0.80      0.84        10

    accuracy                           0.95       173
   macro avg       0.94      0.85      0.89       173
weighted avg       0.95      0.95      0.95       173



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Read the header-less CSV again, naming the seven columns at load time.
data = pd.read_csv(
    'car-info.csv',
    header=None,
    names=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class'],
)

# One-hot encode every column except 'class'; drop_first omits the first
# level of each feature from the generated dummy columns.
X = pd.get_dummies(data.drop(columns='class'), drop_first=True)

# Integer-encode the target labels.
le = LabelEncoder()
y = le.fit_transform(data['class'])

# 5-fold cross-validation with shuffling; the fixed seed keeps the fold
# assignment repeatable across runs.
kf = KFold(n_splits=5, shuffle=True, random_state=1)

# Per-fold accuracies, collected in fold order.
accuracy_scores = []

for train_index, test_index in kf.split(X):
    # Select this fold's training and held-out rows.
    X_train, y_train = X.iloc[train_index], y[train_index]
    X_test, y_test = X.iloc[test_index], y[test_index]

    # Fit a fresh linear-kernel SVC on the training rows
    # (fit returns the estimator itself, so the call can be chained).
    svm_clf = SVC(kernel='linear').fit(X_train, y_train)

    # Predict the held-out rows and record the fold's accuracy.
    y_pred = svm_clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)

# Report the individual fold accuracies and their mean.
print("Accuracy scores for each fold: ", accuracy_scores)
print("Average accuracy: ", np.mean(accuracy_scores))


Accuracy scores for each fold:  [0.9190751445086706, 0.9190751445086706, 0.9450867052023122, 0.9246376811594202, 0.9246376811594202]
Average accuracy:  0.9265024713076988
