In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Train and evaluate a linear SVM on pre-extracted HOG feature tables.
# Missing feature values are filled using IMPUTE_STRATEGY, fitted on the
# training split only.
TRAIN_CSV = '/content/hog_features_train.csv'
TEST_CSV = '/content/fruits_hog_features_test.csv'
LABEL_COL = 'Class'
IMPUTE_STRATEGY = 'mean'  # alternatives: 'median', 'most_frequent'

# Load data
train_df = pd.read_csv(TRAIN_CSV)
test_df = pd.read_csv(TEST_CSV)

# Ensure the label column exists in both splits
if LABEL_COL not in train_df.columns or LABEL_COL not in test_df.columns:
    raise ValueError("Column 'Class' not found in dataset!")

# Cast labels to str in both splits so they are compared and encoded as the
# same type
train_df[LABEL_COL] = train_df[LABEL_COL].astype(str)
test_df[LABEL_COL] = test_df[LABEL_COL].astype(str)

# Keep only test rows whose class was seen in training (the encoder is fitted
# on training labels only). Report how many rows are removed, because every
# metric below is computed on the remaining rows.
known_classes = set(train_df[LABEL_COL].unique())
seen_mask = test_df[LABEL_COL].isin(known_classes)
n_dropped = int((~seen_mask).sum())
if n_dropped:
    print(f'Dropped {n_dropped} test rows with classes absent from training')
test_df = test_df[seen_mask]
if test_df.empty:
    raise ValueError('No test rows remain after filtering to training classes')

# Separate features and labels
X_train = train_df.drop(columns=[LABEL_COL])
y_train = train_df[LABEL_COL]
X_test = test_df.drop(columns=[LABEL_COL])
y_test = test_df[LABEL_COL]

# Both CSVs must describe the same features. Fail early on a mismatch, then
# put the test columns in training order so each value reaches the
# imputer/scaler column it was fitted on.
missing_cols = X_train.columns.difference(X_test.columns)
extra_cols = X_test.columns.difference(X_train.columns)
if len(missing_cols) or len(extra_cols):
    raise ValueError(
        'Feature columns differ between train and test: '
        f'missing={list(missing_cols)[:5]}, unexpected={list(extra_cols)[:5]}'
    )
X_test = X_test[X_train.columns]

# Encode class labels (fitted on training labels only)
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Impute NaN values with the statistic named by IMPUTE_STRATEGY
imputer = SimpleImputer(strategy=IMPUTE_STRATEGY)
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Standardize features using training-split statistics
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

# Train SVM model
svm_model = SVC(kernel='linear', C=1.0)
svm_model.fit(X_train_scaled, y_train_encoded)

# Predict on test data
y_pred = svm_model.predict(X_test_scaled)

# Evaluate model. zero_division=0 keeps the default numeric result (0) for a
# class with no predicted or no true samples, without emitting a warning.
accuracy = accuracy_score(y_test_encoded, y_pred)
precision = precision_score(y_test_encoded, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test_encoded, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test_encoded, y_pred, average='weighted', zero_division=0)

# Print results
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-score: {f1:.4f}')

Accuracy: 0.8710
Precision: 0.8754
Recall: 0.8710
F1-score: 0.8680


In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Train and evaluate a linear SVM on pre-extracted HOG feature tables.
# Missing feature values are filled using IMPUTE_STRATEGY, fitted on the
# training split only.
TRAIN_CSV = '/content/hog_features_train.csv'
TEST_CSV = '/content/fruits_hog_features_test.csv'
LABEL_COL = 'Class'
IMPUTE_STRATEGY = 'median'  # alternatives: 'mean', 'most_frequent'

# Load data
train_df = pd.read_csv(TRAIN_CSV)
test_df = pd.read_csv(TEST_CSV)

# Ensure the label column exists in both splits
if LABEL_COL not in train_df.columns or LABEL_COL not in test_df.columns:
    raise ValueError("Column 'Class' not found in dataset!")

# Cast labels to str in both splits so they are compared and encoded as the
# same type
train_df[LABEL_COL] = train_df[LABEL_COL].astype(str)
test_df[LABEL_COL] = test_df[LABEL_COL].astype(str)

# Keep only test rows whose class was seen in training (the encoder is fitted
# on training labels only). Report how many rows are removed, because every
# metric below is computed on the remaining rows.
known_classes = set(train_df[LABEL_COL].unique())
seen_mask = test_df[LABEL_COL].isin(known_classes)
n_dropped = int((~seen_mask).sum())
if n_dropped:
    print(f'Dropped {n_dropped} test rows with classes absent from training')
test_df = test_df[seen_mask]
if test_df.empty:
    raise ValueError('No test rows remain after filtering to training classes')

# Separate features and labels
X_train = train_df.drop(columns=[LABEL_COL])
y_train = train_df[LABEL_COL]
X_test = test_df.drop(columns=[LABEL_COL])
y_test = test_df[LABEL_COL]

# Both CSVs must describe the same features. Fail early on a mismatch, then
# put the test columns in training order so each value reaches the
# imputer/scaler column it was fitted on.
missing_cols = X_train.columns.difference(X_test.columns)
extra_cols = X_test.columns.difference(X_train.columns)
if len(missing_cols) or len(extra_cols):
    raise ValueError(
        'Feature columns differ between train and test: '
        f'missing={list(missing_cols)[:5]}, unexpected={list(extra_cols)[:5]}'
    )
X_test = X_test[X_train.columns]

# Encode class labels (fitted on training labels only)
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Impute NaN values with the statistic named by IMPUTE_STRATEGY (median here)
imputer = SimpleImputer(strategy=IMPUTE_STRATEGY)
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Standardize features using training-split statistics
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

# Train SVM model
svm_model = SVC(kernel='linear', C=1.0)
svm_model.fit(X_train_scaled, y_train_encoded)

# Predict on test data
y_pred = svm_model.predict(X_test_scaled)

# Evaluate model. zero_division=0 keeps the default numeric result (0) for a
# class with no predicted or no true samples, without emitting a warning.
accuracy = accuracy_score(y_test_encoded, y_pred)
precision = precision_score(y_test_encoded, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test_encoded, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test_encoded, y_pred, average='weighted', zero_division=0)

# Print results
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-score: {f1:.4f}')

Accuracy: 0.8687
Precision: 0.8742
Recall: 0.8687
F1-score: 0.8656


In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Train and evaluate a linear SVM on pre-extracted HOG feature tables.
# Missing feature values are filled using IMPUTE_STRATEGY, fitted on the
# training split only.
TRAIN_CSV = '/content/hog_features_train.csv'
TEST_CSV = '/content/fruits_hog_features_test.csv'
LABEL_COL = 'Class'
IMPUTE_STRATEGY = 'most_frequent'  # alternatives: 'mean', 'median'

# Load data
train_df = pd.read_csv(TRAIN_CSV)
test_df = pd.read_csv(TEST_CSV)

# Ensure the label column exists in both splits
if LABEL_COL not in train_df.columns or LABEL_COL not in test_df.columns:
    raise ValueError("Column 'Class' not found in dataset!")

# Cast labels to str in both splits so they are compared and encoded as the
# same type
train_df[LABEL_COL] = train_df[LABEL_COL].astype(str)
test_df[LABEL_COL] = test_df[LABEL_COL].astype(str)

# Keep only test rows whose class was seen in training (the encoder is fitted
# on training labels only). Report how many rows are removed, because every
# metric below is computed on the remaining rows.
known_classes = set(train_df[LABEL_COL].unique())
seen_mask = test_df[LABEL_COL].isin(known_classes)
n_dropped = int((~seen_mask).sum())
if n_dropped:
    print(f'Dropped {n_dropped} test rows with classes absent from training')
test_df = test_df[seen_mask]
if test_df.empty:
    raise ValueError('No test rows remain after filtering to training classes')

# Separate features and labels
X_train = train_df.drop(columns=[LABEL_COL])
y_train = train_df[LABEL_COL]
X_test = test_df.drop(columns=[LABEL_COL])
y_test = test_df[LABEL_COL]

# Both CSVs must describe the same features. Fail early on a mismatch, then
# put the test columns in training order so each value reaches the
# imputer/scaler column it was fitted on.
missing_cols = X_train.columns.difference(X_test.columns)
extra_cols = X_test.columns.difference(X_train.columns)
if len(missing_cols) or len(extra_cols):
    raise ValueError(
        'Feature columns differ between train and test: '
        f'missing={list(missing_cols)[:5]}, unexpected={list(extra_cols)[:5]}'
    )
X_test = X_test[X_train.columns]

# Encode class labels (fitted on training labels only)
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Impute NaN values with the statistic named by IMPUTE_STRATEGY
# (most frequent value here)
imputer = SimpleImputer(strategy=IMPUTE_STRATEGY)
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Standardize features using training-split statistics
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

# Train SVM model
svm_model = SVC(kernel='linear', C=1.0)
svm_model.fit(X_train_scaled, y_train_encoded)

# Predict on test data
y_pred = svm_model.predict(X_test_scaled)

# Evaluate model. zero_division=0 keeps the default numeric result (0) for a
# class with no predicted or no true samples, without emitting a warning.
accuracy = accuracy_score(y_test_encoded, y_pred)
precision = precision_score(y_test_encoded, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test_encoded, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test_encoded, y_pred, average='weighted', zero_division=0)

# Print results
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-score: {f1:.4f}')

Accuracy: 0.8679
Precision: 0.8735
Recall: 0.8679
F1-score: 0.8646
