In [3]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Load the train and test feature tables (Colab-local paths).
train_df = pd.read_csv("/content/hog_features_train.csv")
test_df = pd.read_csv("/content/fruits_hog_features_test.csv")

# Name of the column that holds the class label
label_col = "Class"

# Rows without a label cannot be used for fitting or scoring, so drop them.
train_df = train_df.dropna(subset=[label_col])
test_df = test_df.dropna(subset=[label_col])

# Every column other than the label is treated as a feature.
X_train = train_df.drop(columns=[label_col])
y_train = train_df[label_col]

X_test = test_df.drop(columns=[label_col])
y_test = test_df[label_col]

# Encode class names as integers 0..n_classes-1. The encoder is fit on the
# training labels only, so `transform` raises if the test split contains a
# class that never appeared in training.
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

# Standardize features with statistics learned from the training split only.
# GaussianNB already fits a separate mean/variance per feature, so scaling
# mainly changes how `var_smoothing` (a fraction of the largest feature
# variance) acts on each feature.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Baseline model: var_smoothing=1e-9 is the scikit-learn default.
model = GaussianNB(var_smoothing=1e-9)
model.fit(X_train_scaled, y_train)

# Predict
y_pred = model.predict(X_test_scaled)

# Evaluate
acc = accuracy_score(y_test, y_pred)
print(f"\n✅ Accuracy: {acc * 100:.2f}%")
print("\n📊 Classification Report:")
# Pass `labels` explicitly so the rows always line up with `le.classes_`.
# Without it, scikit-learn infers the label set from y_test/y_pred and raises
# when a training class is absent from both (length mismatch with target_names).
all_class_ids = list(range(len(le.classes_)))
print(
    classification_report(
        y_test,
        y_pred,
        labels=all_class_ids,
        target_names=le.classes_,
    )
)


✅ Accuracy: 55.27%

📊 Classification Report:
                       precision    recall  f1-score   support

              Apple 6       0.60      0.88      0.71       157
     Apple Braeburn 1       0.43      0.61      0.51       164
 Apple Crimson Snow 1       0.37      0.30      0.33       148
       Apple Golden 1       0.57      0.79      0.66       160
       Apple Golden 2       0.67      0.64      0.65       164
       Apple Golden 3       0.81      0.57      0.67       161
 Apple Granny Smith 1       0.85      0.67      0.75       164
    Apple Pink Lady 1       0.42      0.95      0.58       152
          Apple Red 1       0.23      0.13      0.17       164
          Apple Red 2       0.44      0.49      0.47       164
          Apple Red 3       0.42      0.48      0.45       144
Apple Red Delicious 1       0.82      0.83      0.82       166
   Apple Red Yellow 1       0.24      0.23      0.24       164
   Apple Red Yellow 2       0.70      0.75      0.73       219
        

In [4]:
# Sweep a handful of var_smoothing values and report the accuracy of each.
#
# NOTE(review): every candidate is scored on the test split, so the "best"
# value (and the accuracy printed for it) is selected on test data and is an
# optimistic estimate. Prefer choosing var_smoothing with cross-validation on
# the training split and scoring the test split once with the chosen value.
smoothing_values = [1e-10, 1e-9, 1e-8, 1e-7, 1e-6]
best_acc = 0
best_smoothing = None
best_model = None
best_pred = None

for val in smoothing_values:
    # Use candidate-local names so the loop does not clobber the notebook-level
    # `model` / `y_pred` / `acc` with whichever value happens to be tried last.
    candidate = GaussianNB(var_smoothing=val)
    candidate.fit(X_train_scaled, y_train)
    candidate_pred = candidate.predict(X_test_scaled)
    candidate_acc = accuracy_score(y_test, candidate_pred)
    print(f"var_smoothing={val} → Accuracy: {candidate_acc * 100:.2f}%")

    # Always accept the first candidate so a winner exists even when every
    # accuracy is 0.0; afterwards only a strict improvement replaces it
    # (ties keep the earlier, smaller smoothing value).
    if best_model is None or candidate_acc > best_acc:
        best_acc = candidate_acc
        best_smoothing = val
        best_model = candidate
        best_pred = candidate_pred

# Leave `model`, `y_pred` and `acc` describing the reported best setting, so
# later cells that reuse them are consistent with `best_smoothing`.
if best_model is not None:
    model, y_pred, acc = best_model, best_pred, best_acc

print(f"\n🔍 Best var_smoothing: {best_smoothing} → Accuracy: {best_acc * 100:.2f}%")


var_smoothing=1e-10 → Accuracy: 55.23%
var_smoothing=1e-09 → Accuracy: 55.27%
var_smoothing=1e-08 → Accuracy: 55.32%
var_smoothing=1e-07 → Accuracy: 55.38%
var_smoothing=1e-06 → Accuracy: 55.43%

🔍 Best var_smoothing: 1e-06 → Accuracy: 55.43%
