In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    classification_report,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score
)

In [2]:
# Step 1: Read the CSV file into the working DataFrame `df`
csv_path = "/content/drive/MyDrive/Yearbook of Agricultural Statistics/train1.csv"
df = pd.read_csv(csv_path)

In [3]:
# Step 2: Derive the classification target.
# 'Production (M.Ton)' is cut into three quantile-based bins; the labels are
# assigned from the lowest bin to the highest.
production = df['Production (M.Ton)']
df['Production_Class'] = pd.qcut(production, q=3, labels=['Low', 'Medium', 'High'])

In [4]:
# Step 3: Split the frame into the feature matrix and the target vector.
# The raw production figure is dropped together with the derived class,
# because the class was computed directly from it.
target_column = 'Production_Class'
X = df.drop(columns=['Production (M.Ton)', target_column])
y = df[target_column]


In [5]:
# Step 4: Expand the categorical feature columns into indicator (dummy) columns
X_encoded = pd.get_dummies(data=X)

In [6]:
# Step 5: Hold out 20% of the rows as a test set (fixed seed for repeatability)
split_parts = train_test_split(X_encoded, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [7]:
# Step 6: Fit a random forest on the training split.
# Only the seed is set explicitly; no other constructor argument is passed.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X=X_train, y=y_train)

In [9]:
# Step 7: Predict a class label for every row of the held-out test features
y_pred = rf_model.predict(X=X_test)


In [10]:
# Step 8: Evaluation
# Overall fraction of test rows whose predicted class matches the true class.
accuracy = accuracy_score(y_test, y_pred)

# average=None yields one value per class instead of a single number.
# NOTE: in the recorded run below these arrays line up with the rows of the
# classification report (High, Low, Medium), NOT with the Low/Medium/High
# order used when the classes were created in Step 2.
precision = precision_score(y_test, y_pred, average=None, zero_division=0)
recall = recall_score(y_test, y_pred, average=None, zero_division=0)
f1 = f1_score(y_test, y_pred, average=None, zero_division=0)

# Aggregated F1: 'macro' is the unweighted mean over classes, 'weighted'
# weights each class by its number of true samples (support).
macro_f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)
weighted_f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

# Use the same zero_division policy as every metric above, so an undefined
# precision/recall is reported as 0 here too instead of raising a warning
# only for the report.
report = classification_report(y_test, y_pred, zero_division=0)


In [11]:
# Step 9: Print results
# Each entry pairs a caption with a metric computed in the evaluation step;
# the entries are printed one per line, in this order.
results_to_show = [
    ("Accuracy:", accuracy),
    ("Precision (per class):", precision),
    ("Recall (per class):", recall),
    ("F1 Score (per class):", f1),
    ("Macro F1 Score:", macro_f1),
    ("Weighted F1 Score:", weighted_f1),
    ("\nClassification Report:\n", report),
]
for caption, metric_value in results_to_show:
    print(caption, metric_value)

Accuracy: 0.908203125
Precision (per class): [0.91317365 0.94134897 0.87106017]
Recall (per class): [0.91317365 0.95252226 0.8611898 ]
F1 Score (per class): [0.91317365 0.94690265 0.86609687]
Macro F1 Score: 0.9087243912195778
Weighted F1 Score: 0.9080453011938079

Classification Report:
               precision    recall  f1-score   support

        High       0.91      0.91      0.91       334
         Low       0.94      0.95      0.95       337
      Medium       0.87      0.86      0.87       353

    accuracy                           0.91      1024
   macro avg       0.91      0.91      0.91      1024
weighted avg       0.91      0.91      0.91      1024

