# Forest Cover Type – Training Notebook
Algorithms: Logistic Regression, SVM, MLP Neural Network.

**Goal:** Achieve best accuracy using optimized model configurations.

## 1. Load Data

In [1]:
import pandas as pd
import numpy as np
import warnings
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier

# Silence only deprecation chatter. A blanket filterwarnings('ignore') would
# also hide scikit-learn's ConvergenceWarning (a UserWarning subclass), which is
# the signal that a model below stopped at max_iter before converging.
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

# Load data: every column except "Cover_Type" is a feature, "Cover_Type" is the label.
df = pd.read_csv("covtype.csv")

X = df.drop(columns=["Cover_Type"])
y = df["Cover_Type"]

# 80/20 train-test split; stratify=y keeps the class proportions equal in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Feature scaling (important for SVM and MLP).
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")
print(f"Number of features: {X_train.shape[1]}")
print(f"Number of classes: {len(np.unique(y))}")


Training set size: 464809
Test set size: 116203
Number of features: 54
Number of classes: 7


## 2. Logistic Regression

In [2]:
# Logistic Regression baseline (multinomial / softmax over all classes).
# - `multi_class` is not passed: the argument is deprecated in recent
#   scikit-learn releases, and with the lbfgs solver the default already fits a
#   multinomial model on multiclass targets.
# - `n_jobs` is not passed: it only parallelizes one-vs-rest fits, so it had no
#   effect on this multinomial configuration.
lr = LogisticRegression(
    max_iter=1000,
    solver='lbfgs',
    random_state=42
)
lr.fit(X_train, y_train)
pred_lr = lr.predict(X_test)

# Overall accuracy plus per-class precision/recall on the held-out test split.
lr_accuracy = accuracy_score(y_test, pred_lr)
print(f"Logistic Regression Accuracy: {lr_accuracy:.4f}")
print(classification_report(y_test, pred_lr))


Logistic Regression Accuracy: 0.7234
              precision    recall  f1-score   support

           1       0.71      0.70      0.70     42368
           2       0.75      0.80      0.77     56661
           3       0.68      0.80      0.73      7151
           4       0.61      0.44      0.51       549
           5       0.14      0.00      0.01      1899
           6       0.49      0.27      0.35      3473
           7       0.74      0.56      0.63      4102

    accuracy                           0.72    116203
   macro avg       0.59      0.51      0.53    116203
weighted avg       0.71      0.72      0.71    116203



## 3. Support Vector Machine (LinearSVC)

In [None]:
# Linear SVM (LinearSVC, default squared-hinge loss with L2 penalty).
# dual=False solves the primal problem, which is what scikit-learn recommends
# when n_samples > n_features. The training matrix here has far more rows than
# columns, so the dual solver would be slow and prone to stopping at max_iter
# without converging.
svm = LinearSVC(
    C=1.0,
    max_iter=2000,
    dual=False,
    random_state=42  # kept for consistency; the primal solver is deterministic
)
svm.fit(X_train, y_train)
pred_svm = svm.predict(X_test)

# Overall accuracy plus per-class precision/recall on the held-out test split.
svm_accuracy = accuracy_score(y_test, pred_svm)
print(f"SVM (LinearSVC) Accuracy: {svm_accuracy:.4f}")
print(classification_report(y_test, pred_svm))


## 4. Neural Network (MLPClassifier)

In [None]:
# MLP Neural Network with optimized hyperparameters
# Based on grid search results from reference, (100, 100) hidden layers work best
#
# `learning_rate` is deliberately not set: MLPClassifier only consults it when
# solver='sgd', so passing 'adaptive' together with adam changed nothing and
# suggested a schedule that was never applied.
mlp = MLPClassifier(
    hidden_layer_sizes=(100, 100),
    max_iter=300,
    activation='relu',
    solver='adam',
    alpha=0.0001,
    batch_size='auto',
    learning_rate_init=0.001,
    # Hold out 10% of the training data and stop once the validation score has
    # not improved for 10 consecutive epochs.
    early_stopping=True,
    validation_fraction=0.1,
    n_iter_no_change=10,
    random_state=42,
    verbose=True  # prints the loss / validation score for every iteration
)

print("Training MLP Neural Network...")
mlp.fit(X_train, y_train)
pred_mlp = mlp.predict(X_test)

# Overall accuracy plus per-class precision/recall on the held-out test split.
mlp_accuracy = accuracy_score(y_test, pred_mlp)
print(f"\nMLP Neural Network Accuracy: {mlp_accuracy:.4f}")
print(classification_report(y_test, pred_mlp))


## 5. MLP with Grid Search (Optional - for best accuracy)

In [None]:
# Grid Search for MLP to find optimal hyperparameters
# Note: the grid below has 4 x 2 = 8 candidates; with cv=3 that is 24 fits on
# the training set plus one final refit of the winner, so expect a long runtime.

param_grid = {
    'hidden_layer_sizes': [(100,), (200,), (100, 100), (100, 50)],
    'activation': ['relu'],
    'solver': ['adam'],
    'alpha': [0.0001, 0.001],
    # 'learning_rate' is intentionally not searched: MLPClassifier only uses it
    # with solver='sgd', so it cannot influence any adam-trained candidate.
    'max_iter': [200]
}

# Base estimator: settings shared by every candidate in the grid.
mlp_gs = MLPClassifier(
    early_stopping=True,
    validation_fraction=0.1,
    n_iter_no_change=10,
    random_state=42
)

print("Running Grid Search for MLP (this may take a while)...")
grid_search = GridSearchCV(
    mlp_gs,
    param_grid,
    cv=3,
    scoring='accuracy',
    n_jobs=-1,   # run candidate fits in parallel on all available cores
    verbose=2
)
grid_search.fit(X_train, y_train)

print(f"\nBest Parameters: {grid_search.best_params_}")
print(f"Best CV Score: {grid_search.best_score_:.4f}")

# Evaluate on test set using the best configuration found by the search.
best_mlp = grid_search.best_estimator_
pred_best_mlp = best_mlp.predict(X_test)
best_mlp_accuracy = accuracy_score(y_test, pred_best_mlp)
print(f"Best MLP Test Accuracy: {best_mlp_accuracy:.4f}")
print(classification_report(y_test, pred_best_mlp))

## 6. Model Comparison Summary

In [None]:
# Model Comparison Summary
# Each entry maps a display name to the notebook variable holding that model's
# test accuracy. The grid-search section is optional, so a variable may not
# exist; such models are left out of the table instead of raising NameError.
score_variables = {
    'Logistic Regression': 'lr_accuracy',
    'SVM (LinearSVC)': 'svm_accuracy',
    'MLP Neural Network': 'mlp_accuracy',
    'MLP (Grid Search)': 'best_mlp_accuracy',
}
available_scores = {
    model_name: globals()[variable]
    for model_name, variable in score_variables.items()
    if variable in globals()
}

results = {
    'Model': list(available_scores.keys()),
    'Accuracy': list(available_scores.values())
}

# Rank models from highest to lowest test accuracy.
results_df = pd.DataFrame(results)
results_df = results_df.sort_values('Accuracy', ascending=False).reset_index(drop=True)

print("=" * 50)
print("MODEL COMPARISON - FOREST COVER TYPE PREDICTION")
print("=" * 50)
if results_df.empty:
    # Nothing has been trained in this session, so there is no best model to report.
    print("No trained models found - run the training cells first.")
    print("=" * 50)
else:
    print(results_df.to_string(index=False))
    print("=" * 50)
    print(f"\n🏆 Best Model: {results_df.iloc[0]['Model']}")
    print(f"   Best Accuracy: {results_df.iloc[0]['Accuracy']:.4f} ({results_df.iloc[0]['Accuracy']*100:.2f}%)")