# Enhanced NDVI Logistic Regression Model

In [2]:

# NDVI-Based Land Cover Classification Using Logistic Regression (Enhanced)

## 1. Import Libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score

In [3]:
## 2. Load Data
# Read each CSV and discard its "Unnamed: 0" column in a single expression,
# so the frames are never mutated in place after being created.
# NOTE(review): "Unnamed: 0" is presumably a saved index column — confirm against the CSVs.
train_df = pd.read_csv("/content/hacktrain.csv").drop(columns=["Unnamed: 0"])
test_df = pd.read_csv("/content/hacktest.csv").drop(columns=["Unnamed: 0"])


In [4]:
## 3. Prepare Features and Target
# The "class" column is the prediction target; everything except "ID" and
# "class" is used as a model feature. The test frame only has "ID" removed.
y = train_df["class"]
X = train_df.drop(["ID", "class"], axis="columns")
X_test = test_df.drop(["ID"], axis="columns")

# Convert the class labels to integer codes. The fitted encoder is kept so
# that predicted codes can be mapped back to the original labels.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [5]:
## 4. Enhanced Pipeline with Scaling
# Hold out 20% of the rows for validation; the fixed random_state makes the
# split repeatable between runs.
# NOTE(review): the split is not stratified by class — confirm that is acceptable
# for this dataset's class balance.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Stages run in order: mean-impute missing values, standardize the features,
# then fit a logistic-regression classifier on the result.
pipeline = make_pipeline(
    SimpleImputer(strategy="mean"),
    StandardScaler(),
    LogisticRegression(solver="lbfgs", max_iter=5000),
)

In [6]:
## 5. Train and Evaluate
# Fit on the training split only (fit returns the pipeline itself, so the
# prediction can be chained), then report accuracy on the held-out rows.
y_val_pred = pipeline.fit(X_train, y_train).predict(X_val)
print("Validation Accuracy:", accuracy_score(y_true=y_val, y_pred=y_val_pred))

Validation Accuracy: 0.8775


In [7]:
## 6. Final Training and Submission
# Refit the same pipeline on every labelled row (training + validation),
# then predict the test set.
y_test_pred = pipeline.fit(X, y_encoded).predict(X_test)

# Translate the integer predictions back into the original class labels.
y_test_labels = label_encoder.inverse_transform(y_test_pred)

# Two-column submission: the test IDs alongside the predicted labels.
# "class" is a Python keyword, hence the dict-unpacking form of assign.
submission_df = test_df[["ID"]].assign(**{"class": y_test_labels})
submission_df.to_csv("submission.csv", index=False)