In [None]:
# NDVI Land Cover Classification - Logistic Regression with Scaling

In [None]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from scipy.signal import savgol_filter
import matplotlib.pyplot as plt

In [None]:
# Read the training and test tables from CSV files in the working directory.
train_df, test_df = (
    pd.read_csv(csv_name) for csv_name in ("hacktrain.csv", "hacktest.csv")
)

In [None]:
# Select the NDVI feature columns: every training-frame column whose name contains "_N".
# NOTE(review): the CSV column order is kept as-is and later used as the time axis
# (row-wise smoothing and differencing) — confirm the columns are in date order.
ndvi_columns = list(filter(lambda name: "_N" in name, train_df.columns))

In [None]:
# Pull the NDVI matrices (train and test) and the training labels out as raw arrays.
X_raw, X_test_raw = (frame[ndvi_columns].values for frame in (train_df, test_df))
y_raw = train_df['class'].values

In [None]:
# Fill missing values with per-column medians learned from the training matrix only,
# then reuse those same medians for the test matrix.
imputer = SimpleImputer(strategy='median').fit(X_raw)
X_imputed = imputer.transform(X_raw)
X_test_imputed = imputer.transform(X_test_raw)

In [None]:
# Savitzky-Golay smoothing along each row (axis=1): 5-sample window, degree-2 polynomial.
# The same settings are applied to the training and test matrices.
savgol_kwargs = dict(window_length=5, polyorder=2, axis=1)
X_smoothed = savgol_filter(X_imputed, **savgol_kwargs)
X_test_smoothed = savgol_filter(X_test_imputed, **savgol_kwargs)

In [None]:
# Feature extraction
def extract_features(ndvi_series):
    """Summarise one NDVI series as a fixed-length list of eight statistics.

    Parameters
    ----------
    ndvi_series : array-like
        Sequence of NDVI values for a single sample; converted with ``np.array``.

    Returns
    -------
    list
        ``[mean, std, min, max, median, 25th percentile, 75th percentile,
        mean of consecutive differences]``. All statistics use the NaN-ignoring
        numpy reducers; the differences themselves come from ``np.diff``.
    """
    values = np.array(ndvi_series)

    # The five plain reducers share a call shape, so apply them in output order.
    basic_reducers = (np.nanmean, np.nanstd, np.nanmin, np.nanmax, np.nanmedian)
    summary = [reduce_fn(values) for reduce_fn in basic_reducers]

    # Lower and upper quartiles.
    for quantile in (25, 75):
        summary.append(np.nanpercentile(values, quantile))

    # Average step between consecutive observations.
    step_changes = np.diff(values)
    summary.append(np.nanmean(step_changes))

    return summary

In [None]:
# Build one feature vector per row for both the training and the test matrix.
X_features = np.array(list(map(extract_features, X_smoothed)))
X_test_features = np.array(list(map(extract_features, X_test_smoothed)))

In [None]:
# Standardise the features with statistics learned from the training features only;
# the test features are transformed with those same statistics.
scaler = StandardScaler().fit(X_features)
X_scaled = scaler.transform(X_features)
X_test_scaled = scaler.transform(X_test_features)

In [None]:
# Map the class labels to integer codes; the fitted encoder is kept in `le`.
le = LabelEncoder().fit(y_raw)
y_encoded = le.transform(y_raw)

In [None]:
# Estimate accuracy of Logistic Regression with shuffled, stratified 5-fold CV.
# `multi_class` is deliberately not passed: the parameter is deprecated since
# scikit-learn 1.5 (FutureWarning, scheduled for removal), and with the lbfgs solver
# a multinomial model is already what gets fitted for multiclass targets.
# NOTE(review): X_scaled was standardised on all training rows before the folds are
# split, so held-out rows contributed to the scaling statistics — the score is an
# estimate, not a fully leakage-free measurement.
model = LogisticRegression(max_iter=2000, solver='lbfgs')
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(model, X_scaled, y_encoded, cv=cv, scoring='accuracy')
print(f"Cross-Validation Accuracy: {scores.mean():.4f}")

In [None]:
# Refit the classifier on the full scaled training set (all rows, no hold-out).
model.fit(X=X_scaled, y=y_encoded)

In [None]:
# Predict encoded classes for the scaled test features, decode them back to the
# original labels with the fitted encoder, and store them on the test frame.
y_test_pred = model.predict(X_test_scaled)
predicted_labels = le.inverse_transform(y_test_pred)
test_df['class'] = predicted_labels

In [None]:
# Keep only the ID and predicted class columns and write them to the submission CSV
# (the DataFrame index is not written).
submission = test_df.loc[:, ['ID', 'class']]
submission.to_csv("submission.csv", index=False)