In [78]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, classification_report
import warnings
# Silence every warning for the rest of the session. This also hides
# deprecation and convergence warnings emitted by the libraries used below.
warnings.filterwarnings('ignore')


In [79]:
# Read the training table and the test table; both paths are relative to the working directory.
train, test = (pd.read_csv(csv_name) for csv_name in ("hacktrain.csv", "hacktest.csv"))



In [80]:
# Quick look at the training table: dimensions, column names and label balance,
# each printed on its own line.
print(train.shape, train.columns, train['class'].value_counts(), sep="\n")

# Last expression of the cell, so the first rows are rendered as the cell output.
train.head()


(8000, 30)
Index(['Unnamed: 0', 'ID', 'class', '20150720_N', '20150602_N', '20150517_N',
       '20150501_N', '20150415_N', '20150330_N', '20150314_N', '20150226_N',
       '20150210_N', '20150125_N', '20150109_N', '20141117_N', '20141101_N',
       '20141016_N', '20140930_N', '20140813_N', '20140626_N', '20140610_N',
       '20140525_N', '20140509_N', '20140423_N', '20140407_N', '20140322_N',
       '20140218_N', '20140202_N', '20140117_N', '20140101_N'],
      dtype='object')
class
forest        6159
farm           841
impervious     669
grass          196
water          105
orchard         30
Name: count, dtype: int64


Unnamed: 0.1,Unnamed: 0,ID,class,20150720_N,20150602_N,20150517_N,20150501_N,20150415_N,20150330_N,20150314_N,...,20140610_N,20140525_N,20140509_N,20140423_N,20140407_N,20140322_N,20140218_N,20140202_N,20140117_N,20140101_N
0,0,1,water,637.595,658.668,-1882.03,-1924.36,997.904,-1739.99,630.087,...,,-1043.16,-1942.49,267.138,,,211.328,-2203.02,-1180.19,433.906
1,1,2,water,634.24,593.705,-1625.79,-1672.32,914.198,-692.386,707.626,...,,-933.934,-625.385,120.059,364.858,476.972,220.878,-2250.0,-1360.56,524.075
2,3,4,water,58.0174,-1599.16,,-1052.63,,-1564.63,,...,-1025.88,368.622,,-1227.8,304.621,,369.214,-2202.12,,-1343.55
3,4,5,water,72.518,,380.436,-1256.93,515.805,-1413.18,-802.942,...,-1813.95,155.624,,-924.073,432.15,282.833,298.32,-2197.36,,-826.727
4,7,8,water,1136.44,,,1647.83,1935.8,,2158.98,...,1535.0,1959.43,-279.317,-384.915,-113.406,1020.72,1660.65,-116.801,-568.05,-1357.14


In [81]:
# NDVI reading columns are identified by their '_N' suffix in the training table.
ndvi_cols = [col for col in train.columns if col.endswith('_N')]

# Learn the per-column medians from the training table only and reuse them for
# the test table, so both tables are imputed with the same values (the same
# fit-on-train / apply-to-test convention the StandardScaler step follows).
# NOTE(review): the medians are computed over all training rows, including the
# ones later held out for validation.
ndvi_medians = train[ndvi_cols].median()
train[ndvi_cols] = train[ndvi_cols].fillna(ndvi_medians)
test[ndvi_cols] = test[ndvi_cols].fillna(ndvi_medians)


In [82]:
def add_features(df, cols=None):
    """Return a copy of ``df`` with row-wise summary statistics appended.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the columns listed in ``cols``.
    cols : list of str, optional
        Columns to summarise across each row. When omitted, the module-level
        ``ndvi_cols`` list is used, which keeps existing one-argument calls working.

    Returns
    -------
    pandas.DataFrame
        A new frame with the original columns followed by ``ndvi_mean``,
        ``ndvi_std``, ``ndvi_max``, ``ndvi_min``, ``ndvi_range``, ``ndvi_q1``,
        ``ndvi_q3`` and ``ndvi_iqr``. The input frame is left unmodified, so
        re-running a cell that calls this function cannot corrupt its input.
    """
    if cols is None:
        cols = ndvi_cols

    # Select the source columns once, before any derived column is added.
    values = df[cols]
    out = df.copy()

    out['ndvi_mean'] = values.mean(axis=1)
    out['ndvi_std'] = values.std(axis=1)
    out['ndvi_max'] = values.max(axis=1)
    out['ndvi_min'] = values.min(axis=1)
    out['ndvi_range'] = out['ndvi_max'] - out['ndvi_min']
    out['ndvi_q1'] = values.quantile(0.25, axis=1)
    out['ndvi_q3'] = values.quantile(0.75, axis=1)
    out['ndvi_iqr'] = out['ndvi_q3'] - out['ndvi_q1']
    return out

# Append the row-wise NDVI summary columns to both tables, training table first.
train, test = (add_features(frame) for frame in (train, test))


In [83]:
# Map the string class names to integer codes; `le` is kept so that predictions
# can be turned back into class names later.
le = LabelEncoder()
class_codes = le.fit_transform(train['class'])
train['class_encoded'] = class_codes


In [84]:
# Model inputs: every raw NDVI column followed by the eight row-wise summary statistics.
features = [
    *ndvi_cols,
    'ndvi_mean', 'ndvi_std', 'ndvi_max', 'ndvi_min',
    'ndvi_range', 'ndvi_q1', 'ndvi_q3', 'ndvi_iqr',
]

X, y = train[features], train['class_encoded']
X_test_final = test[features]

# Standardise: the scaling statistics are learned from the training matrix and
# then reused unchanged for the test matrix.
# NOTE(review): the scaler sees all training rows, including the ones held out
# for validation in the next cell.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
X_test_scaled = scaler.transform(X_test_final)


In [85]:
# Hold out 20% of the rows for validation; `stratify=y` keeps the class
# proportions the same in both parts, and the seed makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)

# `multi_class` is deprecated in recent scikit-learn releases and scheduled for
# removal. With the lbfgs solver and more than two classes the default already
# fits a multinomial model, so the argument is omitted to keep this cell
# running on newer versions without changing the fitted model.
model = LogisticRegression(
    solver='lbfgs',
    max_iter=3000,
    C=1.0,
    random_state=42
)
model.fit(X_train, y_train)


In [86]:
# Score the held-out rows: overall accuracy first, then per-class precision/recall/F1
# labelled with the original class names.
val_preds = model.predict(X_val)
val_acc = accuracy_score(y_val, val_preds)
print(f"Validation Accuracy: {val_acc:.4f}")

val_report = classification_report(y_val, val_preds, target_names=le.classes_)
print("\nClassification Report:\n", val_report)


Validation Accuracy: 0.9081

Classification Report:
               precision    recall  f1-score   support

        farm       0.74      0.60      0.66       168
      forest       0.94      0.97      0.96      1232
       grass       0.85      0.59      0.70        39
  impervious       0.82      0.86      0.84       134
     orchard       0.33      0.17      0.22         6
       water       0.76      0.62      0.68        21

    accuracy                           0.91      1600
   macro avg       0.74      0.63      0.68      1600
weighted avg       0.90      0.91      0.90      1600



In [87]:
# Predict integer class codes for the test matrix, then translate them back to
# the original class names and store them on the test table.
test_preds = model.predict(X_test_scaled)
predicted_labels = le.inverse_transform(test_preds)
test['class'] = predicted_labels


In [88]:
# Keep only the identifier and the predicted class name for the submission file.
submission = test[['ID', 'class']]

# Write the file once (the original cell wrote the same file twice).
submission.to_csv("submission.csv", index=False)

# Last expression of the cell, so the preview is actually rendered.
submission.head()
