# Summer Analytics 2025 Hackathon
## NDVI Land Cover Classification using Logistic Regression

In [9]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [10]:
# Read the labelled training data and the unlabelled test data, in that order.
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in ('hacktrain.csv', 'hacktest.csv')
)

In [11]:
# Discard the stray index column that the CSV export may have left behind.
# errors='ignore' makes the drop a no-op when the column is absent, and
# rebinding (instead of inplace=True) keeps this cell safe to re-run.
train_df = train_df.drop(columns=['Unnamed: 0'], errors='ignore')
test_df = test_df.drop(columns=['Unnamed: 0'], errors='ignore')

In [12]:
# NDVI measurement columns are recognised by the '_N' marker in their names.
ndvi_columns = [col for col in train_df.columns if '_N' in col]

# Learn the per-column medians from the training set only and reuse them for
# the test set, so both frames receive exactly the same preprocessing and the
# test predictions do not depend on the statistics of the test batch itself.
ndvi_medians = train_df[ndvi_columns].median()

train_df[ndvi_columns] = train_df[ndvi_columns].fillna(ndvi_medians)
test_df[ndvi_columns] = test_df[ndvi_columns].fillna(ndvi_medians)

In [13]:
def add_features(df, columns=None):
    """Return a copy of ``df`` extended with row-wise NDVI summary statistics.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the NDVI measurement columns.
    columns : sequence of str, optional
        Columns to summarise per row. When omitted, the notebook-level
        ``ndvi_columns`` list is used.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``NDVI_mean``, ``NDVI_std``, ``NDVI_min``,
        ``NDVI_max`` and ``NDVI_range`` appended. The input frame is left
        unmodified, so the function is safe to call repeatedly.
    """
    if columns is None:
        columns = ndvi_columns
    # Select once up front so the derived columns never feed back into
    # the statistics.
    ndvi = df[list(columns)]

    out = df.copy()
    out['NDVI_mean'] = ndvi.mean(axis=1)
    out['NDVI_std'] = ndvi.std(axis=1)
    out['NDVI_min'] = ndvi.min(axis=1)
    out['NDVI_max'] = ndvi.max(axis=1)
    out['NDVI_range'] = out['NDVI_max'] - out['NDVI_min']
    return out

# Apply the same feature engineering to both splits (train first, then test).
train_df, test_df = map(add_features, (train_df, test_df))

In [14]:
# Target labels.
y = train_df['class']

# Model inputs: every column except the row identifier (and, for the
# training frame, the label itself).
X = train_df.drop(['ID', 'class'], axis=1)
X_test = test_df.drop('ID', axis=1)

In [15]:
# Standardise the features, then fit a logistic-regression classifier.
# The explicit multi_class='multinomial' argument is omitted: it is deprecated
# in recent scikit-learn releases, and the default lbfgs solver already fits a
# multinomial model when the target has three or more classes.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('logreg', LogisticRegression(max_iter=1000))
])

pipeline.fit(X, y)
print("Model trained!")



Model trained!


In [16]:
# Predict a class for every test row and pair it with that row's ID.
preds = pipeline.predict(X_test)
# 'class' is a Python keyword, so the new column is passed via dict unpacking.
submission = test_df[['ID']].assign(**{'class': preds})

# Write the submission file without the index column, then display the frame.
submission.to_csv('submission.csv', index=False)
submission

Unnamed: 0,ID,class
0,1,forest
1,2,forest
2,3,orchard
3,4,forest
4,5,forest
...,...,...
2840,2841,water
2841,2842,water
2842,2843,water
2843,2844,water
