In [48]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler




In [49]:
# Read the labelled training table, then collect every column whose name
# contains the '_N' marker; these are the feature columns used below.
train_df = pd.read_csv("hacktrain.csv")
ndvi_cols = list(filter(lambda name: '_N' in name, train_df.columns))


In [50]:
# Per-column count of missing values, shown with the worst columns first.
missing_per_column = train_df.isna().sum()
missing_per_column.sort_values(ascending=False)

20140423_N    1760
20140626_N    1600
20140218_N    1440
20141016_N    1440
20150226_N    1360
20141117_N    1280
20140117_N    1200
20150602_N    1200
20140322_N    1120
20150330_N    1120
20150125_N    1040
20150501_N     960
20140509_N     880
20150109_N     880
20140930_N     800
20150517_N     800
20150314_N     720
20140525_N     720
20150210_N     640
20140407_N     640
20140202_N     560
20140813_N     560
20150720_N     560
20150415_N     480
20140610_N     480
20141101_N     400
20140101_N     400
ID               0
class            0
Unnamed: 0       0
dtype: int64

In [51]:
# Fill gaps along each row (axis=1) across the feature columns;
# limit_direction='both' lets leading and trailing gaps be filled as well.
ndvi_interpolated = train_df[ndvi_cols].interpolate(axis=1, limit_direction='both')
train_df[ndvi_cols] = ndvi_interpolated


In [52]:
# Replace any values still missing after interpolation with the column mean
# learned from the training data. The fitted imputer is kept for later reuse.
imputer = SimpleImputer(strategy='mean')
imputer.fit(train_df[ndvi_cols])
train_df[ndvi_cols] = imputer.transform(train_df[ndvi_cols])

In [53]:
# Standardise each feature column using statistics learned from the training
# data. The fitted scaler is reused on the test set further down.
scaler = StandardScaler()
scaler.fit(train_df[ndvi_cols])
train_df[ndvi_cols] = scaler.transform(train_df[ndvi_cols])


In [54]:
# Feature matrix (preprocessed '_N' columns) and the raw target labels.
X, y = train_df[ndvi_cols], train_df['class']


In [55]:
# Encode the class labels as integer category codes and keep a
# code -> original label lookup for decoding predictions later.
y_categorical = y.astype('category')
y_encoded = y_categorical.cat.codes
label_mapping = dict(enumerate(y_categorical.cat.categories))


In [56]:
# Fit a logistic-regression classifier on the preprocessed training features.
# `multi_class` is deliberately not passed: it is deprecated in recent
# scikit-learn releases, and with the lbfgs solver a multinomial model is
# already what gets fitted for a multi-class target.
# LogisticRegression is imported in the notebook's top import cell.
model = LogisticRegression(solver='lbfgs', max_iter=500)
model.fit(X, y_encoded)




In [57]:

# Load the test set and push it through the same preprocessing steps as the
# training data, reusing the transformers that were fitted on the training set.
test_df = pd.read_csv("hacktest.csv")

# Row-wise gap filling, identical to the training step.
test_df[ndvi_cols] = test_df[ndvi_cols].interpolate(axis=1, limit_direction='both')

# Use the train-fitted imputer rather than fitting a new one on the test data,
# so remaining gaps are filled with the training column means that the scaler
# and model were fitted against.
test_df[ndvi_cols] = imputer.transform(test_df[ndvi_cols])

# Apply the train-fitted scaling.
test_df[ndvi_cols] = scaler.transform(test_df[ndvi_cols])


In [58]:

# Predict integer class codes for the test features, then translate each code
# back to its original class label through the lookup table.
test_features = test_df[ndvi_cols]
test_preds_encoded = model.predict(test_features)

test_preds_labels = list(map(label_mapping.__getitem__, test_preds_encoded))


In [59]:

# Pair each test ID with its predicted label and write the result to disk
# without the DataFrame index.
submission_columns = {'ID': test_df['ID'], 'class': test_preds_labels}
submission = pd.DataFrame(submission_columns)

submission.to_csv("submission.csv", index=False)


In [60]:
# Score the model on the same data it was fitted on. This is training
# accuracy only, not an estimate of performance on unseen data.
train_accuracy = model.score(X=X, y=y_encoded)
print("Training Accuracy:", train_accuracy)

Training Accuracy: 0.868375


In [61]:



# Re-read the written submission file and sanity-check it: preview the first
# rows, count missing values per column, and list the distinct labels.
submission = pd.read_csv("submission.csv")

print(submission.head())

missing_counts = submission.isnull().sum()
unique_labels = submission['class'].unique()
print("\nMissing values:\n", missing_counts)
print("\nUnique class labels:\n", unique_labels)


   ID    class
0   1  orchard
1   2   forest
2   3  orchard
3   4   forest
4   5   forest

Missing values:
 ID       0
class    0
dtype: int64

Unique class labels:
 ['orchard' 'forest' 'farm' 'water' 'impervious' 'grass']
