All the results are compared in the presentation. Notebooks are just for the calculations.

In [1]:
import json
import numpy as np
import os
import pandas as pd
import torch
import torch.nn as nn

from models import MyCNN
from numpy.random import choice
from random import sample
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score, classification_report
from sklearn.model_selection import train_test_split
from torch.autograd import Variable
from utils import custom_split_train_test

In [2]:
os.listdir('data')    

['Dewas_NDVI.csv',
 'Dewas_points.csv',
 'Kaithal_NDVI.csv',
 'Kaithal_points.csv',
 'Karnal_NDVI.csv',
 'Karnal_points.csv']

In [3]:
data_points = pd.read_csv('data/Karnal_points.csv')

In [4]:
data_ndvi = pd.read_csv('data/Karnal_NDVI.csv')

In [5]:
data = data_points.merge(data_ndvi, left_on='gfid', right_on='gfid')

In [6]:
data['month'] = data['date'].apply(pd.to_datetime).dt.month
data['day'] = data['date'].apply(pd.to_datetime).dt.day

In [7]:
data_ndvi.head()

Unnamed: 0,gfid,datenum,date,ndvi
0,54001,0,2020-10-20,0.737
1,54001,1,2020-10-21,0.727
2,54001,2,2020-10-22,0.714
3,54001,3,2020-10-23,0.697
4,54001,4,2020-10-24,0.676


We will take only data with at least 160 values at a time series.

In [8]:
i = 1
ids_not_full = []
for id_ in data_points['gfid'].values:
    shp = data_ndvi[data_ndvi['gfid'] == id_].shape[0]
    if shp <= 160:
        i += 1
        ids_not_full += [id_]

In [9]:
# Take onli gfids with at least 160 values.
data2 = data[~data['gfid'].isin(ids_not_full)]

In [10]:
# Remember those gfids.
gfids = data2['gfid'].unique()

In [11]:
np.random.seed(42)
np.random.shuffle(gfids)

# Randomly split data into train and test.
train_ids = gfids[:210]
test_ids = gfids[210:]

In [12]:
# Save train and test gfids for classic ML experiments.
indices = {}
indices['train'] = list([int(x) for x in train_ids])
indices['test'] = list([int(x) for x in test_ids])

with open('Karnal.json', 'w') as f:
    json.dump(indices, f)

In [17]:
data2.head()

Unnamed: 0,gfid,state,district,village,lon,lat,wheat,datenum,date,ndvi,month,day
0,54001,Haryana,Karnal,Kabulpur Khera,76.707176,29.55072,1,0,2020-10-20,0.737,10,20
1,54001,Haryana,Karnal,Kabulpur Khera,76.707176,29.55072,1,1,2020-10-21,0.727,10,21
2,54001,Haryana,Karnal,Kabulpur Khera,76.707176,29.55072,1,2,2020-10-22,0.714,10,22
3,54001,Haryana,Karnal,Kabulpur Khera,76.707176,29.55072,1,3,2020-10-23,0.697,10,23
4,54001,Haryana,Karnal,Kabulpur Khera,76.707176,29.55072,1,4,2020-10-24,0.676,10,24


In [18]:
data2 = data2.dropna()

In [19]:
data2.shape

(53505, 12)

In [None]:
datasets_train, datasets_test, y_train, y_test = custom_split_train_test(data2, data_points, train_ids, test_ids)

In [22]:
data_train = torch.stack([torch.Tensor(dataset) for dataset in datasets_train])
data_test = torch.stack([torch.Tensor(dataset) for dataset in datasets_test])

In [25]:
# Initialize CNN for 2 classes.
MyCNN = MyCNN(num_classes=2)

In [26]:
criterion = torch.nn.CrossEntropyLoss() 
optimizer = torch.optim.Adam(params=MyCNN.parameters(), lr=0.0001)

In [27]:
torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x227f5274dc0>

In [28]:
y_train_tensors = Variable(torch.Tensor(y_train))
y_test_tensors = Variable(torch.Tensor(y_test))

y_train_tensors = y_train_tensors.type(torch.LongTensor)
y_test_tensors = y_test_tensors.type(torch.LongTensor)

In [29]:
X = data_train
y = y_train_tensors

In [30]:
# Train 100 epochs with batch size 30.
res = []
n_epochs = 100
batch_size = 30

for epoch in range(n_epochs):

    # X is a torch Variable
    permutation = torch.randperm(X.size()[0])

    for i in range(0, X.size()[0], batch_size):
        optimizer.zero_grad()

        indices = permutation[i:i + batch_size]
        batch_x, batch_y = X[indices], y[indices]

        outputs = MyCNN.forward(batch_x) #forward pass

        # obtain the loss function
        loss = criterion(outputs, batch_y)
        res.append(loss)
        loss.backward() #calculates the loss of the loss function

        optimizer.step() #improve from loss, i.e backprop
    print("Epoch: %d, loss: %1.5f" % (epoch, loss.item())) 

Epoch: 0, loss: 0.68721
Epoch: 1, loss: 0.67702
Epoch: 2, loss: 0.69775
Epoch: 3, loss: 0.67789
Epoch: 4, loss: 0.70636
Epoch: 5, loss: 0.67549
Epoch: 6, loss: 0.63633
Epoch: 7, loss: 0.69148
Epoch: 8, loss: 0.68334
Epoch: 9, loss: 0.68927
Epoch: 10, loss: 0.68439
Epoch: 11, loss: 0.64875
Epoch: 12, loss: 0.62527
Epoch: 13, loss: 0.64275
Epoch: 14, loss: 0.64285
Epoch: 15, loss: 0.70570
Epoch: 16, loss: 0.61031
Epoch: 17, loss: 0.63051
Epoch: 18, loss: 0.59401
Epoch: 19, loss: 0.65769
Epoch: 20, loss: 0.63224
Epoch: 21, loss: 0.54587
Epoch: 22, loss: 0.62619
Epoch: 23, loss: 0.62445
Epoch: 24, loss: 0.57957
Epoch: 25, loss: 0.62494
Epoch: 26, loss: 0.64481
Epoch: 27, loss: 0.58387
Epoch: 28, loss: 0.73108
Epoch: 29, loss: 0.60892
Epoch: 30, loss: 0.66149
Epoch: 31, loss: 0.60979
Epoch: 32, loss: 0.62212
Epoch: 33, loss: 0.48903
Epoch: 34, loss: 0.64950
Epoch: 35, loss: 0.69438
Epoch: 36, loss: 0.75232
Epoch: 37, loss: 0.51765
Epoch: 38, loss: 0.58584
Epoch: 39, loss: 0.53924
Epoch: 40,

In [31]:
X_test_tensors = data_test

In [32]:
test_predict = MyCNN(X_test_tensors)

In [34]:
test_predict.argmax(dim=1)

tensor([0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0,
        1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0,
        1, 0, 1, 1, 1, 1, 1, 0, 0, 1])

In [35]:
print(classification_report(y_test_tensors, torch.argmax(test_predict, dim=1)))

              precision    recall  f1-score   support

           0       0.64      0.70      0.67        20
           1       0.83      0.79      0.81        38

    accuracy                           0.76        58
   macro avg       0.73      0.74      0.74        58
weighted avg       0.77      0.76      0.76        58



In [36]:
accuracy_score(y_test_tensors, torch.argmax(test_predict, dim=1))

0.7586206896551724

In [37]:
roc_auc_score(y_test_tensors, torch.argmax(test_predict, dim=1))

0.7447368421052631

Also get results for train to detest over/under training.

In [38]:
train_predict = MyCNN(X)

In [39]:
print(classification_report(y_train_tensors, torch.argmax(train_predict, dim=1)))

              precision    recall  f1-score   support

           0       0.68      0.67      0.67        88
           1       0.76      0.77      0.77       122

    accuracy                           0.73       210
   macro avg       0.72      0.72      0.72       210
weighted avg       0.73      0.73      0.73       210



In [40]:
accuracy_score(y_train_tensors, torch.argmax(train_predict, dim=1))

0.7285714285714285

In [41]:
roc_auc_score(y_train_tensors, torch.argmax(train_predict, dim=1))

0.720473174366617

All the results are compared in the presentation. Notebooks are just for the calculations.