All the results are compared in the presentation. Notebooks are just for the calculations.

In [1]:
import json
import numpy as np
import os
import pandas as pd
import torch
import torch.nn as nn

from models import MyCNN
from numpy.random import choice
from random import sample
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score, classification_report
from sklearn.model_selection import train_test_split
from torch.autograd import Variable
from utils import custom_split_train_test

In [2]:
os.listdir('data')    

['Dewas_NDVI.csv',
 'Dewas_points.csv',
 'Kaithal_NDVI.csv',
 'Kaithal_points.csv',
 'Karnal_NDVI.csv',
 'Karnal_points.csv']

In [3]:
data_points = pd.read_csv('data/Dewas_points.csv')

In [4]:
data_ndvi = pd.read_csv('data/Dewas_NDVI.csv')

In [5]:
data = data_points.merge(data_ndvi, left_on='gfid', right_on='gfid')

In [6]:
data['month'] = data['date'].apply(pd.to_datetime).dt.month
data['day'] = data['date'].apply(pd.to_datetime).dt.day

In [7]:
data_ndvi.head()

Unnamed: 0,gfid,datenum,date,ndvi
0,72001,0,2020-10-20,0.186
1,72001,1,2020-10-21,0.184
2,72001,2,2020-10-22,0.183
3,72001,3,2020-10-23,0.182
4,72001,4,2020-10-24,0.181


We will take only data with at least 203 values at a time series.

In [8]:
i = 1
ids_not_full = []
for id_ in data_points['gfid'].values:
    shp = data_ndvi[data_ndvi['gfid'] == id_].shape[0]
    if shp != 203:
        i += 1
        ids_not_full += [id_]

In [9]:
# Take only gfids with at least 160 values.
data2 = data[~data['gfid'].isin(ids_not_full)]

In [11]:
# Remember those gfids.
gfids = data2['gfid'].unique()

In [16]:
np.random.seed(42)
np.random.shuffle(gfids)

# Randomly split data into train and test.
train_ids = gfids[:300]
test_ids = gfids[300:]

In [17]:
# Save train and test gfids for classic ML experiments.
indices = {}
indices['train'] = list([int(x) for x in train_ids])
indices['test'] = list([int(x) for x in test_ids])

with open('Dewas.json', 'w') as f:
    json.dump(indices, f)

In [19]:
data2.head()

Unnamed: 0,gfid,state,district,village,lon,lat,wheat,datenum,date,ndvi,month,day
0,72001,Madhya Pradesh,Dewas,VijayaganjMandi,75.96199,23.218479,0,0,2020-10-20,0.186,10,20
1,72001,Madhya Pradesh,Dewas,VijayaganjMandi,75.96199,23.218479,0,1,2020-10-21,0.184,10,21
2,72001,Madhya Pradesh,Dewas,VijayaganjMandi,75.96199,23.218479,0,2,2020-10-22,0.183,10,22
3,72001,Madhya Pradesh,Dewas,VijayaganjMandi,75.96199,23.218479,0,3,2020-10-23,0.182,10,23
4,72001,Madhya Pradesh,Dewas,VijayaganjMandi,75.96199,23.218479,0,4,2020-10-24,0.181,10,24


In [20]:
data2 = data2.dropna()

In [21]:
data2.shape

(76125, 12)

In [23]:
datasets_train, datasets_test, y_train, y_test = custom_split_train_test(data2, data_points, train_ids, test_ids)

In [24]:
data_train = torch.stack([torch.Tensor(dataset) for dataset in datasets_train])
data_test = torch.stack([torch.Tensor(dataset) for dataset in datasets_test])

In [27]:
# Initialize CNN for 2 classes.
MyCNN = MyCNN(num_classes=2)

In [28]:
criterion = torch.nn.CrossEntropyLoss() 
optimizer = torch.optim.Adam(params=MyCNN.parameters(), lr=0.0001)#1, weight_decay=0.5)

In [29]:
torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x24940eb1be0>

In [30]:
y_train_tensors = Variable(torch.Tensor(y_train))
y_test_tensors = Variable(torch.Tensor(y_test))

y_train_tensors = y_train_tensors.type(torch.LongTensor)
y_test_tensors = y_test_tensors.type(torch.LongTensor)

In [31]:
X = data_train
y = y_train_tensors

In [33]:
# Train 100 epochs with batch size 30.
res = []
n_epochs = 100
batch_size = 30

for epoch in range(n_epochs):

    # X is a torch Variable
    permutation = torch.randperm(X.size()[0])

    for i in range(0, X.size()[0], batch_size):
        optimizer.zero_grad()

        indices = permutation[i:i + batch_size]
        batch_x, batch_y = X[indices], y[indices]

        outputs = MyCNN.forward(batch_x) #forward pass

        # obtain the loss function
        loss = criterion(outputs, batch_y)
        res.append(loss)
        loss.backward() #calculates the loss of the loss function

        optimizer.step() #improve from loss, i.e backprop
    print("Epoch: %d, loss: %1.5f" % (epoch, loss.item())) 

Epoch: 0, loss: 0.69055
Epoch: 1, loss: 0.69177
Epoch: 2, loss: 0.69123
Epoch: 3, loss: 0.69005
Epoch: 4, loss: 0.68600
Epoch: 5, loss: 0.68254
Epoch: 6, loss: 0.67728
Epoch: 7, loss: 0.65779
Epoch: 8, loss: 0.65555
Epoch: 9, loss: 0.63226
Epoch: 10, loss: 0.63890
Epoch: 11, loss: 0.65890
Epoch: 12, loss: 0.65263
Epoch: 13, loss: 0.62813
Epoch: 14, loss: 0.64518
Epoch: 15, loss: 0.60604
Epoch: 16, loss: 0.71959
Epoch: 17, loss: 0.54802
Epoch: 18, loss: 0.63301
Epoch: 19, loss: 0.59190
Epoch: 20, loss: 0.61295
Epoch: 21, loss: 0.61179
Epoch: 22, loss: 0.69637
Epoch: 23, loss: 0.68450
Epoch: 24, loss: 0.60049
Epoch: 25, loss: 0.70719
Epoch: 26, loss: 0.58988
Epoch: 27, loss: 0.67193
Epoch: 28, loss: 0.59992
Epoch: 29, loss: 0.60697
Epoch: 30, loss: 0.59032
Epoch: 31, loss: 0.59542
Epoch: 32, loss: 0.56685
Epoch: 33, loss: 0.62591
Epoch: 34, loss: 0.57419
Epoch: 35, loss: 0.56667
Epoch: 36, loss: 0.63944
Epoch: 37, loss: 0.66478
Epoch: 38, loss: 0.61943
Epoch: 39, loss: 0.66849
Epoch: 40,

In [34]:
X_test_tensors = data_test

In [35]:
test_predict = MyCNN(X_test_tensors)

In [37]:
test_predict.argmax(dim=1)

tensor([0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0,
        1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1,
        1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0,
        0, 0, 1])

In [38]:
print(classification_report(y_test_tensors, torch.argmax(test_predict, dim=1)))

              precision    recall  f1-score   support

           0       0.83      0.75      0.79        40
           1       0.74      0.83      0.78        35

    accuracy                           0.79        75
   macro avg       0.79      0.79      0.79        75
weighted avg       0.79      0.79      0.79        75



In [39]:
accuracy_score(y_test_tensors, torch.argmax(test_predict, dim=1))

0.7866666666666666

In [40]:
roc_auc_score(y_test_tensors, torch.argmax(test_predict, dim=1))

0.7892857142857143

Also get results for train to detest over/under training.

In [41]:
train_predict = MyCNN(X)

In [42]:
print(classification_report(y_train_tensors, torch.argmax(train_predict, dim=1)))

              precision    recall  f1-score   support

           0       0.77      0.72      0.75       157
           1       0.71      0.77      0.74       143

    accuracy                           0.74       300
   macro avg       0.74      0.74      0.74       300
weighted avg       0.75      0.74      0.74       300



In [43]:
accuracy_score(y_train_tensors, torch.argmax(train_predict, dim=1))

0.7433333333333333

In [44]:
roc_auc_score(y_train_tensors, torch.argmax(train_predict, dim=1))

0.7444879960803528

All the results are compared in the presentation. Notebooks are just for the calculations.