All the results are compared in the presentation. Notebooks are just for the calculations.

In [17]:
import json
import numpy as np
import os
import pandas as pd
import torch
import torch.nn as nn

from models import MyCNN
from numpy.random import choice
from random import sample
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score, classification_report
from sklearn.model_selection import train_test_split
from torch.autograd import Variable
from utils import custom_split_train_test

In [18]:
os.listdir('data')    

['Dewas_NDVI.csv',
 'Dewas_points.csv',
 'Kaithal_NDVI.csv',
 'Kaithal_points.csv',
 'Karnal_NDVI.csv',
 'Karnal_points.csv']

In [19]:
data_points = pd.read_csv('data/Kaithal_points.csv')

In [20]:
data_ndvi = pd.read_csv('data/Kaithal_NDVI.csv')

In [21]:
data = data_points.merge(data_ndvi, left_on='gfid', right_on='gfid')

In [22]:
data['month'] = data['date'].apply(pd.to_datetime).dt.month
data['day'] = data['date'].apply(pd.to_datetime).dt.day

In [23]:
data_ndvi.head()

Unnamed: 0,gfid,datenum,date,ndvi
0,52001,0,2020-10-20,0.238
1,52001,1,2020-10-21,0.235
2,52001,2,2020-10-22,0.233
3,52001,3,2020-10-23,0.231
4,52001,4,2020-10-24,0.228


We will take only data with at least 160 values at a time series.

In [24]:
i = 1
ids_not_full = []
for id_ in data_points['gfid'].values:
    shp = data_ndvi[data_ndvi['gfid'] == id_].shape[0]
    if shp <= 160:
        i += 1
        ids_not_full += [id_]

In [25]:
# Take only gfids with at least 160 values.
data2 = data[~data['gfid'].isin(ids_not_full)]

In [26]:
# Remember those gfids.
gfids = data2['gfid'].unique()

In [31]:
np.random.seed(42)
np.random.shuffle(gfids)

# Randomly split data into train and test.
train_ids = gfids[:150]
test_ids = gfids[150:]

In [32]:
# Save train and test gfids for classic ML experiments.
indices = {}
indices['train'] = list([int(x) for x in train_ids])
indices['test'] = list([int(x) for x in test_ids])

with open('Kaithal.json', 'w') as f:
    json.dump(indices, f)

In [17]:
data2.head()

Unnamed: 0,gfid,state,district,village,lon,lat,wheat,datenum,date,ndvi,month,day
0,52001,Haryana,Kaithal,Sirghar,76.420677,29.782459,1,0,2020-10-20,0.238,10,20
1,52001,Haryana,Kaithal,Sirghar,76.420677,29.782459,1,1,2020-10-21,0.235,10,21
2,52001,Haryana,Kaithal,Sirghar,76.420677,29.782459,1,2,2020-10-22,0.233,10,22
3,52001,Haryana,Kaithal,Sirghar,76.420677,29.782459,1,3,2020-10-23,0.231,10,23
4,52001,Haryana,Kaithal,Sirghar,76.420677,29.782459,1,4,2020-10-24,0.228,10,24


In [18]:
data2 = data2.dropna()

In [19]:
data2.shape

(38417, 12)

In [21]:
datasets_train, datasets_test, y_train, y_test = custom_split_train_test(data2, data_points, train_ids, test_ids)

In [22]:
data_train = torch.stack([torch.Tensor(dataset) for dataset in datasets_train])
data_test = torch.stack([torch.Tensor(dataset) for dataset in datasets_test])

In [25]:
# Initialize CNN for 2 classes.
MyCNN = MyCNN(num_classes=2)

In [26]:
criterion = torch.nn.CrossEntropyLoss() 
optimizer = torch.optim.Adam(params=MyCNN.parameters(), lr=0.0001)

In [27]:
torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x156a12fc6d0>

In [28]:
y_train_tensors = Variable(torch.Tensor(y_train))
y_test_tensors = Variable(torch.Tensor(y_test))

y_train_tensors = y_train_tensors.type(torch.LongTensor)
y_test_tensors = y_test_tensors.type(torch.LongTensor)

In [29]:
X = data_train
y = y_train_tensors

In [30]:
# Train 100 epochs with batch size 30.
res = []
n_epochs = 100
batch_size = 30

for epoch in range(n_epochs):

    # X is a torch Variable
    permutation = torch.randperm(X.size()[0])

    for i in range(0, X.size()[0], batch_size):
        optimizer.zero_grad()

        indices = permutation[i:i + batch_size]
        batch_x, batch_y = X[indices], y[indices]

        outputs = MyCNN.forward(batch_x) #forward pass

        # obtain the loss function
        loss = criterion(outputs, batch_y)
        res.append(loss)
        loss.backward() #calculates the loss of the loss function

        optimizer.step() #improve from loss, i.e backprop
    print("Epoch: %d, loss: %1.5f" % (epoch, loss.item())) 

Epoch: 0, loss: 0.69290
Epoch: 1, loss: 0.69343
Epoch: 2, loss: 0.69161
Epoch: 3, loss: 0.69159
Epoch: 4, loss: 0.69002
Epoch: 5, loss: 0.69269
Epoch: 6, loss: 0.69548
Epoch: 7, loss: 0.68522
Epoch: 8, loss: 0.68994
Epoch: 9, loss: 0.68749
Epoch: 10, loss: 0.69264
Epoch: 11, loss: 0.68804
Epoch: 12, loss: 0.69150
Epoch: 13, loss: 0.69480
Epoch: 14, loss: 0.68881
Epoch: 15, loss: 0.68533
Epoch: 16, loss: 0.67475
Epoch: 17, loss: 0.68363
Epoch: 18, loss: 0.66822
Epoch: 19, loss: 0.66564
Epoch: 20, loss: 0.67542
Epoch: 21, loss: 0.67078
Epoch: 22, loss: 0.67319
Epoch: 23, loss: 0.65149
Epoch: 24, loss: 0.64525
Epoch: 25, loss: 0.65516
Epoch: 26, loss: 0.66336
Epoch: 27, loss: 0.67866
Epoch: 28, loss: 0.68821
Epoch: 29, loss: 0.68315
Epoch: 30, loss: 0.64467
Epoch: 31, loss: 0.61441
Epoch: 32, loss: 0.66381
Epoch: 33, loss: 0.65487
Epoch: 34, loss: 0.65365
Epoch: 35, loss: 0.66326
Epoch: 36, loss: 0.63450
Epoch: 37, loss: 0.68182
Epoch: 38, loss: 0.63868
Epoch: 39, loss: 0.63957
Epoch: 40,

In [31]:
X_test_tensors = data_test

In [32]:
test_predict = MyCNN(X_test_tensors)

In [34]:
test_predict.argmax(dim=1)

tensor([0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1,
        0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1])

In [35]:
print(classification_report(y_test_tensors, torch.argmax(test_predict, dim=1)))

              precision    recall  f1-score   support

           0       0.50      0.39      0.44        18
           1       0.61      0.71      0.65        24

    accuracy                           0.57        42
   macro avg       0.55      0.55      0.55        42
weighted avg       0.56      0.57      0.56        42



In [36]:
accuracy_score(y_test_tensors, torch.argmax(test_predict, dim=1))

0.5714285714285714

In [37]:
roc_auc_score(y_test_tensors, torch.argmax(test_predict, dim=1))

0.5486111111111112

Also get results for train to detest over/under training.

In [38]:
train_predict = MyCNN(X)

In [39]:
print(classification_report(y_train_tensors, torch.argmax(train_predict, dim=1)))

              precision    recall  f1-score   support

           0       0.74      0.49      0.59        70
           1       0.65      0.85      0.74        80

    accuracy                           0.68       150
   macro avg       0.70      0.67      0.66       150
weighted avg       0.69      0.68      0.67       150



In [40]:
accuracy_score(y_train_tensors, torch.argmax(train_predict, dim=1))

0.68

In [41]:
roc_auc_score(y_train_tensors, torch.argmax(train_predict, dim=1))

0.6678571428571429

All the results are compared in the presentation. Notebooks are just for the calculations.