In [25]:
import os
import pandas as pd
import torch
import torch.nn as nn

from random import sample
from sklearn.metrics import f1_score, roc_auc_score, classification_report
from sklearn.model_selection import train_test_split
from torch.autograd import Variable 

In [2]:
district ='Karnal'

In [3]:
os.listdir('data')    

['Dewas_NDVI.csv',
 'Dewas_points.csv',
 'Kaithal_NDVI.csv',
 'Kaithal_points.csv',
 'Karnal_NDVI.csv',
 'Karnal_points.csv']

In [4]:
data_points = pd.read_csv(f'data/{district}_points.csv')

In [5]:
data_ndvi = pd.read_csv(f'data/{district}_NDVI.csv')

In [6]:
data = data_points.merge(data_ndvi, left_on='gfid', right_on='gfid')

In [7]:
data.head()

Unnamed: 0,gfid,state,district,village,lon,lat,wheat,datenum,date,ndvi
0,54001,Haryana,Karnal,Kabulpur Khera,76.707176,29.55072,1,0,2020-10-20,0.737
1,54001,Haryana,Karnal,Kabulpur Khera,76.707176,29.55072,1,1,2020-10-21,0.727
2,54001,Haryana,Karnal,Kabulpur Khera,76.707176,29.55072,1,2,2020-10-22,0.714
3,54001,Haryana,Karnal,Kabulpur Khera,76.707176,29.55072,1,3,2020-10-23,0.697
4,54001,Haryana,Karnal,Kabulpur Khera,76.707176,29.55072,1,4,2020-10-24,0.676


In [8]:
data_ndvi.head()

Unnamed: 0,gfid,datenum,date,ndvi
0,54001,0,2020-10-20,0.737
1,54001,1,2020-10-21,0.727
2,54001,2,2020-10-22,0.714
3,54001,3,2020-10-23,0.697
4,54001,4,2020-10-24,0.676


In [9]:
with open(f'{district}.json') as json_file:
    indices= json.load(json_file)
train = indices['train']
test = indices['test']

In [10]:
features = ['ndvi']
target = ['wheat']

In [11]:
X_train = []
X_test = []
for id_ in train:
    d = data.loc[data['gfid']==id_, features].iloc[:150]
    X_train.append(d)
for id_ in test:
    d = data.loc[data['gfid']==id_, features].iloc[:150]
    X_test.append(d)
X_train = pd.concat(X_train)
X_test = pd.concat(X_test)

In [12]:
h_1 = len(train)
h_2 = len(test)
w = 150
c = len(features)

X_train_tensors = Variable(torch.Tensor(X_train.values.reshape(h_1, w, c)))
X_test_tensors = Variable(torch.Tensor(X_test.values.reshape(h_2, w, c)))

y_train_tensors = Variable(torch.Tensor(data_points.loc[data_points['gfid'].isin(train), 'wheat'].values))
y_test_tensors = Variable(torch.Tensor(data_points.loc[data_points['gfid'].isin(test), 'wheat'].values)) 

y_train_tensors = y_train_tensors.type(torch.LongTensor)
y_test_tensors = y_test_tensors.type(torch.LongTensor)

In [13]:
class LSTM1(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers,
                 seq_length):
        super(LSTM1, self).__init__()
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length

        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            batch_first=True)
        
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.3, inplace=True),
            nn.Linear(in_features=(hidden_size), out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=num_classes),
            nn.Sigmoid()
        )
    def forward(self, x):
        h_0 = Variable(
            torch.zeros(self.num_layers, x.size(0), self.hidden_size))
        c_0 = Variable(
            torch.zeros(self.num_layers, x.size(0), self.hidden_size))
        output, (hn, cn) = self.lstm(x, (h_0, c_0))
        hn = hn.view(-1, self.hidden_size)
        out = self.classifier(hn)
        return out

In [14]:
input_size = len(features) #number of features
hidden_size = 32 #number of features in hidden state
num_layers = 1 #number of stacked lstm layers

num_classes = 2 #number of output classes 
batch_size = 10

In [15]:
lstm1 = LSTM1(num_classes, input_size, hidden_size, num_layers, X_train_tensors.shape[1]) #our lstm class

In [16]:
criterion = torch.nn.CrossEntropyLoss() 
optimizer = torch.optim.SGD(lstm1.parameters(), lr=0.001, momentum=0.9)

In [17]:
num_epochs = 100
for epoch in range(num_epochs):
    
    optimizer.zero_grad() #caluclate the gradient, manually setting to 0
    outputs = lstm1.forward(X_train_tensors) #forward pass

    # obtain the loss function
    loss = criterion(outputs, y_train_tensors)
    loss.backward() #calculates the loss of the loss function

    optimizer.step() #improve from loss, i.e backprop
    if epoch % 10 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item())) 

Epoch: 0, loss: 0.69407
Epoch: 10, loss: 0.69409
Epoch: 20, loss: 0.69392
Epoch: 30, loss: 0.69371
Epoch: 40, loss: 0.69330
Epoch: 50, loss: 0.69346
Epoch: 60, loss: 0.69342
Epoch: 70, loss: 0.69329
Epoch: 80, loss: 0.69315
Epoch: 90, loss: 0.69291


In [18]:
train_predict = lstm1(X_train_tensors)#forward pass 

In [19]:
train_predict

tensor([[0.4887, 0.4905],
        [0.4869, 0.4921],
        [0.4830, 0.4963],
        [0.4841, 0.4964],
        [0.4877, 0.4983],
        [0.4867, 0.4832],
        [0.4873, 0.4866],
        [0.4826, 0.4841],
        [0.4822, 0.4892],
        [0.4759, 0.4902],
        [0.4814, 0.4902],
        [0.4853, 0.4919],
        [0.4867, 0.4998],
        [0.4866, 0.4882],
        [0.4882, 0.4823],
        [0.4863, 0.4946],
        [0.4854, 0.4911],
        [0.4858, 0.4873],
        [0.4835, 0.4872],
        [0.4891, 0.4895],
        [0.4805, 0.4847],
        [0.4832, 0.4843],
        [0.4846, 0.4920],
        [0.4846, 0.4848],
        [0.4833, 0.4948],
        [0.4866, 0.4916],
        [0.4876, 0.4890],
        [0.4846, 0.4989],
        [0.4823, 0.4845],
        [0.4836, 0.4849],
        [0.4855, 0.4894],
        [0.4796, 0.4881],
        [0.4850, 0.4940],
        [0.4857, 0.4853],
        [0.4873, 0.4876],
        [0.4870, 0.4913],
        [0.4876, 0.4924],
        [0.4870, 0.4910],
        [0.4

In [20]:
train_predict.argmax(dim=1)

tensor([1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
        1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1])

In [21]:
print(classification_report(y_train_tensors, torch.argmax(train_predict, dim=1)))

              precision    recall  f1-score   support

           0       0.47      0.09      0.15        88
           1       0.59      0.93      0.72       122

    accuracy                           0.58       210
   macro avg       0.53      0.51      0.43       210
weighted avg       0.54      0.58      0.48       210



In [22]:
test_predict = lstm1.forward(X_test_tensors)

In [23]:
test_predict.argmax(dim=1)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
        1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [24]:
print(classification_report(y_test_tensors, torch.argmax(test_predict, dim=1)))

              precision    recall  f1-score   support

           0       0.75      0.15      0.25        20
           1       0.69      0.97      0.80        38

    accuracy                           0.69        58
   macro avg       0.72      0.56      0.53        58
weighted avg       0.71      0.69      0.61        58



In [26]:
roc_auc_score(y_test_tensors, torch.argmax(test_predict, dim=1))

0.5618421052631579