In [27]:
import os
import pandas as pd
import torch
import torch.nn as nn

from random import sample
from sklearn.metrics import f1_score, roc_auc_score, classification_report
from sklearn.model_selection import train_test_split
from torch.autograd import Variable 

In [2]:
district ='Kaithal'

In [3]:
os.listdir('data')    

['Dewas_NDVI.csv',
 'Dewas_points.csv',
 'Kaithal_NDVI.csv',
 'Kaithal_points.csv',
 'Karnal_NDVI.csv',
 'Karnal_points.csv']

In [4]:
data_points = pd.read_csv(f'data/{district}_points.csv')

In [5]:
data_ndvi = pd.read_csv(f'data/{district}_NDVI.csv')

In [6]:
data = data_points.merge(data_ndvi, left_on='gfid', right_on='gfid')

In [7]:
data.head()

Unnamed: 0,gfid,state,district,village,lon,lat,wheat,datenum,date,ndvi
0,52001,Haryana,Kaithal,Sirghar,76.420677,29.782459,1,0,2020-10-20,0.238
1,52001,Haryana,Kaithal,Sirghar,76.420677,29.782459,1,1,2020-10-21,0.235
2,52001,Haryana,Kaithal,Sirghar,76.420677,29.782459,1,2,2020-10-22,0.233
3,52001,Haryana,Kaithal,Sirghar,76.420677,29.782459,1,3,2020-10-23,0.231
4,52001,Haryana,Kaithal,Sirghar,76.420677,29.782459,1,4,2020-10-24,0.228


In [8]:
data_ndvi.head()

Unnamed: 0,gfid,datenum,date,ndvi
0,52001,0,2020-10-20,0.238
1,52001,1,2020-10-21,0.235
2,52001,2,2020-10-22,0.233
3,52001,3,2020-10-23,0.231
4,52001,4,2020-10-24,0.228


In [9]:
with open(f'{district}.json') as json_file:
    indices= json.load(json_file)
train = indices['train']
test = indices['test']

In [10]:
features = ['ndvi']
target = ['wheat']

In [11]:
X_train = []
X_test = []
for id_ in train:
    d = data.loc[data['gfid']==id_, features].iloc[:150]
    X_train.append(d)
for id_ in test:
    d = data.loc[data['gfid']==id_, features].iloc[:150]
    X_test.append(d)
X_train = pd.concat(X_train)
X_test = pd.concat(X_test)

In [12]:
h_1 = len(train)
h_2 = len(test)
w = 150
c = len(features)

X_train_tensors = Variable(torch.Tensor(X_train.values.reshape(h_1, w, c)))
X_test_tensors = Variable(torch.Tensor(X_test.values.reshape(h_2, w, c)))

y_train_tensors = Variable(torch.Tensor(data_points.loc[data_points['gfid'].isin(train), 'wheat'].values))
y_test_tensors = Variable(torch.Tensor(data_points.loc[data_points['gfid'].isin(test), 'wheat'].values)) 

y_train_tensors = y_train_tensors.type(torch.LongTensor)
y_test_tensors = y_test_tensors.type(torch.LongTensor)

In [13]:
class LSTM1(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers,
                 seq_length):
        super(LSTM1, self).__init__()
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length

        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            batch_first=True)
        
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.3, inplace=True),
            nn.Linear(in_features=(hidden_size), out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=num_classes),
            nn.Sigmoid()
        )
    def forward(self, x):
        h_0 = Variable(
            torch.zeros(self.num_layers, x.size(0), self.hidden_size))
        c_0 = Variable(
            torch.zeros(self.num_layers, x.size(0), self.hidden_size))
        output, (hn, cn) = self.lstm(x, (h_0, c_0))
        hn = hn.view(-1, self.hidden_size)
        out = self.classifier(hn)
        return out

In [14]:
input_size = len(features) #number of features
hidden_size = 32 #number of features in hidden state
num_layers = 1 #number of stacked lstm layers

num_classes = 2 #number of output classes 
batch_size = 10

In [15]:
lstm1 = LSTM1(num_classes, input_size, hidden_size, num_layers, X_train_tensors.shape[1]) #our lstm class

In [16]:
criterion = torch.nn.CrossEntropyLoss() 
optimizer = torch.optim.SGD(lstm1.parameters(), lr=0.001, momentum=0.9)

In [17]:
num_epochs = 100
for epoch in range(num_epochs):
    
    optimizer.zero_grad() #caluclate the gradient, manually setting to 0
    outputs = lstm1.forward(X_train_tensors) #forward pass

    # obtain the loss function
    loss = criterion(outputs, y_train_tensors)
    loss.backward() #calculates the loss of the loss function

    optimizer.step() #improve from loss, i.e backprop
    if epoch % 10 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item())) 

Epoch: 0, loss: 0.69265
Epoch: 10, loss: 0.69255
Epoch: 20, loss: 0.69233
Epoch: 30, loss: 0.69261
Epoch: 40, loss: 0.69246
Epoch: 50, loss: 0.69247
Epoch: 60, loss: 0.69242
Epoch: 70, loss: 0.69253
Epoch: 80, loss: 0.69267
Epoch: 90, loss: 0.69260


In [18]:
train_predict = lstm1(X_train_tensors)#forward pass 

In [19]:
train_predict

tensor([[0.4884, 0.5046],
        [0.4877, 0.5085],
        [0.4826, 0.5057],
        [0.4861, 0.5059],
        [0.4850, 0.5030],
        [0.4821, 0.5032],
        [0.4812, 0.5045],
        [0.4830, 0.5050],
        [0.4815, 0.5054],
        [0.4849, 0.5063],
        [0.4852, 0.5067],
        [0.4788, 0.5018],
        [0.4859, 0.5054],
        [0.4836, 0.5096],
        [0.4861, 0.5103],
        [0.4811, 0.5058],
        [0.4811, 0.5073],
        [0.4870, 0.5056],
        [0.4827, 0.5057],
        [0.4860, 0.5060],
        [0.4834, 0.5066],
        [0.4842, 0.5091],
        [0.4848, 0.5052],
        [0.4819, 0.5076],
        [0.4861, 0.5070],
        [0.4819, 0.5029],
        [0.4809, 0.5035],
        [0.4842, 0.5092],
        [0.4889, 0.5059],
        [0.4822, 0.5056],
        [0.4828, 0.5073],
        [0.4803, 0.5043],
        [0.4829, 0.5050],
        [0.4896, 0.5032],
        [0.4781, 0.5043],
        [0.4849, 0.5078],
        [0.4814, 0.5050],
        [0.4897, 0.5033],
        [0.4

In [20]:
train_predict.argmax(dim=1)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1])

In [21]:
print(classification_report(y_train_tensors, torch.argmax(train_predict, dim=1)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        70
           1       0.53      1.00      0.70        80

    accuracy                           0.53       150
   macro avg       0.27      0.50      0.35       150
weighted avg       0.28      0.53      0.37       150



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [22]:
test_predict = lstm1.forward(X_test_tensors)

In [23]:
test_predict.argmax(dim=1)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [24]:
print(classification_report(y_test_tensors, torch.argmax(test_predict, dim=1)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        18
           1       0.57      1.00      0.73        24

    accuracy                           0.57        42
   macro avg       0.29      0.50      0.36        42
weighted avg       0.33      0.57      0.42        42



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [28]:
roc_auc_score(y_test_tensors, torch.argmax(test_predict, dim=1))

0.5