In [1]:
import pandas as pd
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score, classification_report
from sklearn.model_selection import train_test_split
from random import sample
import os

import torch #pytorch
import torch.nn as nn
from torch.autograd import Variable 

сделать матрицу

In [2]:
os.listdir('data')    

['Dewas_NDVI.csv',
 'Dewas_points.csv',
 'Kaithal_NDVI.csv',
 'Kaithal_points.csv',
 'Karnal_NDVI.csv',
 'Karnal_points.csv']

In [3]:
data_points = pd.read_csv('data/Dewas_points.csv')

In [4]:
data_ndvi = pd.read_csv('data/Dewas_NDVI.csv')

In [5]:
data = data_points.merge(data_ndvi, left_on='gfid', right_on='gfid')

In [6]:
data['month'] = data['date'].apply(pd.to_datetime).dt.month
data['day'] = data['date'].apply(pd.to_datetime).dt.day

In [7]:
data_ndvi.head()

Unnamed: 0,gfid,datenum,date,ndvi
0,72001,0,2020-10-20,0.186
1,72001,1,2020-10-21,0.184
2,72001,2,2020-10-22,0.183
3,72001,3,2020-10-23,0.182
4,72001,4,2020-10-24,0.181


In [8]:
i = 1
ids_not_full = []
for id_ in data_points['gfid'].values:
    shp = data_ndvi[data_ndvi['gfid'] == id_].shape[0]
    if shp != 203:
        #print(id_, ' ', shp)
        i += 1
        ids_not_full += [id_]
#print(i)

In [9]:
data2 = data[~data['gfid'].isin(ids_not_full)]

In [10]:
data2[data2['gfid']==72001]['ndvi']

0      0.186
1      0.184
2      0.183
3      0.182
4      0.181
       ...  
198    0.123
199    0.121
200    0.120
201    0.119
202    0.118
Name: ndvi, Length: 203, dtype: float64

In [11]:
gfids = data2['gfid'].unique()

In [12]:
from numpy.random import choice

In [13]:
import numpy as np

In [14]:
np.random.seed(42)

In [15]:
np.random.shuffle(gfids)

In [16]:
train_ids = gfids[:300]
test_ids = gfids[300:]

In [17]:
indices = {}
indices['train'] = list([int(x) for x in train_ids])
indices['test'] = list([int(x) for x in test_ids])

In [18]:
import json
with open('Dewas.json', 'w') as f:
    json.dump(indices, f)

In [19]:
data2.head()

Unnamed: 0,gfid,state,district,village,lon,lat,wheat,datenum,date,ndvi,month,day
0,72001,Madhya Pradesh,Dewas,VijayaganjMandi,75.96199,23.218479,0,0,2020-10-20,0.186,10,20
1,72001,Madhya Pradesh,Dewas,VijayaganjMandi,75.96199,23.218479,0,1,2020-10-21,0.184,10,21
2,72001,Madhya Pradesh,Dewas,VijayaganjMandi,75.96199,23.218479,0,2,2020-10-22,0.183,10,22
3,72001,Madhya Pradesh,Dewas,VijayaganjMandi,75.96199,23.218479,0,3,2020-10-23,0.182,10,23
4,72001,Madhya Pradesh,Dewas,VijayaganjMandi,75.96199,23.218479,0,4,2020-10-24,0.181,10,24


In [20]:
data2 = data2.dropna()

In [21]:
data2.shape

(76125, 12)

In [22]:
data2.shape

(76125, 12)

In [23]:
datasets_train = []
datasets_test = []
y_train = []
y_test = []
for _ in gfids:
    rolling_data = []
    rolling_data.append(data2[data2['gfid']==_]['ndvi'].iloc[4:])
    for i in range(2, 11):
        rolling_data.append(data2[data2['gfid']==_]['ndvi'].rolling(i).mean())
    data_1_rolling = pd.concat(rolling_data, axis=1)
    data_1_rolling = data_1_rolling.dropna()
    data_1_rolling.columns = ['ndvi'] + [f'ndvi_{i}' for i in range(2,11)]
    if _ in train_ids:
        datasets_train.append(data_1_rolling[:150].values.reshape(15,10,10))
        y_train += [data_points.loc[data_points['gfid'] == _, 'wheat'].iloc[0]]
    else:
        datasets_test.append(data_1_rolling[:150].values.reshape(15,10,10))
        y_test += [data_points.loc[data_points['gfid'] == _, 'wheat'].iloc[0]]

In [24]:
data_train = torch.stack([torch.Tensor(dataset) for dataset in datasets_train])
data_test = torch.stack([torch.Tensor(dataset) for dataset in datasets_test])

In [25]:
def calc_out(h_in, w_in, ker, pad=0, stride=1):
    h_out = (h_in + 2 * pad - (ker - 1) - 1) / (stride) + 1
    w_out = (w_in + 2 * pad - (ker - 1) - 1) / (stride) + 1
    return h_out, w_out

In [26]:
class MyCNN(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes  #number of classes
        
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_channels=15, out_channels=30, kernel_size=2, stride=2, padding=1),  # (b x 30 x 6 x 6)
            nn.ReLU(),
            nn.Conv2d(30, 64, 2, padding=0, stride=1),  # (b x 64 x 5 x 5)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1, padding=0),  # (b x 64 x 4 x 4)
        )
        # classifier is just a name for linear layers
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.1, inplace=True),
            nn.Linear(in_features=(64 * 4 * 4), out_features=256),
            nn.ReLU(),
            #nn.Dropout(p=0.5, inplace=True),
            #nn.Linear(in_features=256, out_features=256),
            #nn.ReLU(),
            nn.Linear(in_features=256, out_features=num_classes),
            nn.Sigmoid()
        )
        #self.init_bias()

    def init_bias(self):
        for layer in self.net:
            if isinstance(layer, nn.Conv2d):
                nn.init.normal_(layer.weight, mean=0, std=0.01)
                nn.init.constant_(layer.bias, 0)
        # original paper = 1 for Conv2d layers 2nd, 4th, and 5th conv layers
        nn.init.constant_(self.net[0].bias, 1)
        nn.init.constant_(self.net[2].bias, 1)
        #nn.init.constant_(self.net[4].bias, 1)

    def forward(self, x):
        """
        Pass the input through the net.
        Args:
            x (Tensor): input tensor
        Returns:
            output (Tensor): output tensor
        """
        x = self.net(x)
        x = x.view(-1, 64 * 4 * 4)  # reduce the dimensions for linear layer input
        return self.classifier(x)

In [27]:
MyCNN = MyCNN(num_classes=2)

In [28]:
criterion = torch.nn.CrossEntropyLoss() 
optimizer = torch.optim.Adam(params=MyCNN.parameters(), lr=0.0001)#1, weight_decay=0.5)

In [29]:
torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x24940eb1be0>

In [30]:
y_train_tensors = Variable(torch.Tensor(y_train))
y_test_tensors = Variable(torch.Tensor(y_test))

y_train_tensors = y_train_tensors.type(torch.LongTensor)
y_test_tensors = y_test_tensors.type(torch.LongTensor)

In [31]:
X = data_train
y = y_train_tensors

In [32]:
res = []

In [33]:
n_epochs = 100  # or whatever
batch_size = 30  # or whatever

for epoch in range(n_epochs):

    # X is a torch Variable
    permutation = torch.randperm(X.size()[0])

    for i in range(0, X.size()[0], batch_size):
        optimizer.zero_grad()

        indices = permutation[i:i + batch_size]
        batch_x, batch_y = X[indices], y[indices]

        outputs = MyCNN.forward(batch_x) #forward pass

        # obtain the loss function
        loss = criterion(outputs, batch_y)
        res.append(loss)
        loss.backward() #calculates the loss of the loss function

        optimizer.step() #improve from loss, i.e backprop
    print("Epoch: %d, loss: %1.5f" % (epoch, loss.item())) 

Epoch: 0, loss: 0.69055
Epoch: 1, loss: 0.69177
Epoch: 2, loss: 0.69123
Epoch: 3, loss: 0.69005
Epoch: 4, loss: 0.68600
Epoch: 5, loss: 0.68254
Epoch: 6, loss: 0.67728
Epoch: 7, loss: 0.65779
Epoch: 8, loss: 0.65555
Epoch: 9, loss: 0.63226
Epoch: 10, loss: 0.63890
Epoch: 11, loss: 0.65890
Epoch: 12, loss: 0.65263
Epoch: 13, loss: 0.62813
Epoch: 14, loss: 0.64518
Epoch: 15, loss: 0.60604
Epoch: 16, loss: 0.71959
Epoch: 17, loss: 0.54802
Epoch: 18, loss: 0.63301
Epoch: 19, loss: 0.59190
Epoch: 20, loss: 0.61295
Epoch: 21, loss: 0.61179
Epoch: 22, loss: 0.69637
Epoch: 23, loss: 0.68450
Epoch: 24, loss: 0.60049
Epoch: 25, loss: 0.70719
Epoch: 26, loss: 0.58988
Epoch: 27, loss: 0.67193
Epoch: 28, loss: 0.59992
Epoch: 29, loss: 0.60697
Epoch: 30, loss: 0.59032
Epoch: 31, loss: 0.59542
Epoch: 32, loss: 0.56685
Epoch: 33, loss: 0.62591
Epoch: 34, loss: 0.57419
Epoch: 35, loss: 0.56667
Epoch: 36, loss: 0.63944
Epoch: 37, loss: 0.66478
Epoch: 38, loss: 0.61943
Epoch: 39, loss: 0.66849
Epoch: 40,

In [34]:
X_test_tensors = data_test

In [35]:
test_predict = MyCNN(X_test_tensors)#forward pass 

In [36]:
test_predict

tensor([[8.5675e-01, 1.5794e-01],
        [9.9993e-01, 1.0931e-04],
        [9.6581e-01, 3.7172e-02],
        [8.8820e-01, 1.2458e-01],
        [1.4697e-01, 8.4143e-01],
        [1.7471e-01, 8.2226e-01],
        [9.4348e-01, 6.6695e-02],
        [7.6777e-01, 2.4699e-01],
        [1.8034e-01, 8.0534e-01],
        [9.9507e-01, 6.2772e-03],
        [7.7432e-02, 9.2329e-01],
        [9.9698e-01, 3.9285e-03],
        [1.3717e-01, 8.6258e-01],
        [6.0570e-01, 4.1468e-01],
        [5.3790e-01, 4.5963e-01],
        [8.2267e-01, 2.0192e-01],
        [9.5879e-01, 4.7886e-02],
        [1.4979e-01, 8.4425e-01],
        [9.4752e-01, 6.2780e-02],
        [7.9062e-01, 2.2497e-01],
        [9.9142e-01, 9.9480e-03],
        [5.7573e-02, 9.4389e-01],
        [3.0913e-01, 6.8939e-01],
        [7.1460e-01, 2.9604e-01],
        [8.4612e-02, 9.1367e-01],
        [1.2568e-01, 8.7987e-01],
        [1.1006e-01, 8.9195e-01],
        [7.5012e-01, 2.5931e-01],
        [1.0283e-01, 8.9463e-01],
        [6.686

In [37]:
test_predict.argmax(dim=1)

tensor([0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0,
        1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1,
        1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0,
        0, 0, 1])

In [38]:
# 100 эпох, батч 30
print(classification_report(y_test_tensors, torch.argmax(test_predict, dim=1)))

              precision    recall  f1-score   support

           0       0.83      0.75      0.79        40
           1       0.74      0.83      0.78        35

    accuracy                           0.79        75
   macro avg       0.79      0.79      0.79        75
weighted avg       0.79      0.79      0.79        75



In [39]:
accuracy_score(y_test_tensors, torch.argmax(test_predict, dim=1))

0.7866666666666666

In [40]:
roc_auc_score(y_test_tensors, torch.argmax(test_predict, dim=1))

0.7892857142857143

In [41]:
train_predict = MyCNN(X)#forward pass 

In [42]:
# 100 эпох, батч 30
print(classification_report(y_train_tensors, torch.argmax(train_predict, dim=1)))

              precision    recall  f1-score   support

           0       0.77      0.72      0.75       157
           1       0.71      0.77      0.74       143

    accuracy                           0.74       300
   macro avg       0.74      0.74      0.74       300
weighted avg       0.75      0.74      0.74       300



In [43]:
accuracy_score(y_train_tensors, torch.argmax(train_predict, dim=1))

0.7433333333333333

In [44]:
roc_auc_score(y_train_tensors, torch.argmax(train_predict, dim=1))

0.7444879960803528