In [1]:
import os
import pandas as pd
import seaborn as sns
import warnings

from catboost import CatBoostClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from statsmodels.tsa.arima.model import ARIMA

In [2]:
warnings.filterwarnings('ignore')

In [3]:
sns.set_theme()
sns.set_palette('Paired')

In [4]:
os.listdir('data')    

['Dewas_NDVI.csv',
 'Dewas_points.csv',
 'Kaithal_NDVI.csv',
 'Kaithal_points.csv',
 'Karnal_NDVI.csv',
 'Karnal_points.csv']

In [5]:
districts = ['Dewas', 'Kaithal', 'Karnal']

In [6]:
indices = {'Dewas':{}, 'Kaithal':{}, 'Karnal':{}}

In [7]:
data = dict(zip(districts, [{}, {}, {}]))
for district in districts:
    points = pd.read_csv(f'data/{district}_points.csv')
    ndvi = pd.read_csv(f'data/{district}_NDVI.csv')
    data[district]['points'] = points
    data[district]['NDVI'] = ndvi
    data[district]['merged'] = points.merge(ndvi, left_on='gfid', right_on='gfid')

In [8]:
data['Dewas']['merged'].head()

Unnamed: 0,gfid,state,district,village,lon,lat,wheat,datenum,date,ndvi
0,72001,Madhya Pradesh,Dewas,VijayaganjMandi,75.96199,23.218479,0,0,2020-10-20,0.186
1,72001,Madhya Pradesh,Dewas,VijayaganjMandi,75.96199,23.218479,0,1,2020-10-21,0.184
2,72001,Madhya Pradesh,Dewas,VijayaganjMandi,75.96199,23.218479,0,2,2020-10-22,0.183
3,72001,Madhya Pradesh,Dewas,VijayaganjMandi,75.96199,23.218479,0,3,2020-10-23,0.182
4,72001,Madhya Pradesh,Dewas,VijayaganjMandi,75.96199,23.218479,0,4,2020-10-24,0.181


# Dewas

In [9]:
dewas_prepared = {}
for gfid in set(data['Dewas']['merged']['gfid']):
    d = data['Dewas']['merged'].loc[data['Dewas']['merged']['gfid']==gfid, 'ndvi'].copy()
    arima = ARIMA(d, order=(5, 1, 5)).fit()
    dewas_prepared[gfid] = arima.params.values

In [10]:
dewas_prepared = pd.DataFrame(dewas_prepared).T
dewas_prepared = dewas_prepared.sort_index()

In [11]:
target = data['Dewas']['points'][['wheat', 'gfid']]

In [12]:
target = target.sort_values('gfid')

In [13]:
X_train, X_test, y_train, y_test = train_test_split(dewas_prepared, target['wheat'], test_size=0.2, shuffle=True, stratify=target['wheat'])

In [14]:
indices['Dewas']['train'] = X_train.index.values.tolist()
indices['Dewas']['test'] = X_test.index.values.tolist()

In [15]:
model = CatBoostClassifier(iterations=100, depth=3, random_state=42, l2_leaf_reg=7)

In [16]:
model.fit(X_train, y_train, verbose=False)

<catboost.core.CatBoostClassifier at 0x1fa9dbc2970>

In [17]:
predict_train = model.predict(X_train)

In [18]:
print(classification_report(y_train, predict_train))

              precision    recall  f1-score   support

           0       0.65      0.62      0.64       174
           1       0.62      0.65      0.63       165

    accuracy                           0.63       339
   macro avg       0.63      0.63      0.63       339
weighted avg       0.63      0.63      0.63       339



In [19]:
predict = model.predict(X_test)

In [20]:
print(classification_report(y_test, predict))

              precision    recall  f1-score   support

           0       0.47      0.50      0.48        44
           1       0.42      0.39      0.41        41

    accuracy                           0.45        85
   macro avg       0.44      0.45      0.44        85
weighted avg       0.45      0.45      0.45        85



# Kaithal

In [21]:
kaithal_prepared = {}
for gfid in set(data['Kaithal']['merged']['gfid']):
    d = data['Kaithal']['merged'].loc[data['Kaithal']['merged']['gfid']==gfid, 'ndvi'].copy()
    arima = ARIMA(d, order=(1, 1, 5)).fit()
    kaithal_prepared[gfid] = arima.params.values

In [22]:
kaithal_prepared = pd.DataFrame(kaithal_prepared).T
kaithal_prepared = kaithal_prepared.sort_index()

In [23]:
target = data['Kaithal']['points'][['wheat', 'gfid']]

In [24]:
target = target.sort_values('gfid')

In [25]:
X_train, X_test, y_train, y_test = train_test_split(kaithal_prepared, target['wheat'], test_size=0.2, shuffle=True, stratify=target['wheat'])

In [26]:
indices['Kaithal']['train'] = X_train.index.values.tolist()
indices['Kaithal']['test'] = X_test.index.values.tolist()

In [27]:
model = CatBoostClassifier(iterations=100, depth=3, random_state=42, l2_leaf_reg=7)

In [28]:
model.fit(X_train, y_train, verbose=False)

<catboost.core.CatBoostClassifier at 0x1fa96222b50>

In [29]:
predict_train = model.predict(X_train)

In [30]:
print(classification_report(y_train, predict_train))

              precision    recall  f1-score   support

           0       0.71      0.50      0.59        70
           1       0.66      0.83      0.74        83

    accuracy                           0.68       153
   macro avg       0.69      0.67      0.66       153
weighted avg       0.69      0.68      0.67       153



In [31]:
predict = model.predict(X_test)

In [32]:
print(classification_report(y_test, predict))

              precision    recall  f1-score   support

           0       0.58      0.39      0.47        18
           1       0.59      0.76      0.67        21

    accuracy                           0.59        39
   macro avg       0.59      0.58      0.57        39
weighted avg       0.59      0.59      0.57        39



# Karnal

In [None]:
karnal_prepared = {}
for gfid in set(data['Karnal']['merged']['gfid']):
    d = data['Karnal']['merged'].loc[data['Karnal']['merged']['gfid']==gfid, 'ndvi'].copy()
    arima = ARIMA(d, order=(1, 0, 5)).fit()
    karnal_prepared[gfid] = arima.params.values

In [None]:
karnal_prepared = pd.DataFrame(karnal_prepared).T
karnal_prepared = karnal_prepared.sort_index()

In [None]:
target = data['Karnal']['points'][['wheat', 'gfid']]

In [None]:
target = target.sort_values('gfid')

In [None]:
X_train, X_test, y_train, y_test = train_test_split(karnal_prepared, target['wheat'], test_size=0.2, shuffle=True, stratify=target['wheat'])

In [None]:
indices['Karnal']['train'] = X_train.index.values.tolist()
indices['Karnal']['test'] = X_test.index.values.tolist()

In [None]:
model = CatBoostClassifier(iterations=100, depth=3, random_state=42, l2_leaf_reg=7)

In [None]:
model.fit(X_train, y_train, verbose=False)

In [None]:
predict_train = model.predict(X_train)

In [None]:
print(classification_report(y_train, predict_train))

In [None]:
predict = model.predict(X_test)

In [None]:
print(classification_report(y_test, predict))

In [None]:
with open('indices.json', 'w') as f:
    json.dump(indices, f)