<a href="https://colab.research.google.com/github/pantprakhar04/smart_crop_prediction/blob/master/Test_run.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import warnings
warnings.filterwarnings("ignore")

# **DATASET DESCRIPTION**

This data set provides an integrated collection of 

1.   ground-based meteorological, radiometric, and vegetation measurements,
2.   flux-based estimates of gross primary production (GPP), and
3.   numerous vegetation indices derived from satellite imagery

for three eddy covariance flux tower locations near Lincoln, Nebraska, USA. 

[LINK](https://daac.ornl.gov/cgi-bin/dsviewer.pl?ds_id=1217)

In [0]:
#importing dataset from local drive

from google.colab import files
uploaded = files.upload()

Saving train.csv to train.csv


In [0]:
#importing libraries

import pickle
import pandas as pd
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from IPython.display import display

In [0]:
#storing dataset in the form of a Pandas dataframe

data = pd.read_csv('train.csv')

In [0]:
#separate data into target variable and feature set

y_all = data['Crop']
X_all = data.drop(['Crop'], 1)

In [0]:
#data standardization

from sklearn.preprocessing import scale

cols = data.columns.drop(['Crop'], 1)
for col in cols:
  X_all[col] = scale(X_all[col])

In [0]:
#shuffle and split dataset into training and testing set

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_all, y_all, test_size=0.2, train_size=0.8, random_state=2, stratify=y_all)

In [0]:
from sklearn.metrics import f1_score

def train_classifier(clf, X_train, y_train):
    ''' Fits a classifier to the training data. '''
    
    # Start the clock, train the classifier, then stop the clock
    start = time()
    clf.fit(X_train, y_train)
    end = time()
    
    # Print the results
    print ("Trained model in {:.4f} seconds".format(end - start))

    
def predict_labels(clf, features, target):
    ''' Makes predictions using a fit classifier based on F1 score. '''
    
    # Start the clock, make predictions, then stop the clock
    start = time()
    y_pred = clf.predict(features)
    
    end = time()
    # Print and return results
    print ("Made predictions in {:.4f} seconds.".format(end - start))
    
    return f1_score(target, y_pred, pos_label='H', average=None), sum(target == y_pred) / float(len(y_pred))
  
def train_predict(clf, X_train, y_train, X_test, y_test):
    ''' Train and predict using a classifer based on F1 score. '''
    
    # Indicate the classifier and the training set size
    print ("Training a {} using a training set size of {}. . .".format(clf.__class__.__name__, len(X_train)))
    
    # Train the classifier
    train_classifier(clf, X_train, y_train)
    
    # Print the results of prediction for both training and testing
    f1, acc = predict_labels(clf, X_train, y_train)
    print (f1, acc)
    print ("F1 score and accuracy score for training set: {:.4f} , {:.4f}.".format(f1.mean() , acc))
    
    f1, acc = predict_labels(clf, X_test, y_test)
    print ("F1 score and accuracy score for test set: {:.4f} , {:.4f}.".format(f1.mean(), acc))

In [0]:
#to measure training time
from time import time

#build some models
clf_A = LogisticRegression(random_state=42)
clf_B = SVC(random_state=912, kernel='rbf')
clf_C = xgb.XGBClassifier(seed=82)

train_predict(clf_A, X_train, y_train, X_test, y_test)
print ('')
train_predict(clf_B, X_train, y_train, X_test, y_test)
print ('')
train_predict(clf_C, X_train, y_train, X_test, y_test)
print ('')

Training a LogisticRegression using a training set size of 398. . .
Trained model in 0.0466 seconds
Made predictions in 0.0057 seconds.
[0.91060291 0.5        0.92015209] 0.8869346733668342
F1 score and accuracy score for training set: 0.7769 , 0.8869.
Made predictions in 0.0011 seconds.
F1 score and accuracy score for test set: 0.8344 , 0.9100.

Training a SVC using a training set size of 398. . .
Trained model in 0.0205 seconds
Made predictions in 0.0146 seconds.
[0.87346939 0.         0.87732342] 0.8341708542713567
F1 score and accuracy score for training set: 0.5836 , 0.8342.
Made predictions in 0.0045 seconds.
F1 score and accuracy score for test set: 0.6066 , 0.8600.

Training a XGBClassifier using a training set size of 398. . .
Trained model in 0.8391 seconds
Made predictions in 0.0080 seconds.
[1. 1. 1.] 1.0
F1 score and accuracy score for training set: 1.0000 , 1.0000.
Made predictions in 0.0033 seconds.
F1 score and accuracy score for test set: 0.8113 , 0.9100.



In [0]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer

In [0]:
#tuning some parameters

parameters = { 'learning_rate' : [0.1],
               'n_estimators' : [40],
               'max_depth': [3],
               'min_child_weight': [3],
               'gamma':[0.4],
               'subsample' : [0.8],
               'colsample_bytree' : [0.8],
               'scale_pos_weight' : [1],
               'reg_alpha':[1e-5]
             }

In [0]:
#Initialize the classifier
clf = xgb.XGBClassifier(seed=2)

#Make an f1 scoring function using 'make_scorer' 
f1_scorer = make_scorer(f1_score, average='micro')

#Perform grid search on the classifier using the f1_scorer as the scoring method
grid_obj = GridSearchCV(clf,
                        scoring=f1_scorer,
                        param_grid=parameters,
                        cv=5)

In [0]:
#Fit the grid search object to the training data and find the optimal parameters
grid_obj = grid_obj.fit(X_train,y_train)

In [0]:
# Get the estimator
clf = grid_obj.best_estimator_
print (clf)

# Report the final F1 score for training and testing after parameter tuning
f1, acc = predict_labels(clf, X_train, y_train)
print ("F1 score and accuracy score for training set: {:.4f} , {:.4f}.".format(f1.max() , acc))
    
f1, acc = predict_labels(clf, X_test, y_test)
print ("F1 score and accuracy score for test set: {:.4f} , {:.4f}.".format(f1.max() , acc))

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bytree=0.8, gamma=0.4, learning_rate=0.1,
              max_delta_step=0, max_depth=3, min_child_weight=3, missing=None,
              n_estimators=40, n_jobs=1, nthread=None,
              objective='multi:softprob', random_state=0, reg_alpha=1e-05,
              reg_lambda=1, scale_pos_weight=1, seed=2, silent=True,
              subsample=0.8)
Made predictions in 0.0085 seconds.
F1 score and accuracy score for training set: 0.9631 , 0.9497.
Made predictions in 0.0029 seconds.
F1 score and accuracy score for test set: 0.9231 , 0.8800.


In [0]:
# Serialize clf object into a file called mlp_nn.pkg on disk using pickle
pickle.dump(clf, open("model.pkl", "wb"))

In [0]:
model = pickle.load(open("model.pkl", "rb"))

In [0]:
import numpy as np
data = np.array([[1.1, 892.673, 0.818, -207.328, 1.727, 1495.392, 0.94, -372.157, -9999, -9999, -9999, 93.08, 0.899, 0.48, -0.031, 0.306, 0.844, 0.228, 10.829, 8.688, 5.165, 1.251, 2.222, 1.397, 26.285, 2.713, 4.264, 1.029,1.101, 1.082, 1.188, 1.123, 1.132, 1.153, 1.181, 1.132, 1.139, 1.192, 1.334, 1.748, 2.35, 2.665, 2.798, 2.677, 2.29, 1.95, 1.788, 1.733, 1.615, 1.49, 1.471, 1.432, 1.356, 1.293, 1.25, 1.336, 1.641, 2.885, 5.643, 9.934, 16.423, 23.074, 27.343, 24.737, 29.029, 28.751, 28.511, 28.608, 28.393, 27.553, 26.843, 27.155, 26.637, 26.236, 25.215, 24.049, 23.122, 19.35]])

print(model.predict(data))

ValueError: ignored

In [0]:
data.shape

(78,)

In [0]:
import numpy as np
import json

data = json.dumps({'GPP_Day':1.1, 'PARin_Day':892.673, 'fAPAR_Day':0.818, 
                   'LE_Day':-207.328, 'GPP_MDay':1.727, 'PARin_MDay':1495.392,
                   'fAPAR_MDay':0.94, 'LE_MDay':-372.157, 'canopy_Chl':-9999,
                   'LAI':-9999, 'LAIgreen':-9999,'VF':93.08,
                   'TM_NDVI':0.899, 'TM_EVI2':0.48, 'TM_WDRVI_05':-0.031,
                   'TM_WDRVI_1':0.306,'TM_Green NDVI':0.844,
                   'TM_VARI':0.228, 'TM_CIgreen':10.829, 'MODIS_CIgreen':8.688,
                   'MERIS_CIre':5.165, 'R_TM_blue':1.251,
                   'R_TM_green':2.222, 'R_TM_red':1.397, 'R_TM_IR':26.285,
                   'R_MODIS_green':2.713, 'R_Meris_RE':4.264,
                   'R_400':1.029,'R_410':1.101, 'R_420':1.082, 'R_430':1.188,
                   'R_440':1.123,
                   'R_450':1.132, 
                   'R_460':1.153, 
                   'R_470':1.181, 'R_480':1.132,
                   'R_490':1.139, 'R_500':1.192, 'R_510':1.334, 'R_520':1.748, 
                   'R_530':2.35, 'R_540':2.665,
                   'R_550':2.798, 'R_560':2.677, 'R_570':2.29, 'R_580':1.95, 
                   'R_590':1.788, 'R_600':1.733, 
                   'R_610':1.615, 'R_620':1.49,
                   'R_630':1.471, 'R_640':1.432, 'R_650':1.356, 'R_660':1.293, 
                   'R_670':1.25,
                   'R_680':1.336, 'R_690':1.641, 'R_700':2.885,
                   'R_710':5.643, 'R_720':9.934, 'R_730':16.423, 'R_740':23.074,
                   'R_750':27.343,
                   'R_760':24.737, 'R_770':29.029, 'R_780':28.751,
                   'R_790':28.511, 'R_800':28.608, 'R_810':28.393,
                   'R_820':27.553,
                   'R_830':26.843, 'R_840':27.155, 'R_850':26.637,
                   'R_860':26.236, 'R_870':25.215, 'R_880':24.049,
                   'R_890':23.122, 'R_900':19.35})
data = np.array(data)
print(model.predict(data))

ValueError: ignored

In [0]:
print(data.shape)

()


In [0]:
import json
import requests

url = "http://localhost:5000/api"
data = json.dumps({'GPP_Day':1.1, 'PARin_Day':892.673, 'fAPAR_Day':0.818, 
                   'LE_Day':-207.328, 'GPP_MDay':1.727, 'PARin_MDay':1495.392,
                   'fAPAR_MDay':0.94, 'LE_MDay':-372.157, 'canopy_Chl':-9999,
                   'LAI':-9999, 'LAIgreen':-9999,'VF':93.08,
                   'TM_NDVI':0.899, 'TM_EVI2':0.48, 'TM_WDRVI_05':-0.031,
                   'TM_WDRVI_1':0.306,'TM_Green NDVI':0.844,
                   'TM_VARI':0.228, 'TM_CIgreen':10.829, 'MODIS_CIgreen':8.688,
                   'MERIS_CIre':5.165, 'R_TM_blue':1.251,
                   'R_TM_green':2.222, 'R_TM_red':1.397, 'R_TM_IR':26.285,
                   'R_MODIS_green':2.713, 'R_Meris_RE':4.264,
                   'R_400':1.029,'R_410':1.101, 'R_420':1.082, 'R_430':1.188,
                   'R_440':1.123,
                   'R_450':1.132, 
                   'R_460':1.153, 
                   'R_470':1.181, 'R_480':1.132,
                   'R_490':1.139, 'R_500':1.192, 'R_510':1.334, 'R_520':1.748, 
                   'R_530':2.35, 'R_540':2.665,
                   'R_550':2.798, 'R_560':2.677, 'R_570':2.29, 'R_580':1.95, 
                   'R_590':1.788, 'R_600':1.733, 
                   'R_610':1.615, 'R_620':1.49,
                   'R_630':1.471, 'R_640':1.432, 'R_650':1.356, 'R_660':1.293, 
                   'R_670':1.25,
                   'R_680':1.336, 'R_690':1.641, 'R_700':2.885,
                   'R_710':5.643, 'R_720':9.934, 'R_730':16.423, 'R_740':23.074,
                   'R_750':27.343,
                   'R_760':24.737, 'R_770':29.029, 'R_780':28.751,
                   'R_790':28.511, 'R_800':28.608, 'R_810':28.393,
                   'R_820':27.553,
                   'R_830':26.843, 'R_840':27.155, 'R_850':26.637,
                   'R_860':26.236, 'R_870':25.215, 'R_880':24.049,
                   'R_890':23.122, 'R_900':19.35})
r = requests.post(url, data)

print (r.json())

In [0]:
data.shape

()