# Build a Model to Predict Crop Production in Kharif Season

In [22]:
import pickle
with open('df_crop_produce.pickle', 'rb') as handle:
    df_crop_produce = pickle.load(handle)

with open('df_rainfall.pickle', 'rb') as handle:
    df_rainfall = pickle.load(handle)
    
with open('df_temp.pickle', 'rb') as handle:
    df_temp = pickle.load(handle)
    
#Compute the Rainfall in centimeters during Kharif season (July-Oct)
rainfall_kharif = df_rainfall[df_rainfall['Parameter'] == 'Actual'].groupby(['YEAR'])['Khariff'].sum()/100 

In [23]:
print('First few records of Rainfall')
print(df_rainfall.head(), '\n')

print('First few records of Temperature')
print(df_temp.head(), '\n')

print('First few records of Crop produce data')
print(df_crop_produce.head(), '\n')

First few records of Rainfall
                   SUBDIVISION  YEAR             Parameter   JAN    FEB  \
276  ANDAMAN & NICOBAR ISLANDS  1997                Actual   9.5    0.0   
277  ANDAMAN & NICOBAR ISLANDS  1997  Percentage departure -80.8 -100.0   
278  ANDAMAN & NICOBAR ISLANDS  1997      No. of districts   3.0    3.0   
279  ANDAMAN & NICOBAR ISLANDS  1998                Actual   0.9    0.0   
280  ANDAMAN & NICOBAR ISLANDS  1998  Percentage departure -98.2 -100.0   

       MAR    APR    MAY    JUN    JUL  ...      OCT    NOV   DEC  ANNUAL  \
276    0.2   15.6  281.1  199.5  918.5  ...    128.7  292.8  38.4  2755.1   
277  -99.3  -78.4  -21.0  -57.7  131.1  ...    -55.5   25.7 -75.0    -5.4   
278    3.0    2.0    3.0    3.0    1.0  ...      1.0    1.0   1.0     NaN   
279    0.0    0.0  348.9  600.0  364.5  ...    618.6  227.8  89.0  2846.4   
280 -100.0 -100.0   -1.9   27.3   -8.3  ...    113.7   -2.2 -42.0    -2.2   

       JF    MAM    JJAS    OND  Khariff   Rabi  
276   

Create a dataset that contains the crop production data, Temperature data and Rainfall data pertaining to Khariff season

In [24]:
import pandas as pd
 
#Compute the total number of crops produced in Kharif season and total number of districts producing crops in Kharif Season.
df_crop_count = df_crop_produce[ (df_crop_produce['Season'] == 'Kharif') ]\
          .groupby(['Crop_Year'])[['Production', 'Area']].count()
df_crop_count.rename(columns={'Production':'Production_Count', 'Area':'Area_Count'}, inplace=True) 

#Compute the total Yeild of crops produced in Kharif season and total land Area used for cultivation in Kharif Season.
df_crop_sum = df_crop_produce[ (df_crop_produce['Season'] == 'Kharif') ]\
          .groupby(['Crop_Year'])[['Production', 'Area']].sum()
df_crop_sum.rename(columns={'Production':'Production_Sum', 'Area':'Area_Sum'}, inplace=True)

df_crop_count.reset_index(drop=True, inplace=True)
df_crop_sum.reset_index(drop=True, inplace=True) 
rainfall_kharif.reset_index(drop=True, inplace=True)

#Concatenate all rainfall data, temperature data and crop production data
df = pd.concat([df_crop_count, df_crop_sum, df_temp['Kharif'].reset_index(drop=True) , \
                rainfall_kharif ], axis = 1)

#remove the 2015 record as the crop production data in that year is incomplete
df = df[:-1]

#rearranging the columns of the dataframe
df = df[['Production_Count', 'Area_Count', 'Area_Sum', 'Kharif', 'Khariff', 'Production_Sum']]

print(df)

    Production_Count  Area_Count     Area_Sum  Kharif  Khariff  Production_Sum
0               3415        3415  75682249.00   26.64  329.360    1.382803e+08
1               4563        4708  83779058.00   26.79  368.052    1.610406e+08
2               5351        5373  78214722.00   26.63  334.555    1.107851e+08
3               5981        6025  86812995.00   26.61  289.456    1.142366e+08
4               5846        5873  87807709.40   26.90  289.739    1.277101e+08
5               5805        5865  80031223.00   26.90  250.052    9.665179e+07
6               6145        6161  88706585.43   26.61  331.896    1.382855e+08
7               5412        5552  80700959.42   26.44  298.543    2.399453e+08
8               5335        5510  82910545.47   26.60  352.631    2.910593e+08
9               5148        5303  82028612.17   26.72  336.148    2.615515e+08
10              5347        5472  70839304.00   26.64  367.336    2.525192e+08
11              5350        5479  82601771.00   26.6

Number of records are too less. 

In the first attempt will build a MultiLayer Perceptron Regressor model with 14 records for training and 3 records for Testing

In [10]:
import numpy as np
import pandas as pd  
import pickle, os
from sklearn.preprocessing import StandardScaler
import tensorflow as tf


def trainModel(X_train, X_test, y_train, y_test, scaler_Clf, is_feedback=False):     

    with tf.Graph().as_default():        
        N, D = X_train.shape
        
        #Build 3 Hidden Layers with the following sizes
        M = 30
        H1 = 45
        H2 = 30
        
        X = tf.placeholder(tf.float32, shape=(None, D))
        y = tf.placeholder(tf.float32, shape=(None))

        # input -> hidden1
        W = tf.Variable(tf.random_normal(shape=(D, M)) * np.sqrt(2.0 / M))
        b = tf.Variable(np.zeros(M).astype(np.float32))

        # hidden1 -> hidden2
        W1 = tf.Variable(tf.random_normal(shape=(M, H1)) * np.sqrt(2.0 / H1))
        b1 = tf.Variable(np.zeros(H1).astype(np.float32))
        
        # hidden2 -> hidden3
        W2 = tf.Variable(tf.random_normal(shape=(H1, H2)) * np.sqrt(2.0 / H2))
        b2 = tf.Variable(np.zeros(H2).astype(np.float32))

        # hidden3 -> output
        V = tf.Variable(tf.random_normal(shape=(H2, 1)) * np.sqrt(2.0 / D))
        c = tf.Variable(np.zeros(1).astype(np.float32))

        # construct the reconstruction
        Z = tf.nn.tanh(tf.matmul(X, W) + b)
        Zh1 = tf.nn.tanh(tf.matmul(Z, W1) + b1) 
        Zh2 = tf.nn.tanh(tf.matmul(Zh1, W2) + b2) 
        logits = tf.matmul(Zh2, V) + c
        X_hat = (logits)

        cost = tf.losses.mean_squared_error(logits, y) + tf.nn.l2_loss(V) 
        
        # make the trainer, using RMS optimiser
        train_op = tf.train.RMSPropOptimizer(learning_rate=0.00001).minimize(cost)

        # set up session and variables for later
        init_op = tf.global_variables_initializer()
        n_epochs = 1000  
        saver = tf.train.Saver()
        
        #When deploying in GPU enabled machines
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.1 

        with tf.Session(config=config) as sess:
            sess.run(init_op)
            if os.path.isfile("./final_model.ckpt"):
                saver.restore(sess, "./final_model.ckpt")
            while n_epochs > 0:
                sess.run(train_op, feed_dict={X:X_train, y:y_train})
                if n_epochs%40 == 0:
                    print(n_epochs, ' Loss: ', sess.run(cost, feed_dict={X:X_train, y:y_train}))
                n_epochs = n_epochs - 1
            res = sess.run(X_hat, feed_dict={X:X_test}) 
            saver.save(sess, "./final_model.ckpt")
        
        print('\n')
        print('Predicted vs Actual Values', '\n')
        test_records = []
        for i, test in enumerate(X_test):
            pred = round(clf.inverse_transform(np.append(test, res[i]))[-1],2)
            real = round(clf.inverse_transform(np.append(test, y_test[i]))[-1],2)
            print(pred, real)
            test_records.append(clf.inverse_transform(np.append(test, y_test[i])))
            
        if not is_feedback:
            with open('test_records.pickle', 'wb') as handle:
                pickle.dump(test_records, handle, protocol=pickle.HIGHEST_PROTOCOL)                        
                                                      
if __name__ == '__main__':
    np.random.seed(39)
    np_df = df.as_matrix()
    np.random.shuffle(np_df)
 
    clf = StandardScaler() 
    #Fit only the training data
    clf.fit(np_df[:15,:])
    np_df = clf.transform(np_df)
    X_Hi = np_df[:,:-1]
    y_Hi = np_df[:, -1]
    
    X_train, X_test = X_Hi[:15, :], X_Hi[15:, :]
    y_train, y_test = y_Hi[:15], y_Hi[15:]
    trainModel(X_train, X_test, y_train, y_test, clf) 



1000  Loss:  231.38388
960  Loss:  211.64696
920  Loss:  196.4836
880  Loss:  182.84583
840  Loss:  170.69438
800  Loss:  160.00078
760  Loss:  150.73093
720  Loss:  142.8371
680  Loss:  136.25078
640  Loss:  130.88953
600  Loss:  126.65739
560  Loss:  123.39828
520  Loss:  120.90074
480  Loss:  118.97827
440  Loss:  117.51416
400  Loss:  116.4211
360  Loss:  115.6083
320  Loss:  114.997154
280  Loss:  114.524345
240  Loss:  114.14736
200  Loss:  113.83958
160  Loss:  113.58618
120  Loss:  113.37526
80  Loss:  113.19923
40  Loss:  113.05195


Predicted vs Actual Values 

240676136.24 299217189.78
244250087.44 270822856.5
206162425.54 261551483.26


Given the shortage of data points the model performed fairly decently.

In the next attempt, would try to generate additional data points using Autoencoders and then rebuild a Multi Layer perceptron model using this augmented dataset for training. Idea is to see if there is any improvement in prediction of the test set between the 2 models

In [32]:
# https://deeplearningcourses.com/c/deep-learning-gans-and-variational-autoencoders
# https://www.udemy.com/deep-learning-gans-and-variational-autoencoders
from __future__ import print_function, division
from builtins import range, input
from sklearn.preprocessing import StandardScaler

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import pickle

class Autoencoder:
    def __init__(self, D, M, H1, H2, H3):
        # represents a batch of training data
        self.X = tf.placeholder(tf.float32, shape=(None, D))

        # input -> hidden1
        self.W = tf.Variable(tf.random_normal(shape=(D, M)) * np.sqrt(2.0 / (D+M)))
        self.b = tf.Variable(np.zeros(M).astype(np.float32))

        # hidden1 -> hidden2
        self.W1 = tf.Variable(tf.random_normal(shape=(M, H1)) * np.sqrt(2.0 / (M+H1)))
        self.b1 = tf.Variable(np.zeros(H1).astype(np.float32))

        # hidden1 -> hidden2
        self.W2 = tf.Variable(tf.random_normal(shape=(H1, H2)) * np.sqrt(2.0 / (H1+H2)))
        self.b2 = tf.Variable(np.zeros(H2).astype(np.float32))
        
        # hidden1 -> hidden2
        self.W3 = tf.Variable(tf.random_normal(shape=(H2, H3)) * np.sqrt(2.0 / (H2+H3)))
        self.b3 = tf.Variable(np.zeros(H3).astype(np.float32))
        
        # hidden2 -> output
        self.V = tf.Variable(tf.random_normal(shape=(H3, D)) )
        self.c = tf.Variable(np.zeros(D).astype(np.float32))

        # construct the reconstruction
        self.Z = tf.nn.relu(tf.matmul(self.X, self.W) + self.b)
        self.Zh = tf.nn.relu(tf.matmul(self.Z, self.W1) + self.b1)
        self.Zh1 = tf.nn.relu(tf.matmul(self.Zh, self.W2) + self.b2)
        self.Zh2 = tf.nn.relu(tf.matmul(self.Zh1, self.W3) + self.b3)
        logits = tf.matmul(self.Zh2, self.V) + self.c
        self.X_hat = (logits)

    
        self.cost = tf.nn.l2_loss(logits - self.X) 
        # make the trainer
        self.train_op = tf.train.RMSPropOptimizer(learning_rate=0.0001).minimize(self.cost)

        # set up session and variables for later
        self.init_op = tf.global_variables_initializer()
        self.sess = tf.InteractiveSession()
        self.sess.run(self.init_op)

    def fit(self, X, epochs=400, batch_sz=17):
        costs = []
        n_batches = len(X) // batch_sz 
        tf.reset_default_graph()
        print("n_batches:", n_batches)
        for i in range(epochs):
            if i % 10 == 0:
                print("epoch:", i)
            np.random.shuffle(X)
            for j in range(n_batches):
                batch = X[j*batch_sz:(j+1)*batch_sz]
                _, c, = self.sess.run((self.train_op, self.cost), feed_dict={self.X: batch})
                c /= batch_sz # just debugging
                costs.append(c)
                #if j % 100 == 0:
                    #print("iter: %d, cost: %.3f" % (j, c)) 
        print('Mean cost in the last 10 epochs: ', np.mean(costs[-10:]))
    
    def predict(self, X):
        return self.sess.run(self.X_hat, feed_dict={self.X: X})
  
    def get_data(self, df):    
        np.random.seed(0)
        data = df.as_matrix()
        np.random.shuffle(data)
        clf = StandardScaler()
        X = clf.fit_transform(data[:,:])
        Y = data[:,-1]
        return clf, X, Y  

def main(df):
    #best = 6, 500, 750, 750, 500, 0.01
    model = Autoencoder(6, 50, 75, 75, 50)
    clf, X, Y = model.get_data(df) 
    model.fit(X)
 
    count = 20000
    res = []
    while count>0 :
        i = np.random.choice(len(X))
        x = X[i]
        im = model.predict([x])  
        X = np.concatenate((X, im))
        res.append(clf.inverse_transform(im[0])) 
        count -= 1 
     
    with open('new_data.pickle', 'wb') as handle:
        pickle.dump(res, handle, protocol=pickle.HIGHEST_PROTOCOL)

if __name__ == '__main__':
    main(df)



n_batches: 1
epoch: 0
epoch: 10
epoch: 20
epoch: 30
epoch: 40
epoch: 50
epoch: 60
epoch: 70
epoch: 80
epoch: 90
epoch: 100
epoch: 110
epoch: 120
epoch: 130
epoch: 140
epoch: 150
epoch: 160
epoch: 170
epoch: 180
epoch: 190
epoch: 200
epoch: 210
epoch: 220
epoch: 230
epoch: 240
epoch: 250
epoch: 260
epoch: 270
epoch: 280
epoch: 290
epoch: 300
epoch: 310
epoch: 320
epoch: 330
epoch: 340
epoch: 350
epoch: 360
epoch: 370
epoch: 380
epoch: 390
Mean cost in the last 10 epochs:  0.0008323811268543497


We have generated 10000 new records that resemble the original dataset. Now with this augmented dataset will create a Multi Layer perceptron model and see if there is any improvement in the predictions of the same  Testset as done before

In [65]:
import numpy as np
import pandas as pd  
import pickle, os
from sklearn.preprocessing import StandardScaler
import tensorflow as tf


def augementedTrainModel(np_df, test_data): 
    np.random.seed(0)  
    
    clf = StandardScaler() 
    clf.fit(np_df)
    np_df = clf.transform(np_df) 
    test_data = clf.transform(test_data) 
    
    X_train, y_train = np_df[:, :-1], np_df[:, -1]
    X_test, y_test = test_data[:, :-1], test_data[:, -1]  
    
    tf.reset_default_graph()
    
    with tf.Graph().as_default():        
        N, D = X_train.shape
        
        #Build 2 Hidden Layers with the following sizes (100, 100)
        M = 100
        H1 = H2 = 200
        
        X = tf.placeholder(tf.float32, shape=(None, D))
        y = tf.placeholder(tf.float32, shape=(None))
       
        # input -> hidden1
        W = tf.Variable(tf.random_normal(shape=(D, M)) * np.sqrt(32.0 / (M+D)))
        b = tf.Variable(np.zeros(M).astype(np.float32))
        
        # hidden1 -> hidden2
        W1 = tf.Variable(tf.random_normal(shape=(M, H1)) * np.sqrt(4.0 / (H1+M)))
        b1 = tf.Variable(np.zeros(H1).astype(np.float32))
        
        # hidden2 -> hidden3, earlier had 3 hidden layers, M, H1, H2, now there are only 2 M, H1=H2
        #W2 = tf.Variable(tf.random_normal(shape=(H1, H2)) * np.sqrt(32.0 / (H2+H1)))
        #b2 = tf.Variable(np.zeros(H2).astype(np.float32))

        # hidden3 -> output
        V = tf.Variable(tf.random_normal(shape=(H2, 1)))
        c = tf.Variable(np.zeros(1).astype(np.float32))
        
        #Adding dropouts
        training = tf.placeholder_with_default(False, shape=(), name='training')
        dropout_rate = 0.95  # == 1 - keep_prob
        #X_drop = tf.layers.dropout(X, dropout_rate, training=training)

        # construct the reconstruction
        Z = tf.nn.tanh(tf.matmul(X , W) + b)
        
        Z_drop = tf.layers.dropout(Z, dropout_rate, training=training)
        Zh1 = tf.nn.relu(tf.matmul(Z_drop, W1) + b1) 
        
        Zh1_drop = tf.layers.dropout(Zh1, dropout_rate, training=training)
        #Zh2 = tf.nn.tanh(tf.matmul(Zh1_drop, W2) + b2) 
        
        #Zh2_drop = tf.layers.dropout(Zh2, dropout_rate, training=training)
        logits = tf.matmul(Zh1_drop, V) + c
        X_hat = (logits)

        cost = tf.losses.mean_squared_error(logits, y) + tf.nn.l2_loss(V) 
        
        # make the trainer, using RMS optimiser
        train_op = tf.train.RMSPropOptimizer(learning_rate=0.00001).minimize(cost)

        # set up session and initialise variables
        init_op = tf.global_variables_initializer()
        n_epochs = 400
        
        #When deploying in GPU enabled machines, currently using GCP compute machine
        gpu_options = tf.GPUOptions(allow_growth=True)
        config = tf.ConfigProto(gpu_options=gpu_options)
        config.gpu_options.per_process_gpu_memory_fraction = 0.9

        with tf.Session(config=config) as sess:
            sess.run(init_op)
            while n_epochs > 0:
                batch_sz = 125
                n_batches = len(X_train)//batch_sz
                rnd_idx = np.random.permutation(len(X_train))
                for rnd_indices in np.array_split(rnd_idx, len(X_train) // batch_sz): 
                    #batch_X = X_train[j*batch_sz:(j+1)*batch_sz]
                    #batch_y = y_train[j*batch_sz:(j+1)*batch_sz]
                    batch_X, batch_y = X_train[rnd_indices], y_train[rnd_indices]
                    sess.run(train_op, feed_dict={X:batch_X, y:batch_y, training: True})  
                if n_epochs%10 == 0:
                    print(n_epochs, ' Loss: ', sess.run(cost, feed_dict={X:batch_X, y:batch_y}))
                n_epochs = n_epochs - 1
            res = sess.run(X_hat, feed_dict={X:X_test}) 
        print('\n')
        print('Predicted vs Actual Values', '\n') 
        for i, test in enumerate(X_test):
            pred = round(clf.inverse_transform(np.append(test, res[i]))[-1],2)
            real = round(clf.inverse_transform(np.append(test, y_test[i]))[-1],2)
            print(pred, real)
             
                                   
if __name__ == '__main__':
    with open('new_data.pickle', 'rb') as handle:
        new_data = pickle.load(handle)
    with open('test_records.pickle', 'rb') as handle:
        test_data = pickle.load(handle)    
    augementedTrainModel(np.array(new_data), np.array(test_data) ) 

400  Loss:  212646.06
390  Loss:  181022.0
380  Loss:  187329.78
370  Loss:  158733.36
360  Loss:  113318.08
350  Loss:  104555.19
340  Loss:  108691.69
330  Loss:  78295.17
320  Loss:  78110.22
310  Loss:  87130.234
300  Loss:  79632.75
290  Loss:  62102.805
280  Loss:  46384.03
270  Loss:  57186.516
260  Loss:  42331.133
250  Loss:  50443.83
240  Loss:  36371.156
230  Loss:  30936.605
220  Loss:  38710.87
210  Loss:  34308.49
200  Loss:  25508.488
190  Loss:  21488.484
180  Loss:  21978.348
170  Loss:  21360.012
160  Loss:  19970.494
150  Loss:  17761.242
140  Loss:  16931.24
130  Loss:  17693.107
120  Loss:  13219.082
110  Loss:  12302.67
100  Loss:  12726.773
90  Loss:  11815.604
80  Loss:  12378.538
70  Loss:  11392.732
60  Loss:  12118.16
50  Loss:  11869.362
40  Loss:  10242.857
30  Loss:  11162.77
20  Loss:  11182.174
10  Loss:  11845.91


Predicted vs Actual Values 

257742712.75 299217189.78
298949217.79 270822856.5
200601155.38 261551483.26


Not much of an improvement there, I tried all combinations like, 

- with and without dropout

- different activation functions 

- different weight initialisation values

- different number of hidden layers and hidden layer sizes

- different learning rates.

Inspite of trying all combinations the results are not much better. Hence, Sticking to the original model with 17 records  for now...

## Feedback Learning

How to make a feedback learning model??

One simple idea is inspired from Boosting algorithm in Decision Trees. Here, we give extra weight to the feedback record, by appending 2 records of the same feedback data to the dataset. Now, 2 copies of the feedback data are present in the dataset, so the model will learn this pattern more as compared to its previous version

Other methods to feedback learning are not considered for now.

In [11]:
print('Again, printing First few records of our original dateset')
df.head()

Again, printing First few records of our original dateset


Unnamed: 0,Production_Count,Area_Count,Area_Sum,Kharif,Khariff,Production_Sum
0,3415,3415,75682249.0,26.64,329.36,138280300.0
1,4563,4708,83779058.0,26.79,368.052,161040600.0
2,5351,5373,78214722.0,26.63,334.555,110785100.0
3,5981,6025,86812995.0,26.61,289.456,114236600.0
4,5846,5873,87807709.4,26.9,289.739,127710100.0


In [25]:
print('Feedback data for the year 1999 has total production 210785062.6 units instead of 110785062.6 units')

#Suppose the year 1999 record is bad... and need to be change Production_Sum from 110785062.6 to 210785062.6 
df['Production_Sum'][2]=210785062.6 
for i in range(2):
    df = df.append(df.iloc[2])
    df.reset_index(drop=True, inplace=True)

#Shuffle the data
df = df.sample(frac=1).reset_index(drop=True)

print('New dataset after appending 5 records of feedback data and shuffing it', '\n')
print(df)

Feedback data for the year 1999 has total production 210785062.6 units instead of 110785062.6 units
New dataset after appending 5 records of feedback data and shuffing it 

    Production_Count  Area_Count     Area_Sum  Kharif  Khariff  Production_Sum
0             5350.0      5479.0  82601771.00   26.60  311.016    3.030534e+08
1             3415.0      3415.0  75682249.00   26.64  329.360    1.382803e+08
2             4339.0      4397.0  52000480.50   26.87  316.974    2.708229e+08
3             5148.0      5303.0  82028612.17   26.72  336.148    2.615515e+08
4             5981.0      6025.0  86812995.00   26.61  289.456    1.142366e+08
5             5335.0      5510.0  82910545.47   26.60  352.631    2.910593e+08
6             5412.0      5552.0  80700959.42   26.44  298.543    2.399453e+08
7             5351.0      5373.0  78214722.00   26.63  334.555    2.107851e+08
8             6145.0      6161.0  88706585.43   26.61  331.896    1.382855e+08
9             4757.0      4906.0  725

In [27]:
#Feedback Learning
np_df = df.as_matrix() 
clf = StandardScaler() 
clf.fit(np_df)
np_df = clf.transform(np_df)
X = np_df[:,:-1]
y = np_df[:, -1]
#Let's have the last 3 records as Test Records , feedback data is a part of Test records
X_train, X_test = X[:-3, :], X[(-3,-2,-1), :] 
y_train, y_test = y[:-3], y[[-3,-2,-1]] 
trainModel(X_train, X_test, y_train, y_test, clf, is_feedback=True) 

  


1000  Loss:  259.93698
960  Loss:  241.49875
920  Loss:  227.77632
880  Loss:  215.77844
840  Loss:  205.35513
800  Loss:  196.35902
760  Loss:  188.66354
720  Loss:  182.1679
680  Loss:  176.78207
640  Loss:  172.4085
600  Loss:  168.91315
560  Loss:  166.15695
520  Loss:  164.01193
480  Loss:  162.38135
440  Loss:  161.19255
400  Loss:  160.37042
360  Loss:  159.82956
320  Loss:  159.45758
280  Loss:  159.15453
240  Loss:  158.88919
200  Loss:  158.6551
160  Loss:  158.44862
120  Loss:  158.26712
80  Loss:  158.10803
40  Loss:  157.9691


Predicted vs Actual Values 

238004397.51 285954655.53
232669795.76 161040570.0
224897236.23 210785062.6


If we look at the last record, which is the feedback record, the model has it learnt fairly decently

## Conclusion

-> Combined temperature data, Rainfall data and Crop production data. There are only 17 records, one record per year from 1997 to 2014

-> In the first attempt, Built a MLP neural network model using Tensorflow Libraries with these 17 records. The performance on test set is OK.

-> In the second attempt, Build a Autoencoder model to generate additional data (10000) out of these 17 records and then built another MLP neural network model using this augmented dataset(10017 records). Tried a different number of hyperparameter tuning like with and without dropouts, different learning rates, different activation functions and different initial weights. But the results were not significantly better. Hence stuck to the original model with 17 records

-> Feedback Learning: Inspired from Boosting technique. Generally feedback data is very small compared to the training dataset, hence made multiple copies of feedback data, in this case 2, and augmented the training dataset with multiple copies feedback data. Re-trained the model ( infact, restored the earlier model weights and then retrained ). This is a simple feedback learning mechanism. Since we are training on top of earlier trained model, the previous correct patterns should also have been preseved along with learning new pattern