In [1]:
# import essential libraries
import os
import pandas as pd
import time
import numpy as np
import pylab as plt
import seaborn as sb
import warnings
warnings.filterwarnings("ignore")

# tensorflow libraries
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import Sequential, optimizers
from tensorflow.keras.models import save_model, load_model
from tensorflow.keras.layers import Dense, Dropout

# sklearn libraries are useful for preprocessing, performance measures, etc.
from sklearn import preprocessing
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split

In [2]:
# Load the crop-recommendation CSV (path is relative to the Kaggle working directory).
df = pd.read_csv('../input/crop-recommendation-dataset/Crop_recommendation.csv')
print("Shape of dataset:", df.shape)

# Preview the first rows as a sanity check on the load.
df.head()

# Understanding the Dataset

In [3]:
# Quick overview: how many distinct target classes and how many columns there are.
print('Number of classes:', df['label'].nunique())
print('Number of columns:', df.shape[1], '\n')

# Per-class sample counts (displayed as the cell output).
df['label'].value_counts()

In [4]:
# Show the dtype of every column (displayed as the cell output).
df.dtypes

# Data Preparation

In [5]:
# Columns removed from the frame to obtain the feature matrix.
columns_to_drop = ['label']

def prepare_dataset(df, columns_to_drop, test_size, random_state):
    """Encode labels, split into train/test and standardise the features.

    The input frame is NOT modified: the work is done on a copy, so calling
    this function repeatedly with the same frame gives the same result.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'label' column holding the class of each row. Every
        column not listed in ``columns_to_drop`` is used as a feature
        (assumed numeric, since it is passed to ``StandardScaler``).
    columns_to_drop : list of str
        Columns removed to build the feature matrix (typically ``['label']``).
    test_size : float or int
        Forwarded to ``train_test_split``.
    random_state : int or None
        Forwarded to ``train_test_split`` for a reproducible split.

    Returns
    -------
    x_train_scaled : numpy.ndarray
        Standardised training features.
    y_train : numpy.ndarray
        Integer-encoded training labels (0 .. n_classes-1).
    x_test_scaled : numpy.ndarray
        Test features, scaled with the statistics fitted on the training set.
    y_test : numpy.ndarray
        Integer-encoded test labels.
    encode_dict : dict
        Maps each class name to the integer code used in ``y_train`` and
        ``y_test``. Keys are inserted in ``LabelEncoder.classes_`` order, so
        the key order matches the encoded label values.
    """
    # Work on a copy so the caller's frame keeps its original string labels;
    # overwriting them would make a second call encode integers instead of names.
    df = df.copy()

    # Encode the labels from 0 to n_classes-1
    label_encoder = preprocessing.LabelEncoder()
    df['label'] = label_encoder.fit_transform(df['label'])

    # Class name -> encoded integer, matching the values stored in y_train / y_test
    encode_dict = {
        class_name: code
        for code, class_name in enumerate(label_encoder.classes_)
    }

    # Divide the data into train and test partitions
    df_train, df_test = train_test_split(df, test_size=test_size, random_state=random_state)

    # Fit the scaler on the training inputs only, so no test statistics leak in
    x_train = df_train.drop(columns=columns_to_drop)
    y_train = df_train['label'].to_numpy()

    standard_scaler = preprocessing.StandardScaler()
    x_train_scaled = standard_scaler.fit_transform(x_train)

    # Scale the testing inputs with the statistics learned from the training set
    x_test = df_test.drop(columns=columns_to_drop)
    x_test_scaled = standard_scaler.transform(x_test)
    y_test = df_test['label'].to_numpy()

    return x_train_scaled, y_train, x_test_scaled, y_test, encode_dict

In [6]:
# Build the scaled train/test arrays with a 70/30 split and a fixed split seed.
X_train, y_train, X_test, y_test, encoder = prepare_dataset(
    df=df,
    columns_to_drop=columns_to_drop,
    test_size=0.3,
    random_state=0,
)

# Shapes of the (features, labels) pair for each partition.
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

In [7]:
# Display the class-name -> id mapping returned by prepare_dataset.
encoder

# Model Training

In [8]:
# Initialise parameters for the model
# Model / training hyper-parameters
num_features = X_train.shape[1]          # number of input columns after scaling
num_labels = df['label'].nunique()       # one softmax output per distinct class
num_hidden = 16                          # units in each hidden Dense layer
batch_size = 1                           # samples per gradient update
num_epochs = 50                          # upper bound on training epochs

# Seed NumPy and TensorFlow so repeated runs start from the same random state
SEED = 100
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [9]:
# Early-stopping callback handed to model.fit via train_model.
cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',       # metric watched after every epoch
    min_delta=0,
    patience=10,                  # epochs without improvement tolerated before stopping
    verbose=1,
    mode='auto',
    baseline=None,
    restore_best_weights=False,   # the model keeps its final-epoch weights
)

In [10]:
def _plot_history_curve(epochs, train_values, test_values, metric, num_neuron):
    """Plot one training-vs-test curve on its own figure and display it."""
    # An explicit figure/axes pair avoids opening a stray empty figure before show().
    fig, ax = plt.subplots()
    ax.plot(epochs, train_values, 'r', label=f'Training {metric}')
    ax.plot(epochs, test_values, 'b', label=f'Test {metric}')
    ax.set_title(f'Training and Test {metric} for {str(num_neuron)} neurons with 3-layers')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric.capitalize())
    ax.legend(loc=0)
    plt.show()


def train_model(train_X, test_X, train_Y, test_Y, num_neuron, num_classes, epoch_count, batch_size, callback, dropout_rate=0.3):
    """Build, train and evaluate a small fully connected classifier.

    The network is Dense(relu) -> Dropout -> Dense(relu) -> Dropout ->
    Dense(softmax), compiled with Adam and sparse categorical cross-entropy,
    so ``train_Y`` / ``test_Y`` must hold integer class ids.

    Parameters
    ----------
    train_X, train_Y : array-like
        Training features and integer labels.
    test_X, test_Y : array-like
        Held-out features and labels, passed to ``fit`` as validation data.
    num_neuron : int
        Units in each of the two hidden layers.
    num_classes : int
        Size of the softmax output layer.
    epoch_count : int
        Maximum number of training epochs.
    batch_size : int
        Samples per gradient update.
    callback : keras callback or None
        Single callback forwarded to ``fit``; ``None`` trains without callbacks.
    dropout_rate : float, optional
        Dropout probability after each hidden layer (default 0.3).

    Returns
    -------
    The trained Keras model. As side effects, the last-epoch test accuracy is
    printed and accuracy / loss curves are plotted.
    """
    # Declare network layers
    model = Sequential()
    model.add(Dense(num_neuron, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(num_neuron, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # NOTE(review): the test split doubles as validation data, so any callback
    # monitoring val_* metrics (e.g. early stopping) is steered by the test set.
    callbacks = [callback] if callback is not None else None
    history = model.fit(
        train_X,
        train_Y,
        epochs=epoch_count,
        verbose=1,
        batch_size=batch_size,
        validation_data=(test_X, test_Y),
        callbacks=callbacks,
    )

    # Per-epoch metrics recorded by Keras
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(len(acc))

    # Validation accuracy of the last epoch that was actually run
    print("\nTest Accuracy:", val_acc[-1])

    _plot_history_curve(epochs, acc, val_acc, 'accuracy', num_neuron)
    _plot_history_curve(epochs, loss, val_loss, 'loss', num_neuron)

    return model

In [11]:
# Train the classifier on the prepared arrays; the test split serves as validation data.
model = train_model(
    train_X=X_train,
    test_X=X_test,
    train_Y=y_train,
    test_Y=y_test,
    num_neuron=num_hidden,
    num_classes=num_labels,
    epoch_count=num_epochs,
    batch_size=batch_size,
    callback=cb,
)

In [12]:
# Save the model
# Persist the trained model to an HDF5 file in the working directory
# (the same path is loaded back in the prediction section).
filepath = './task1_saved_model.h5'
save_model(model, filepath)

# Model Prediction

In [13]:
# Reload the saved model from disk, compiled and ready for inference.
checkpoint = './task1_saved_model.h5'
trained_model = load_model(checkpoint, compile=True)

In [19]:
# Single sample with 7 feature values.
# NOTE(review): the values look already standardised; the StandardScaler fitted in
# prepare_dataset is not returned, so raw measurements cannot be scaled here.
test_input = np.array([[1.46619029, -1.19315122, 0.0380808, 0.15766055, 0.73809356, -0.07310227, -0.78881662]])

predictions = trained_model.predict(test_input)

# Softmax outputs for the one sample, and the position of the largest one.
class_probs = predictions[0]
best_idx = int(np.argmax(class_probs))

# The encoder keys were inserted in LabelEncoder.classes_ order, so the k-th key
# corresponds to the k-th softmax output. Comparing positions (rather than the
# ids stored in the dict) flags the right class whatever base those ids use.
result = {}
for idx, (prob, crop) in enumerate(zip(class_probs, encoder.keys())):
    result[crop] = '%.10f' % prob

    if idx == best_idx:
        print(crop, encoder[crop], ':', prob, '- highest')
    else:
        print(crop, encoder[crop], ':', prob)

In [20]:
# Display the per-class probabilities (formatted to 10 decimal places) for the sample.
result

In [16]:
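# NOTE: leftover debugging scratch, kept for reference only. `classes` is not
# defined in any visible cell, so this code cannot run as written.
# for i, j in zip(classes, y_test):
#     if i != j:
#         print(i, j)

# print(classes[0], y_test[0])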