### 1. Library import and dataset loading

In [1]:
import pandas as pd
import autokeras as ak
import numpy as np
import tensorflow as tf

from sklearn.model_selection import train_test_split



df = pd.read_csv('../data/df_prepped.csv')
df_label = df.loc[:,['Countries','Farm']]
df = df.drop(['Countries','Farm'], axis=1)


Using TensorFlow backend


### 2. Separating Train and Test Data


In [2]:
df_test = df[df.Year == 2016].sort_values('Year')
df_train = df[df.Year != 2016]


### 3. Preparing Data for AutoKeras

AutoKeras requires data to have separated features and target variable.

In [3]:
# Check columns in df_train and df_test
print("Columns in df_train:", df_train.columns)
print("Columns in df_test:", df_test.columns)

Columns in df_train: Index(['Year', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Sand_5', 'Sand_6',
       'Sand_7', 'Clay_1', 'Clay_2', 'Clay_3', 'Clay_4', 'Clay_5', 'Clay_6',
       'Clay_7', 'OC_1', 'OC_2', 'OC_3', 'OC_4', 'OC_5', 'OC_6', 'OC_7',
       'PAW_1', 'PAW_2', 'PAW_3', 'PAW_4', 'PAW_5', 'PAW_6', 'PAW_7',
       'Y_maize_major', 'Sow_Maize_month_int', 'Harvest_Maize_month_int',
       'sow_to_harvest_months', 'maize_lag-1', 'pcp_mean_lag-1',
       'tmax_mean_lag-1', 'tmin_mean_lag-1', 'spi_mean_lag-1', 'maize_lag-2',
       'pcp_mean_lag-2', 'tmax_mean_lag-2', 'tmin_mean_lag-2',
       'spi_mean_lag-2', 'maize_lag-3', 'pcp_mean_lag-3', 'tmax_mean_lag-3',
       'tmin_mean_lag-3', 'spi_mean_lag-3'],
      dtype='object')
Columns in df_test: Index(['Year', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Sand_5', 'Sand_6',
       'Sand_7', 'Clay_1', 'Clay_2', 'Clay_3', 'Clay_4', 'Clay_5', 'Clay_6',
       'Clay_7', 'OC_1', 'OC_2', 'OC_3', 'OC_4', 'OC_5', 'OC_6', 'OC_7',
       'PAW_1',

In [4]:
y_train = df_train.pop('Y_maize_major')
y_test = df_test.pop('Y_maize_major')

x_train = df_train
x_test = df_test


### 4. Defining the AutoKeras Model
Here, we  define the type of model we want AutoKeras to search for. For a regression task, we are using StructuredDataRegressor.

In [5]:
reg = ak.StructuredDataRegressor(max_trials=40, loss='mean_absolute_error')


### 5. Training the Model
Fit the model on the training data.

In [6]:
reg.fit(x_train, y_train, epochs=30)

Trial 40 Complete [00h 00m 51s]
val_loss: 0.3833455741405487

Best val_loss So Far: 0.2706042528152466
Total elapsed time: 01h 20m 08s
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
INFO:tensorflow:Assets written to: .\structured_data_regressor\best_model\assets


<keras.callbacks.History at 0x2954fd8b580>

### 6. Evaluating and Predicting

In [7]:
from sklearn.metrics import mean_absolute_error

loss, mse = reg.evaluate(x_test, y_test)
print('MSE:', mse)

predictions = reg.predict(x_test)
mae = mean_absolute_error(y_test, predictions)
print('MAE:', mae)

MSE: 0.16657422482967377
MAE: 0.33615129542020966


In [8]:
import math

rmse = math.sqrt(mse)
print("RMSE:", rmse)


RMSE: 0.4081350570946752


### 7. Exporting the Model

In [9]:
model = reg.export_model()
model.save('../models/autokeras_best_model.tf')

INFO:tensorflow:Assets written to: ../models/autokeras_best_model.tf\assets


In [10]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 47)]              0         
                                                                 
 multi_category_encoding (Mu  (None, 47)               0         
 ltiCategoryEncoding)                                            
                                                                 
 dense (Dense)               (None, 32)                1536      
                                                                 
 re_lu (ReLU)                (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 16)                528       
                                                                 
 re_lu_1 (ReLU)              (None, 16)                0         
                                                             

In [11]:
model_json = model.to_json()
print(model_json)


{"class_name": "Functional", "config": {"name": "model", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": [null, 47], "dtype": "float64", "sparse": false, "ragged": false, "name": "input_1"}, "name": "input_1", "inbound_nodes": []}, {"class_name": "Custom>MultiCategoryEncoding", "config": {"name": "multi_category_encoding", "trainable": true, "dtype": "float32", "encoding": ["int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "int", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none"]}, "name": "multi_category_encoding", "inbound_nodes": [[["input_1", 0, 0, {}]]]}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 32, "activation": "linear", "use_bias": true, "kernel_initializer": {"cla

In [12]:
weights = model.get_weights()
weights

[array([b'2008.000000', b'2010.000000', b'2009.000000', b'2007.000000',
        b'2014.000000', b'2013.000000', b'2012.000000', b'2011.000000',
        b'2015.000000'], dtype=object),
 array([b'60.000000', b'59.000000', b'64.000000', b'68.000000',
        b'67.000000', b'66.000000', b'65.000000', b'63.000000',
        b'62.000000', b'57.000000', b'61.000000', b'56.000000',
        b'49.000000', b'47.000000', b'48.000000', b'53.000000',
        b'55.000000', b'58.000000', b'54.000000', b'50.000000',
        b'52.000000', b'70.000000', b'51.000000', b'46.000000',
        b'45.000000', b'42.000000', b'43.000000', b'69.000000',
        b'44.000000', b'38.000000', b'71.000000', b'41.000000',
        b'72.000000', b'40.000000', b'73.000000', b'36.000000',
        b'39.000000', b'37.000000', b'74.000000', b'75.000000',
        b'35.000000', b'77.000000', b'76.000000', b'31.000000',
        b'80.000000', b'34.000000', b'79.000000', b'32.000000',
        b'28.000000', b'78.000000', b'33.000000'