### 1. Library import and dataset loading

In [2]:
import pandas as pd
import autokeras as ak
import numpy as np
import tensorflow as tf

from sklearn.model_selection import train_test_split



df = pd.read_csv('../data/df_prepped.csv')
df_label = df.loc[:,['Countries','Farm']]
df = df.drop(['Countries','Farm'], axis=1)


Using TensorFlow backend


### 2. Separating Train and Test Data


In [3]:
df_test = df[df.Year == 2016].sort_values('Year')
df_train = df[df.Year != 2016]


### 3. Preparing Data for AutoKeras

AutoKeras requires data to have separated features and target variable.

In [4]:
# Check columns in df_train and df_test
print("Columns in df_train:", df_train.columns)
print("Columns in df_test:", df_test.columns)

Columns in df_train: Index(['Year', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Sand_5', 'Sand_6',
       'Sand_7', 'Clay_1', 'Clay_2', 'Clay_3', 'Clay_4', 'Clay_5', 'Clay_6',
       'Clay_7', 'OC_1', 'OC_2', 'OC_3', 'OC_4', 'OC_5', 'OC_6', 'OC_7',
       'PAW_1', 'PAW_2', 'PAW_3', 'PAW_4', 'PAW_5', 'PAW_6', 'PAW_7',
       'Y_maize_major', 'Sow_Maize_month_int', 'Harvest_Maize_month_int',
       'sow_to_harvest_months', 'maize_lag-1', 'pcp_mean_lag-1',
       'tmax_mean_lag-1', 'tmin_mean_lag-1', 'spi_mean_lag-1', 'maize_lag-2',
       'pcp_mean_lag-2', 'tmax_mean_lag-2', 'tmin_mean_lag-2',
       'spi_mean_lag-2', 'maize_lag-3', 'pcp_mean_lag-3', 'tmax_mean_lag-3',
       'tmin_mean_lag-3', 'spi_mean_lag-3'],
      dtype='object')
Columns in df_test: Index(['Year', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Sand_5', 'Sand_6',
       'Sand_7', 'Clay_1', 'Clay_2', 'Clay_3', 'Clay_4', 'Clay_5', 'Clay_6',
       'Clay_7', 'OC_1', 'OC_2', 'OC_3', 'OC_4', 'OC_5', 'OC_6', 'OC_7',
       'PAW_1',

In [5]:
y_train = df_train.pop('Y_maize_major')
y_test = df_test.pop('Y_maize_major')

x_train = df_train
x_test = df_test


### 4. Defining the AutoKeras Model
Here, we  define the type of model we want AutoKeras to search for. For a regression task, we are using StructuredDataRegressor.

In [6]:
reg = ak.StructuredDataRegressor(max_trials=40, loss='mean_absolute_error')


Reloading Tuner from .\structured_data_regressor\tuner0.json


### 5. Training the Model
Fit the model on the training data.

In [7]:
reg.fit(x_train, y_train, epochs=30)

Trial 46 Complete [00h 03m 49s]
val_loss: 0.27093398571014404

Best val_loss So Far: 0.25803256034851074
Total elapsed time: 09h 08m 36s
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30




INFO:tensorflow:Assets written to: .\structured_data_regressor\best_model\assets


INFO:tensorflow:Assets written to: .\structured_data_regressor\best_model\assets


<keras.callbacks.History at 0x2d8a4d92bc0>

### 6. Evaluating and Predicting

In [13]:
loss, mae = reg.evaluate(x_test, y_test)
print('MAE:', mae)

predictions = reg.predict(x_test)

MAE: 0.11300051212310791


In [14]:
import math

mse = 0.1130
rmse = math.sqrt(mse)
print("RMSE:", rmse)


RMSE: 0.3361547262794322


### 7. Exporting the Model

In [9]:
model = reg.export_model()
model.save('../models/autokeras_best_model.tf')



INFO:tensorflow:Assets written to: ../models/autokeras_best_model.tf\assets


INFO:tensorflow:Assets written to: ../models/autokeras_best_model.tf\assets


In [10]:
model

<keras.engine.functional.Functional at 0x2d8703a5180>