In [1]:
from numpy import asarray
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from autokeras import StructuredDataClassifier

In [2]:
# Read the CSV into a DataFrame. Column labels are supplied explicitly via
# `names=`, so pandas treats every line of the file (including the first) as data.
names = [
    'temp',
    'dpTemp',
    'RH',
    'WDIR',
    'VIZ',
    'Label',
]
dataframe = read_csv('./oversampler.csv', names=names)
# Quick sanity check: (rows, columns).
print(dataframe.shape)

(6977, 6)


In [3]:
# Separate the raw array into model inputs (x) and the target (y).
data = dataframe.values
# Columns 0-3 (temp, dpTemp, RH, WDIR) are the inputs and column 5 (Label) is
# the target. Column 4 (VIZ) is not selected by either slice.
x, y = data[:, :4], data[:, 5]
print(x.shape, y.shape)

(6977, 4) (6977,)


In [4]:
# basic data preparation
# Cast the feature matrix to float32. Only x is converted here; y is left
# exactly as it was sliced from the DataFrame values.
x = x.astype('float32')

In [5]:
# Hold out 33% of the rows as a test set; random_state pins the shuffle so the
# same partition is produced on every run.
# NOTE(review): the input file is named "oversampler.csv" -- if oversampling was
# applied before this split, the test set may contain copies of training rows.
# Confirm against how the file was produced.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=1,
)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(4674, 4) (2303, 4) (4674,) (2303,)


In [6]:
# define the search
# Try up to 100 candidate models. overwrite=True discards any results left on
# disk by a previous search with the same project name instead of resuming it.
# seed=1 mirrors the random_state used for the train/test split so that the
# search is seeded as well; without it every run explores a different sequence
# of candidates.
search = StructuredDataClassifier(max_trials=100, overwrite=True, seed=1)

In [7]:
# Run the architecture search on the training split only.
# Each trial trains for 5 epochs; verbose=2 controls how much progress is logged.
search.fit(
    x=X_train,
    y=y_train,
    epochs=5,
    verbose=2,
)

Trial 7 Complete [00h 00m 06s]
val_accuracy: 0.8685969114303589

Best val_accuracy So Far: 0.916481077671051
Total elapsed time: 00h 00m 41s

Search: Running Trial #8

Hyperparameter    |Value             |Best Value So Far 
structured_data...|True              |True              
structured_data...|True              |False             
structured_data...|2                 |2                 
structured_data...|32                |32                
structured_data...|0                 |0                 
structured_data...|32                |32                
classification_...|0                 |0                 
optimizer         |adam_weight_decay |adam_weight_decay 
learning_rate     |0.001             |0.001             

Epoch 1/5


In [8]:
# Score the search result on the held-out test split; evaluate() is unpacked
# into a (loss, accuracy) pair and only the accuracy is reported.
loss, acc = search.evaluate(X_test, y_test, verbose=0)
print(f'Accuracy: {acc:.3f}')

Accuracy: 0.910


In [9]:
# use the model to make a prediction
# One sample, with values in the same column order as the training features:
# temp, dpTemp, RH, WDIR.
row = [15.1, 14.9, 99, 50]
# Wrap the row in a list so the input is 2-D (one sample x four features) and
# cast to float32 to match the dtype the training features were cast to.
X_new = asarray([row]).astype('float32')
yhat = search.predict(X_new)
# Flatten the prediction and take its first element so that a true scalar is
# formatted. Passing a 1-element array to '%f' relies on implicit
# array-to-scalar conversion, which NumPy 1.25+ deprecates.
print('Predicted: %.3f' % asarray(yhat).ravel()[0])

Predicted: 0.000


In [10]:
# get the best performing model
# Export it from the fitted search object; the returned `model` is what the
# following cells summarize and save.
model = search.export_model()

In [11]:
# summarize the exported model
# Prints the layer-by-layer architecture with output shapes and parameter counts.
model.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 4)]               0         
                                                                 
 multi_category_encoding (Mu  (None, 4)                0         
 ltiCategoryEncoding)                                            
                                                                 
 normalization (Normalizatio  (None, 4)                9         
 n)                                                              
                                                                 
 dense (Dense)               (None, 32)                160       
                                                                 
 re_lu (ReLU)                (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 32)                1056  

In [12]:
# save the best performing model to file
# First attempt: save to the "modelOverSampling" path with save_format="tf".
# If that raises for any reason, fall back to saving a single
# "modelOverSampling.h5" file instead.
try:
    model.save("modelOverSampling", save_format="tf")
except Exception:
    model.save("modelOverSampling.h5")

INFO:tensorflow:Assets written to: modelOverSampling\assets
