# Model 2

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

import os,sys,inspect
# Make the directory above the notebook importable so that the `Extra_Work`
# package used by the imports below can be resolved.
# The working directory is used instead of the current frame's "file": inside
# a kernel that name is a synthetic cell identifier (or a temporary file,
# depending on the kernel version), not the notebook's location on disk.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
if parent_dir not in sys.path:  # re-running the cell must not stack duplicate entries
    sys.path.insert(0, parent_dir)

from Extra_Work.PythonFiles.preprocessing import scale_data
from Extra_Work.PythonFiles.modelling import compile_model

from tensorflow.keras.utils import to_categorical

In [2]:
# Read the raw dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='../Data/dataset2.csv')
df.head()

Unnamed: 0,Arm length (m),Ball weight (kg),Ball radius (mm),Air temperature (deg C),Spring constant (N per m),Device weight (kg),Target hit
0,0.313463,0.317565,30.429217,19.0,571.817843,5.876016,1.0
1,0.199977,0.387987,36.200062,24.0,430.608228,0.591052,1.0
2,0.146253,0.428552,59.157785,26.0,1733.40623,4.975027,1.0
3,0.143222,0.417935,48.55341,16.0,490.551664,7.03169,1.0
4,0.089183,0.463229,70.498961,19.0,1387.375798,5.738063,1.0


In [3]:
# Every column except the last one is a feature; scale them with
# scale_data in 'minmax' mode and keep the returned scalers.
feature_frame = df[df.columns[:-1]]
X, scalers = scale_data(feature_frame, mode='minmax')

X.head()

Unnamed: 0,Arm length (m),Ball weight (kg),Ball radius (mm),Air temperature (deg C),Spring constant (N per m),Device weight (kg)
0,0.415731,0.465638,0.0766,0.521739,0.174775,0.716891
1,0.236388,0.572463,0.098531,0.73913,0.125846,0.011938
2,0.151487,0.633997,0.185777,0.826087,0.577261,0.59671
3,0.146696,0.617891,0.145477,0.391304,0.146617,0.871045
4,0.061298,0.6866,0.228876,0.521739,0.457363,0.69849


In [4]:
# The label is the final column of the raw frame.
y = df.iloc[:, -1]
y.head()

0    1.0
1    1.0
2    1.0
3    1.0
4    1.0
Name: Target hit, dtype: float64

In [5]:
# One-hot encode the 0/1 labels for the two-unit softmax output.
# num_classes is pinned so the encoded width is always 2 and never depends on
# which label values happen to be present in `y`.
y_binary = to_categorical(y, num_classes=2)
y_binary


array([[0., 1.],
       [0., 1.],
       [0., 1.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]], dtype=float32)

In [6]:
# Split the scaled features and one-hot labels with a fixed random_state.
# NOTE(review): test_size=0.8 holds out 80% of the rows for testing, so only
# 20% is used for training - confirm this ratio is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_binary,
    test_size=0.8,
    random_state=23,
)

# Modelling cell

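The model code in the next cell mirrors the `compile_model` function imported from `Extra_Work.PythonFiles.modelling`; the local definition below shadows that import.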

In [7]:
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import LayerNormalization, BatchNormalization, Dense
# Fix TensorFlow's global random seed before any model is built.
tf.random.set_seed(1)


def uncompiled_model(n_features=6, n_classes=2):
    """Build the fully connected classifier without compiling it.

    Architecture: input -> Dense(128, relu) -> BatchNormalization
    -> Dense(128, relu) -> BatchNormalization -> Dense(n_classes, softmax).

    Args:
        n_features: Width of the input vector. Defaults to 6, the value that
            was previously hard-coded.
        n_classes: Number of softmax output units. Defaults to 2, the value
            that was previously hard-coded.

    Returns:
        A Keras functional ``Model``; the caller is responsible for compiling it.
    """
    inputs = Input(shape=(n_features,), name='Data')

    # Two identical hidden stages: a dense ReLU layer followed by batch normalisation.
    # The layer names are kept as-is (there is no 'dense_2').
    x = Dense(128, activation='relu', name='dense_1')(inputs)
    x = BatchNormalization()(x)
    x = Dense(128, activation='relu', name='dense_3')(x)
    x = BatchNormalization()(x)

    outputs = Dense(n_classes, activation='softmax', name='TargetHit')(x)
    return Model(inputs=inputs, outputs=outputs)


def compile_model(optimizer="sgd", loss="logcosh", metrics=None):
    """Build the network with ``uncompiled_model`` and compile it.

    Called without arguments this behaves exactly as before: SGD optimizer,
    log-cosh loss, and the accuracy / binary_accuracy / binary_crossentropy
    metrics. The arguments let other optimizers, losses and metrics be tried
    without editing the function.

    NOTE: this definition shadows the ``compile_model`` imported from
    ``Extra_Work.PythonFiles.modelling`` at the top of the notebook.
    NOTE(review): log-cosh is usually a regression loss; with a softmax output
    and one-hot labels ``categorical_crossentropy`` is the conventional
    choice - worth comparing.

    Args:
        optimizer: Optimizer name or instance forwarded to ``Model.compile``.
        loss: Loss name or callable forwarded to ``Model.compile``.
        metrics: Iterable of metrics forwarded to ``Model.compile``; ``None``
            selects the default metric list described above.

    Returns:
        The compiled Keras model.
    """
    if metrics is None:
        metrics = ["accuracy", "binary_accuracy", "binary_crossentropy"]

    model = uncompiled_model()
    model.compile(
        optimizer=optimizer,
        loss=loss,
        metrics=list(metrics),
    )
    return model


# Ablation: work on a copy of the scaled features with two columns blanked
# out (set to 0), then redo the train/test split on that copy.
zeroed_columns = ['Ball weight (kg)','Arm length (m)']
X_temp = X.copy()
X_temp[zeroed_columns] = 0
print(X_temp)

# Same split parameters as the earlier split; this overwrites its results.
X_train, X_test, y_train, y_test = train_test_split(
    X_temp,
    y_binary,
    test_size=0.8,
    random_state=23,
)

      Arm length (m)  Ball weight (kg)  Ball radius (mm)  \
0                0.0               0.0          0.076600   
1                0.0               0.0          0.098531   
2                0.0               0.0          0.185777   
3                0.0               0.0          0.145477   
4                0.0               0.0          0.228876   
...              ...               ...               ...   
3995             0.0               0.0          0.100360   
3996             0.0               0.0          0.151871   
3997             0.0               0.0          0.095969   
3998             0.0               0.0          0.074095   
3999             0.0               0.0          0.116702   

      Air temperature (deg C)  Spring constant (N per m)  Device weight (kg)  
0                    0.521739                   0.174775            0.716891  
1                    0.739130                   0.125846            0.011938  
2                    0.826087             

In [8]:
# Build a fresh compiled model, train it silently for 350 epochs on the
# training split, then evaluate on the held-out split.
model = compile_model()
model.fit(x=X_train, y=y_train, epochs=350, verbose=0)

# `score` is the list returned by evaluate(): the loss first, then the compiled metrics.
score = model.evaluate(x=X_test, y=y_test, verbose=1)
score



[0.1334667056798935,
 0.5459374785423279,
 0.5459374785423279,
 0.8270338177680969]

In [13]:
# Print the layer-by-layer structure and parameter counts of the trained model.
model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Data (InputLayer)            [(None, 6)]               0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               896       
_________________________________________________________________
batch_normalization (BatchNo (None, 128)               512       
_________________________________________________________________
dense_3 (Dense)              (None, 128)               16512     
_________________________________________________________________
batch_normalization_1 (Batch (None, 128)               512       
_________________________________________________________________
TargetHit (Dense)            (None, 2)                 258       
Total params: 18,690
Trainable params: 18,178
Non-trainable params: 512
________________________________________________

In [16]:
# Total number of parameters in the model.
model.count_params()

18690

In [9]:
# Disabled AutoKeras experiment: the code is wrapped in a string literal, so
# nothing below executes and the cell only echoes the string.
# TODO: either restore this as real code or delete the cell.
"""import tensorflow as tf
import autokeras as ak
import numpy as np
X_train, X_test, y_train, y_test = train_test_split(X_temp,y, test_size = 0.8, random_state = 23)



train_set = tf.data.Dataset.from_tensor_slices((X_train.astype(np.unicode), y_train))
test_set = tf.data.Dataset.from_tensor_slices((X_test.to_numpy().astype(np.unicode), y_test))

clf = ak.StructuredDataClassifier(
    overwrite=True,
    max_trials=3)
# Feed the tensorflow Dataset to the classifier.
clf.fit(train_set, epochs=5)
# Predict with the best model.
predicted_y = clf.predict(test_set)
# Evaluate the best model with testing data.
print(clf.evaluate(test_set))"""

'import tensorflow as tf\nimport autokeras as ak\nimport numpy as np\nX_train, X_test, y_train, y_test = train_test_split(X_temp,y, test_size = 0.8, random_state = 23)\n\n\n\ntrain_set = tf.data.Dataset.from_tensor_slices((X_train.astype(np.unicode), y_train))\ntest_set = tf.data.Dataset.from_tensor_slices((X_test.to_numpy().astype(np.unicode), y_test))\n\nclf = ak.StructuredDataClassifier(\n    overwrite=True,\n    max_trials=3)\n# Feed the tensorflow Dataset to the classifier.\nclf.fit(train_set, epochs=5)\n# Predict with the best model.\npredicted_y = clf.predict(test_set)\n# Evaluate the best model with testing data.\nprint(clf.evaluate(test_set))'

In [10]:
# Disabled: this would export the AutoKeras classifier `clf` as a Keras model.
# It is wrapped in a string literal, so nothing here executes.
"""model = clf.export_model()
model.summary()"""

'model = clf.export_model()\nmodel.summary()'

# Sources for later

1. https://d4datascience.wordpress.com/2016/09/29/fbf/
2. https://towardsdatascience.com/machine-learning-part-20-dropout-keras-layers-explained-8c9f6dc4c9ab

Things to try:

> Try initializing the weights for the highly collinear attributes to see whether that changes their influence. Maybe set their initial weights to 0?


In [11]:
# look into using 1 output classifier
# look into using the loss functions, but with different values of gamma, beta and inertias
# as well as learning rates!

# according to research, apparently the only activation function that is compatible with
# bin_cross_entropy is the sigmoid