In [1]:
# Import dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

In [2]:
# Pin NumPy's global random generator to a fixed value so that anything drawing
# from numpy.random repeats from run to run. This call touches NumPy only; other
# libraries keep their own generators.
from numpy.random import seed

NOTEBOOK_SEED = 1
seed(NOTEBOOK_SEED)

In [3]:
import tensorflow

# The NumPy seed set earlier does not reach TensorFlow's own random generator,
# which Keras uses for weight initialisation. Seed it too (same value, 1) so
# model training is repeatable as well.
# NOTE(review): tensorflow.random.set_seed is the TF 2.x API; TF 2.x is assumed
# from the "accuracy" metric name in the training log. Confirm the installed version.
tensorflow.random.set_seed(1)

# Show which Keras build ships with this TensorFlow install
tensorflow.keras.__version__

'2.2.4-tf'

In [4]:
# Read csv into a dataframe & drop nan values
# The path is built from separate components so os.path.join actually supplies
# the platform's separator (a single-argument join returns its input unchanged).
# dropna() keeps the original row labels, so the index may have gaps after
# incomplete rows are removed.
df = pd.read_csv(os.path.join("Resources", "Autism.csv")).dropna()
df

Unnamed: 0,Case No,A1,A2,A3,A4,A5,A6,A7,A8,A9,...,Ethnicity,Jaundice,Family_ASD,Residence,Used_App_Before,Score,Screening Type,Language,User,Class
0,6,0,0,0,0,0,0,0,0,0,...,white,no,no,Russia,no,0,17 and more,russian,parent,NO
1,10,1,1,1,1,1,1,1,1,1,...,white,no,no,Germany,no,10,17 and more,english,self,YES
2,12,1,1,0,1,1,0,0,1,0,...,black,no,no,United States,no,5,17 and more,english,self,NO
3,16,1,0,0,0,1,1,1,1,1,...,black,no,no,Mexico,no,7,17 and more,french,self,YES
4,20,1,1,1,0,0,0,0,1,0,...,aboriginal,no,no,Indonesia,no,4,17 and more,english,self,NO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1113,2486,0,1,1,0,0,0,0,0,0,...,white,no,yes,New Zealand,no,2,17 and more,english,self,NO
1114,2487,1,1,1,1,1,1,1,1,1,...,white,no,no,United Kingdom,no,10,17 and more,english,self,YES
1115,2489,1,0,0,0,0,0,1,1,1,...,white,yes,yes,United Kingdom,no,5,17 and more,english,self,NO
1116,2492,1,1,0,1,1,1,0,1,0,...,white,no,no,United States,no,7,17 and more,english,self,YES


In [5]:
# Features: the ten screening answers A1..A10 plus the Score column; target: Class.
# NOTE(review): in every row printed in this notebook, Score equals the sum of
# A1..A10. If Class is derived from Score, this feature leaks the target. Confirm.
X = df.loc[:, [f"A{n}" for n in range(1, 11)] + ["Score"]]
y = df.loc[:, "Class"]
print(X.shape, y.shape)

(1117, 11) (1117,)


In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [7]:
# Partition features and labels into train/test subsets. random_state is fixed
# so the same rows land in each subset on every run; no test_size is passed, so
# scikit-learn's default proportions apply.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=1)

# Echo the full (unsplit) label series and feature frame
print("y:", y)
print("X:", X)

y: 0        NO
1       YES
2        NO
3       YES
4        NO
       ... 
1113     NO
1114    YES
1115     NO
1116    YES
1117     NO
Name: Class, Length: 1117, dtype: object
X:       A1  A2  A3  A4  A5  A6  A7  A8  A9  A10  Score
0      0   0   0   0   0   0   0   0   0    0      0
1      1   1   1   1   1   1   1   1   1    1     10
2      1   1   0   1   1   0   0   1   0    0      5
3      1   0   0   0   1   1   1   1   1    1      7
4      1   1   1   0   0   0   0   1   0    0      4
...   ..  ..  ..  ..  ..  ..  ..  ..  ..  ...    ...
1113   0   1   1   0   0   0   0   0   0    0      2
1114   1   1   1   1   1   1   1   1   1    1     10
1115   1   0   0   0   0   0   1   1   1    1      5
1116   1   1   0   1   1   1   0   1   0    1      7
1117   0   1   0   0   1   1   0   0   1    0      4

[1117 rows x 11 columns]


In [8]:
# Learn each feature's min/max from the training rows only, then apply that
# same scaling to both the training and the test rows.
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(part) for part in (X_train, X_test)
)
X_train_scaled

array([[0. , 0. , 0. , ..., 0. , 0. , 0.1],
       [0. , 0. , 1. , ..., 0. , 0. , 0.1],
       [1. , 0. , 1. , ..., 1. , 1. , 0.6],
       ...,
       [1. , 1. , 1. , ..., 0. , 0. , 0.4],
       [1. , 1. , 0. , ..., 0. , 1. , 0.6],
       [1. , 1. , 1. , ..., 1. , 1. , 1. ]])

In [9]:
# Step 1: Label-encode data set
# The encoder learns its classes from the training labels, and that one mapping
# is then applied to both the training and the test labels.
label_encoder = LabelEncoder().fit(y_train)
encoded_y_train, encoded_y_test = map(
    label_encoder.transform, (y_train, y_test)
)
print(encoded_y_train)

[0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 0 1 0 1 1 0 0 0 0 0 0 1 0
 1 0 1 1 0 0 1 1 1 0 1 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0
 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 1 1 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 1 1 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 1 0 1 1 1 0 1 0 0 0 0 1 0 0 1
 0 0 1 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 0 1 0 1 0 0 0 0 0
 1 0 0 0 1 0 1 0 0 1 0 0 0 1 1 1 0 1 0 0 1 0 1 1 0 0 0 1 0 0 0 1 0 1 0 1 1
 1 1 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 1 0 0 0 0 0
 0 0 1 0 1 1 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 1 1 1 0 1
 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 1 1 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 1 1 0 0 0 0 0 0
 0 1 1 0 0 1 0 1 1 0 1 0 1 0 1 1 0 0 0 1 0 1 0 0 1 0 1 0 1 0 0 0 0 0 0 1 0
 0 0 0 1 0 0 1 1 0 0 0 0 

In [10]:
# Sanity check: list the distinct class labels present in the training split
y_train.unique()

array(['NO', 'YES'], dtype=object)

In [11]:
# Step 2: Convert encoded labels to one-hot-encoding
# Each integer label becomes a row with a single 1 in the column of its class.
y_train_categorical, y_test_categorical = (
    to_categorical(labels) for labels in (encoded_y_train, encoded_y_test)
)
print(y_train_categorical)

[[1. 0.]
 [1. 0.]
 [1. 0.]
 ...
 [1. 0.]
 [1. 0.]
 [0. 1.]]


In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [13]:
# Create model and add layers
# Input and output widths come from the prepared arrays rather than literals,
# so the network keeps matching the data if the feature list or the set of
# classes changes (currently 11 features and 2 classes).
n_features = X_train_scaled.shape[1]
n_classes = y_train_categorical.shape[1]

model = Sequential()
# Two fully connected hidden layers of 100 ReLU units each
model.add(Dense(units=100, activation='relu', input_dim=n_features))
model.add(Dense(units=100, activation='relu'))
# Softmax output: one probability per one-hot class column
model.add(Dense(units=n_classes, activation='softmax'))

In [14]:
# Report the one-hot label array shapes: test split first, then training split
print(y_test_categorical.shape, y_train_categorical.shape, sep="\n")

(280, 2)
(837, 2)


In [15]:
# Configure the model for training (no fitting happens in this cell): Adam
# optimizer, categorical cross-entropy loss to match the one-hot labels, and
# accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [16]:
# Print the layer-by-layer architecture and parameter counts of the compiled model
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               1200      
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 202       
Total params: 11,502
Trainable params: 11,502
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Train on the scaled training features and one-hot labels for 60 epochs,
# reshuffling the samples each epoch; verbose=2 logs one line per epoch.
# The History object returned by fit() is left as the cell's output.
model.fit(
    x=X_train_scaled,
    y=y_train_categorical,
    verbose=2,
    shuffle=True,
    epochs=60,
)

Train on 837 samples
Epoch 1/60
837/837 - 1s - loss: 0.5832 - accuracy: 0.7754
Epoch 2/60
837/837 - 0s - loss: 0.3220 - accuracy: 0.9283
Epoch 3/60
837/837 - 0s - loss: 0.1688 - accuracy: 0.9522
Epoch 4/60
837/837 - 0s - loss: 0.1045 - accuracy: 0.9713
Epoch 5/60
837/837 - 0s - loss: 0.0713 - accuracy: 0.9869
Epoch 6/60
837/837 - 0s - loss: 0.0579 - accuracy: 0.9928
Epoch 7/60
837/837 - 0s - loss: 0.0435 - accuracy: 0.9964
Epoch 8/60
837/837 - 0s - loss: 0.0372 - accuracy: 0.9940
Epoch 9/60
837/837 - 0s - loss: 0.0297 - accuracy: 1.0000
Epoch 10/60
837/837 - 0s - loss: 0.0293 - accuracy: 0.9988
Epoch 11/60
837/837 - 0s - loss: 0.0218 - accuracy: 1.0000
Epoch 12/60
837/837 - 0s - loss: 0.0185 - accuracy: 1.0000
Epoch 13/60
837/837 - 0s - loss: 0.0155 - accuracy: 1.0000
Epoch 14/60
837/837 - 0s - loss: 0.0135 - accuracy: 1.0000
Epoch 15/60
837/837 - 0s - loss: 0.0121 - accuracy: 1.0000
Epoch 16/60
837/837 - 0s - loss: 0.0116 - accuracy: 1.0000
Epoch 17/60
837/837 - 0s - loss: 0.0098 - ac

<tensorflow.python.keras.callbacks.History at 0x1f99bda8cc0>

In [18]:
# Print the architecture summary again now that the model has been trained
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               1200      
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 202       
Total params: 11,502
Trainable params: 11,502
Non-trainable params: 0
_________________________________________________________________


In [19]:
# Score the trained network on the scaled test features and one-hot test
# labels; evaluate() yields the loss followed by the compiled accuracy metric.
model_loss, model_accuracy = model.evaluate(
    x=X_test_scaled,
    y=y_test_categorical,
    verbose=2,
)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

280/280 - 0s - loss: 0.0177 - accuracy: 0.9929
Loss: 0.017658405657857656, Accuracy: 0.9928571581840515
