In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report




In [2]:
df = pd.read_csv("C:/Users/tjjaf/Desktop/ML Projects/Autism Prediction - Neural Network/autism_screening.csv")

In [3]:
df.head()

Unnamed: 0,A1_Score,A2_Score,A3_Score,A4_Score,A5_Score,A6_Score,A7_Score,A8_Score,A9_Score,A10_Score,...,gender,ethnicity,jundice,austim,contry_of_res,used_app_before,result,age_desc,relation,Class/ASD
0,1,1,1,1,0,0,1,1,0,0,...,f,White-European,no,no,United States,no,6.0,18 and more,Self,NO
1,1,1,0,1,0,0,0,1,0,1,...,m,Latino,no,yes,Brazil,no,5.0,18 and more,Self,NO
2,1,1,0,1,1,0,1,1,1,1,...,m,Latino,yes,yes,Spain,no,8.0,18 and more,Parent,YES
3,1,1,0,1,0,0,1,1,0,1,...,f,White-European,no,yes,United States,no,6.0,18 and more,Self,NO
4,1,0,0,0,0,0,0,1,0,0,...,f,?,no,no,Egypt,no,2.0,18 and more,?,NO


In [4]:
df.shape

(704, 21)

In [5]:
df = df.rename(columns = {'austim' : 'autism', 'jundice' : 'jaundice', 'contry_of_res' : 'country_of_res'})
df.head()

Unnamed: 0,A1_Score,A2_Score,A3_Score,A4_Score,A5_Score,A6_Score,A7_Score,A8_Score,A9_Score,A10_Score,...,gender,ethnicity,jaundice,autism,country_of_res,used_app_before,result,age_desc,relation,Class/ASD
0,1,1,1,1,0,0,1,1,0,0,...,f,White-European,no,no,United States,no,6.0,18 and more,Self,NO
1,1,1,0,1,0,0,0,1,0,1,...,m,Latino,no,yes,Brazil,no,5.0,18 and more,Self,NO
2,1,1,0,1,1,0,1,1,1,1,...,m,Latino,yes,yes,Spain,no,8.0,18 and more,Parent,YES
3,1,1,0,1,0,0,1,1,0,1,...,f,White-European,no,yes,United States,no,6.0,18 and more,Self,NO
4,1,0,0,0,0,0,0,1,0,0,...,f,?,no,no,Egypt,no,2.0,18 and more,?,NO


In [6]:
df.nunique()

A1_Score            2
A2_Score            2
A3_Score            2
A4_Score            2
A5_Score            2
A6_Score            2
A7_Score            2
A8_Score            2
A9_Score            2
A10_Score           2
age                46
gender              2
ethnicity          12
jaundice            2
autism              2
country_of_res     67
used_app_before     2
result             11
age_desc            1
relation            6
Class/ASD           2
dtype: int64

In [7]:
df.drop(columns = ['age_desc'], inplace = True)
df.head()

Unnamed: 0,A1_Score,A2_Score,A3_Score,A4_Score,A5_Score,A6_Score,A7_Score,A8_Score,A9_Score,A10_Score,age,gender,ethnicity,jaundice,autism,country_of_res,used_app_before,result,relation,Class/ASD
0,1,1,1,1,0,0,1,1,0,0,26.0,f,White-European,no,no,United States,no,6.0,Self,NO
1,1,1,0,1,0,0,0,1,0,1,24.0,m,Latino,no,yes,Brazil,no,5.0,Self,NO
2,1,1,0,1,1,0,1,1,1,1,27.0,m,Latino,yes,yes,Spain,no,8.0,Parent,YES
3,1,1,0,1,0,0,1,1,0,1,35.0,f,White-European,no,yes,United States,no,6.0,Self,NO
4,1,0,0,0,0,0,0,1,0,0,40.0,f,?,no,no,Egypt,no,2.0,?,NO


In [8]:
print(f'Max Age in Data:', df['age'].max())

print(f'Min Age in Data:', df['age'].min())

Max Age in Data: 383.0
Min Age in Data: 17.0


In [9]:
df[df['age'] == df['age'].max()]

Unnamed: 0,A1_Score,A2_Score,A3_Score,A4_Score,A5_Score,A6_Score,A7_Score,A8_Score,A9_Score,A10_Score,age,gender,ethnicity,jaundice,autism,country_of_res,used_app_before,result,relation,Class/ASD
52,1,0,0,0,0,0,0,0,0,0,383.0,f,Pasifika,no,no,New Zealand,no,1.0,Self,NO


In [10]:
df.drop(index = 52, inplace = True)
df.reset_index(inplace = True)

In [11]:
df.isnull().sum()

index              0
A1_Score           0
A2_Score           0
A3_Score           0
A4_Score           0
A5_Score           0
A6_Score           0
A7_Score           0
A8_Score           0
A9_Score           0
A10_Score          0
age                2
gender             0
ethnicity          0
jaundice           0
autism             0
country_of_res     0
used_app_before    0
result             0
relation           0
Class/ASD          0
dtype: int64

In [12]:
df['age'] = df['age'].fillna(np.round(df['age'].mean(), 0))
pd.DataFrame(df.isnull().sum(), columns = ['Missing Values'])

Unnamed: 0,Missing Values
index,0
A1_Score,0
A2_Score,0
A3_Score,0
A4_Score,0
A5_Score,0
A6_Score,0
A7_Score,0
A8_Score,0
A9_Score,0


In [13]:
for col in df.select_dtypes('O').columns:
    print('---------------------------------------------------------------------------')
    print(f'Column name: {col}\n')
    print(f'Unique values:\n{df[col].unique()}\n')

---------------------------------------------------------------------------
Column name: gender

Unique values:
['f' 'm']

---------------------------------------------------------------------------
Column name: ethnicity

Unique values:
['White-European' 'Latino' '?' 'Others' 'Black' 'Asian' 'Middle Eastern '
 'Pasifika' 'South Asian' 'Hispanic' 'Turkish' 'others']

---------------------------------------------------------------------------
Column name: jaundice

Unique values:
['no' 'yes']

---------------------------------------------------------------------------
Column name: autism

Unique values:
['no' 'yes']

---------------------------------------------------------------------------
Column name: country_of_res

Unique values:
['United States' 'Brazil' 'Spain' 'Egypt' 'New Zealand' 'Bahamas'
 'Burundi' 'Austria' 'Argentina' 'Jordan' 'Ireland' 'United Arab Emirates'
 'Afghanistan' 'Lebanon' 'United Kingdom' 'South Africa' 'Italy'
 'Pakistan' 'Bangladesh' 'Chile' 'France' 'China' 

In [14]:
df['ethnicity'] = df['ethnicity'].replace('?', 'Others')
df['ethnicity'] = df['ethnicity'].replace('others', 'Others')
df['ethnicity'].unique()

array(['White-European', 'Latino', 'Others', 'Black', 'Asian',
       'Middle Eastern ', 'Pasifika', 'South Asian', 'Hispanic',
       'Turkish'], dtype=object)

In [15]:
df['relation'] = df['relation'].replace('?', 'Others')
df['relation'] = df['relation'].replace('Health care professional', 'Health Care Professional')
df['relation'].unique()

array(['Self', 'Parent', 'Others', 'Health Care Professional', 'Relative'],
      dtype=object)

In [16]:
df.shape

(703, 21)

In [17]:
Y = df['Class/ASD']

X = df[['age', 'gender', 'ethnicity', 'jaundice', 'autism', 'country_of_res', 'result', 'relation', 'A1_Score','A2_Score','A3_Score','A4_Score','A5_Score','A6_Score','A7_Score','A8_Score', 'A9_Score','A10_Score']]

In [18]:
scaler = MinMaxScaler()

numb = ['age', 'result']
features_transform = pd.DataFrame(data = X)
features_transform[numb] = scaler.fit_transform(X[numb])

display(features_transform.head())

Unnamed: 0,age,gender,ethnicity,jaundice,autism,country_of_res,result,relation,A1_Score,A2_Score,A3_Score,A4_Score,A5_Score,A6_Score,A7_Score,A8_Score,A9_Score,A10_Score
0,0.191489,f,White-European,no,no,United States,0.6,Self,1,1,1,1,0,0,1,1,0,0
1,0.148936,m,Latino,no,yes,Brazil,0.5,Self,1,1,0,1,0,0,0,1,0,1
2,0.212766,m,Latino,yes,yes,Spain,0.8,Parent,1,1,0,1,1,0,1,1,1,1
3,0.382979,f,White-European,no,yes,United States,0.6,Self,1,1,0,1,0,0,1,1,0,1
4,0.489362,f,Others,no,no,Egypt,0.2,Others,1,0,0,0,0,0,0,1,0,0


In [19]:
X_encoded = pd.get_dummies(features_transform)
X_encoded.head()

Unnamed: 0,age,result,A1_Score,A2_Score,A3_Score,A4_Score,A5_Score,A6_Score,A7_Score,A8_Score,...,country_of_res_United Arab Emirates,country_of_res_United Kingdom,country_of_res_United States,country_of_res_Uruguay,country_of_res_Viet Nam,relation_Health Care Professional,relation_Others,relation_Parent,relation_Relative,relation_Self
0,0.191489,0.6,1,1,1,1,0,0,1,1,...,False,False,True,False,False,False,False,False,False,True
1,0.148936,0.5,1,1,0,1,0,0,0,1,...,False,False,False,False,False,False,False,False,False,True
2,0.212766,0.8,1,1,0,1,1,0,1,1,...,False,False,False,False,False,False,False,True,False,False
3,0.382979,0.6,1,1,0,1,0,0,1,1,...,False,False,True,False,False,False,False,False,False,True
4,0.489362,0.2,1,0,0,0,0,0,0,1,...,False,False,False,False,False,False,True,False,False,False


In [20]:
Y_encoded = Y.apply(lambda x : 1 if x == 'YES' else 0)
Y_encoded.head()

0    0
1    0
2    1
3    0
4    0
Name: Class/ASD, dtype: int64

In [21]:
X_train, X_test, Y_train, Y_test = train_test_split(X_encoded, Y_encoded, test_size = 0.3, random_state = 42)

X_train = np.asarray(X_train).astype(np.float32)
Y_train = np.asarray(Y_train).astype(np.float32)
X_test = np.asarray(X_test).astype(np.float32)
Y_test = np.asarray(Y_test).astype(np.float32)

print(f'Shape of X_train is : {X_train.shape}')
print(f'Shape of Y_train is : {Y_train.shape}\n')
print(f'Shape of X_test is : {X_test.shape}')
print(f'Shape of Y_test is : {Y_test.shape}')

Shape of X_train is : (492, 100)
Shape of Y_train is : (492,)

Shape of X_test is : (211, 100)
Shape of Y_test is : (211,)


In [28]:
input_dim = X_encoded.shape[1]

model = Sequential()
model.add(Dense(16, input_dim=input_dim, activation='sigmoid'))
model.add(LeakyReLU(alpha=0.01))
model.add(Dense(8))
model.add(LeakyReLU(alpha=0.01))
model.add(Dense(5))
model.add(LeakyReLU(alpha=0.01))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 16)                1616      
                                                                 
 leaky_re_lu_6 (LeakyReLU)   (None, 16)                0         
                                                                 
 dense_9 (Dense)             (None, 8)                 136       
                                                                 
 leaky_re_lu_7 (LeakyReLU)   (None, 8)                 0         
                                                                 
 dense_10 (Dense)            (None, 5)                 45        
                                                                 
 leaky_re_lu_8 (LeakyReLU)   (None, 5)                 0         
                                                                 
 dense_11 (Dense)            (None, 1)                

In [29]:
train = model.fit(X_train, Y_train, epochs = 50, batch_size = 10)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [30]:
predictions = model.predict(X_test)



In [31]:
binary_predictions = (predictions > 0.5).astype(int)

cm = confusion_matrix(Y_test, binary_predictions)
cm

array([[141,   6],
       [  3,  61]], dtype=int64)

In [32]:
print(classification_report(Y_test, binary_predictions))

              precision    recall  f1-score   support

         0.0       0.98      0.96      0.97       147
         1.0       0.91      0.95      0.93        64

    accuracy                           0.96       211
   macro avg       0.94      0.96      0.95       211
weighted avg       0.96      0.96      0.96       211

