In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [2]:
# Read the toddler screening CSV from the working directory and preview it.
csv_path = "Toddler Autism dataset.csv"
data = pd.read_csv(csv_path)
data

Unnamed: 0,Case_No,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,Age_Mons,Qchat-10-Score,Sex,Ethnicity,Jaundice,Family_mem_with_ASD,Who completed the test,Class/ASD Traits
0,1,0,0,0,0,0,0,1,1,0,1,28,3,f,middle eastern,yes,no,family member,No
1,2,1,1,0,0,0,1,1,0,0,0,36,4,m,White European,yes,no,family member,Yes
2,3,1,0,0,0,0,0,1,1,0,1,36,4,m,middle eastern,yes,no,family member,Yes
3,4,1,1,1,1,1,1,1,1,1,1,24,10,m,Hispanic,no,no,family member,Yes
4,5,1,1,0,1,1,1,1,1,1,1,20,9,f,White European,no,yes,family member,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1049,1050,0,0,0,0,0,0,0,0,0,1,24,1,f,White European,no,yes,family member,No
1050,1051,0,0,1,1,1,0,1,0,1,0,12,5,m,black,yes,no,family member,Yes
1051,1052,1,0,1,1,1,1,1,1,1,1,18,9,m,middle eastern,yes,no,family member,Yes
1052,1053,1,0,0,0,0,0,0,1,0,1,19,3,m,White European,no,yes,family member,No


In [3]:
# Count missing values column by column.
data.isna().sum(axis=0)

Case_No                   0
A1                        0
A2                        0
A3                        0
A4                        0
A5                        0
A6                        0
A7                        0
A8                        0
A9                        0
A10                       0
Age_Mons                  0
Qchat-10-Score            0
Sex                       0
Ethnicity                 0
Jaundice                  0
Family_mem_with_ASD       0
Who completed the test    0
Class/ASD Traits          0
dtype: int64

In [4]:
# Case_No looks like a running case number rather than a predictor
# (presumably safe to discard — confirm against the dataset description).
data = data.drop(columns=["Case_No"])

In [5]:
# Exploratory data analysis (EDA) and preprocessing start here

In [6]:
# Remove the columns that are not used as model inputs.
unused_columns = ["Qchat-10-Score", "Who completed the test"]
data = data.drop(columns=unused_columns)

In [7]:
# The label is the last column; select it by position and rename it to 'Answer'.
label_column = data.columns[-1]
data = data.rename(columns={label_column: 'Answer'})
data.head(2)


Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,Age_Mons,Sex,Ethnicity,Jaundice,Family_mem_with_ASD,Answer
0,0,0,0,0,0,0,1,1,0,1,28,f,middle eastern,yes,no,No
1,1,1,0,0,0,1,1,0,0,0,36,m,White European,yes,no,Yes


In [8]:
# Separate the target column from the predictor columns.
y = data["Answer"]
X = data.drop(columns=["Answer"])

In [9]:
# One-hot encode Ethnicity; drop_first=True omits the indicator for the first
# category so the remaining indicators are not redundant.
X = pd.get_dummies(
    X,
    columns=["Ethnicity"],
    drop_first=True,
)

In [10]:
# Label Encoding
from sklearn.preprocessing import LabelEncoder
# One LabelEncoder instance is refit for each column in turn: the target
# first, then the three categorical feature columns.
le = LabelEncoder()
y = le.fit_transform(y)
for column in ("Sex", "Jaundice", "Family_mem_with_ASD"):
    X[column] = le.fit_transform(X[column])
# NOTE(review): after this cell `le` holds the classes of the last column it
# was fit on (Family_mem_with_ASD), not those of the target, and none of the
# fitted encoders are saved alongside the scaler/model — confirm that is intended.


In [11]:
import pickle
# Persist the final feature-column order (after one-hot and label encoding).
column_names = list(X.columns)
with open('output_column_names.pkl', 'wb') as columns_file:
    pickle.dump(column_names, columns_file)

In [12]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [13]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
sc = StandardScaler()
sc.fit(X_train)
X_train_scaled = sc.transform(X_train)
X_test_scaled = sc.transform(X_test)

# Persist the fitted scaler.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(sc, scaler_file)

In [14]:
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score,confusion_matrix
def train_model(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Prints the confusion matrix of the test predictions followed by a
    "Score Table" heading, and returns the scores so the caller can display
    or combine them.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``predict(X)``
        Fitted in place on ``X_train`` / ``y_train``.
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features and labels used for scoring.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns ``accuracy``, ``precision``,
        ``recall`` and ``f1``; each metric is called with its default
        arguments.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Insertion order here is also the column order of the returned table.
    scorers = {
        "accuracy": accuracy_score,
        "precision": precision_score,
        "recall": recall_score,
        "f1": f1_score,
    }
    scores = {name: scorer(y_test, y_pred) for name, scorer in scorers.items()}
    matrix = confusion_matrix(y_test, y_pred)

    print("Confusion Matrix is: ")
    print(matrix)
    print("\n\nScore Table is: ")

    return pd.DataFrame([scores], columns=list(scorers))

In [15]:
# Logistic Regression
from sklearn.linear_model import LogisticRegression
# Baseline classifier: logistic regression with scikit-learn's default
# settings, trained and scored on the standardised features.
model = LogisticRegression()
result = train_model(model, X_train_scaled, y_train, X_test_scaled, y_test)
result.index = ["Logistic Regression"]

# Persist the fitted estimator.
with open("LR_Toddler_model.pkl", "wb") as lr_file:
    pickle.dump(model, lr_file)

# NOTE(review): the recorded run scored 1.0 on every metric — presumably
# because the label is derived from the A1-A10 answers; confirm before
# treating this as generalisation performance.
result

Confusion Matrix is: 
[[ 69   0]
 [  0 142]]


Score Table is: 


Unnamed: 0,accuracy,precision,recall,f1
Logistic Regression,1.0,1.0,1.0,1.0


In [16]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import set_random_seed

# Seed Python, NumPy and the Keras backend so weight initialisation and batch
# shuffling are repeatable across Restart & Run All (the train/test split
# already uses random_state=42).
set_random_seed(42)

# Width of the input layer = number of columns in the scaled training matrix.
input_shape = X_train_scaled.shape[1]

# The input shape is declared with an explicit Input layer instead of an
# `input_shape=` argument on the first Dense layer, which recent Keras
# versions warn about for Sequential models.
model = Sequential([
    Input(shape=(input_shape,)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # for binary classification
])

model.compile(optimizer="Adam", loss="binary_crossentropy", metrics=["accuracy"])

# Stop once validation loss has not improved by at least min_delta for
# `patience` consecutive epochs, and roll back to the best weights seen.
callback = EarlyStopping(
    monitor="val_loss",
    min_delta=0.001,
    patience=10,
    verbose=1,
    mode="auto",
    restore_best_weights=True
)

# validation_split holds out 20% of the training data for the val_* metrics
# that the early-stopping callback monitors.
train = model.fit(
    X_train_scaled, y_train,
    batch_size=20,
    epochs=100,
    validation_split=0.2,
    callbacks=[callback]
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.5632 - loss: 0.6688 - val_accuracy: 0.8817 - val_loss: 0.4099
Epoch 2/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9291 - loss: 0.3372 - val_accuracy: 0.9172 - val_loss: 0.2258
Epoch 3/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9530 - loss: 0.1878 - val_accuracy: 0.9467 - val_loss: 0.1467
Epoch 4/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9785 - loss: 0.1093 - val_accuracy: 0.9645 - val_loss: 0.1105
Epoch 5/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9938 - loss: 0.0710 - val_accuracy: 0.9822 - val_loss: 0.0858
Epoch 6/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9923 - loss: 0.0542 - val_accuracy: 0.9822 - val_loss: 0.0710
Epoch 7/100
[1m34/34[0m [32m━━

In [17]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions,
# then score them with the same four metrics used for the baseline.
y_prob = model.predict(X_test_scaled)
y_pred = (y_prob > 0.5).astype(int)

accuracy, precision, recall, f1 = (
    scorer(y_test, y_pred)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)
print(f"{accuracy} , {precision} , {recall} , {f1}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
0.981042654028436 , 0.9859154929577465 , 0.9859154929577465 , 0.9859154929577465


In [18]:
# One-row score table for the neural network, labelled like the
# logistic-regression row already stored in `result`.
dframe = pd.DataFrame([[accuracy, precision, recall, f1]],
                      columns=['accuracy', 'precision', 'recall', 'f1'])
dframe.index = ["Artificial Neural Network"]

# Drop any row with the same label left by an earlier run of this cell before
# appending, so re-running the cell does not duplicate the ANN row.
# errors="ignore" makes the drop a no-op on the first run.
result = pd.concat([result.drop(index=dframe.index, errors="ignore"), dframe])


# NOTE(review): the Keras model is persisted with pickle here; Keras also
# provides its own `model.save(...)` format — consider it if this artifact
# must load across library versions. Left unchanged so existing consumers of
# the .pkl file keep working.
with open("ann_toddler_model.pkl","wb") as f:
    pickle.dump(model,f)
result


Unnamed: 0,accuracy,precision,recall,f1
Logistic Regression,1.0,1.0,1.0,1.0
Artificial Neural Network,0.981043,0.985915,0.985915,0.985915
