In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pickle

# Load the data
data = pd.read_excel('Autism_Child_Data1.xlsx')

# Preprocess the data
data.dropna(inplace=True)

# Convert the Pandas dataframes into numpy arrays that can be used by scikit_learn
data_raw = data['Class/ASD']
features_raw = data[['age', 'gender', 'ethnicity', 'jundice', 'austim', 'contry_of_res', 'result',
                     'relation', 'A1_Score', 'A2_Score', 'A3_Score', 'A4_Score', 'A5_Score', 'A6_Score', 'A7_Score', 'A8_Score',
                     'A9_Score', 'A10_Score']]

# Data Preprocessing: using MinMaxScaler
scaler = MinMaxScaler()
num = ['age', 'result']
features_minmax_transform = pd.DataFrame(data=features_raw)
features_minmax_transform[num] = scaler.fit_transform(features_raw[num])

# One-Hot Encoding on features_minmax_transform
features_final = pd.get_dummies(features_minmax_transform)

# Encode all classes data to numerical values
data_classes = data_raw.apply(lambda x: 1 if x == 'YES' else 0)

# Shuffle and Split the data
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(features_final, data_classes, test_size=0.2, random_state=1)

# Train Logistic Regression model
lr_model = LogisticRegression(max_iter=200, random_state=1)
lr_model.fit(X_train, y_train)

# Make predictions
y_pred_lr = lr_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred_lr)
print(f"Logistic Regression Accuracy: {accuracy:.4f}")

# Save the trained model to a pickle file
with open('Child_model.pkl', 'wb') as f:
    pickle.dump(lr_model, f)

# Save the scaler to a pickle file
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

print("Model and scaler have been saved to pickle files.")

Logistic Regression Accuracy: 0.9828
Model and scaler have been saved to pickle files.


In [23]:
data = pd.read_csv('Preprocessed_Autism_Data_child.csv')
print(data.head)
# Get unique values for categorical features
unique_genders = data['gender'].unique()
unique_ethnicities = data['ethnicity'].unique()
unique_jundice = data['jundice'].unique()
unique_austim = data['austim'].unique()
unique_countries = data['contry_of_res'].unique()
unique_relations = data['relation'].unique()

<bound method NDFrame.head of      A1_Score  A2_Score  A3_Score  A4_Score  A5_Score  A6_Score  A7_Score  \
0           1         1         0         0         1         1         0   
1           1         1         0         0         1         1         0   
2           1         1         0         0         0         1         1   
3           0         1         0         0         1         1         0   
4           1         1         1         1         1         1         1   
..        ...       ...       ...       ...       ...       ...       ...   
287         1         1         1         1         1         1         1   
288         1         0         0         0         1         0         1   
289         1         0         1         1         1         1         1   
290         1         1         1         0         1         1         1   
291         0         0         1         0         1         0         1   

     A8_Score  A9_Score  A10_Score  ...  gend

In [24]:
print(unique_ethnicities,unique_jundice,unique_countries,unique_relations,unique_austim,sep="\n\n")

['Others' 'Middle Eastern' 'White-European' 'Black' 'South Asian' 'Asian'
 'Pasifika' 'Hispanic' 'Turkish' 'Latino']

['no' 'yes']

['Jordan' 'USA' 'Egypt' 'UK' 'Bahrain' 'Austria' 'Kuwait' 'UAE' 'Europe'
 'Malta' 'Bulgaria' 'South Africa' 'India' 'Afghanistan' 'Georgia'
 'New Zealand' 'Syria' 'Iraq' 'Australia' 'Saudi Arabia' 'Armenia'
 'Turkey' 'Pakistan' 'Canada' 'Oman' 'Brazil' 'South Korea' 'Costa Rica'
 'Sweden' 'Philippines' 'Malaysia' 'Argentina' 'Japan' 'Bangladesh'
 'Qatar' 'Ireland' 'Romania' 'Netherlands' 'Lebanon' 'Germany' 'Latvia'
 'Russia' 'Italy' 'China' 'Nigeria' 'US Outlying Islands' 'Nepal' 'Mexico'
 'Isle of Man' 'Libya' 'Ghana' 'Bhutan']

['Parent' 'Others' 'Self' 'Relative' 'Healthcare Professional']

['no' 'yes']


In [22]:
import pandas as pd

# Load the dataset
df = pd.read_excel(r"Autism_Child_Data1.xlsx")

# Handle missing or unknown values
df.replace("?", "Others", inplace=True)

# Standardize text format: Trim spaces, remove extra quotes, and use title case
df['ethnicity'] = df['ethnicity'].astype(str).str.strip().str.title()
df['contry_of_res'] = df['contry_of_res'].astype(str).str.strip().str.title()
df['relation'] = df['relation'].astype(str).str.strip().str.title()

# Dictionary for country name corrections
country_corrections = {
    'United States': 'USA',
    'United Kingdom': 'UK',
    'United Arab Emirates': 'UAE',
    'Viet Nam': 'Vietnam',
    'U.S. Outlying Islands': 'US Outlying Islands',
    'Isle Of Man': 'Isle of Man'
}

# Dictionary for ethnicity corrections
ethnicity_corrections = {
    'Middle Eastern ': 'Middle Eastern',
    'South Asian': 'South Asian'
}

# Dictionary for relation corrections
relation_corrections = {
    'Self': 'Self',
    'self': 'Self',
    'Health Care Professional': 'Healthcare Professional'
}

# Apply spelling corrections
df['contry_of_res'] = df['contry_of_res'].replace(country_corrections)
df['ethnicity'] = df['ethnicity'].replace(ethnicity_corrections)
df['relation'] = df['relation'].replace(relation_corrections)

# Save the cleaned data to a new CSV file
df.to_csv(r"Preprocessed_Autism_Data_child.csv", index=False)

print("Preprocessing complete. File saved as 'Preprocessed_Autism_Data_child.csv'.")


Preprocessing complete. File saved as 'Preprocessed_Autism_Data_child.csv'.


In [25]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pickle

# Load the data
data = pd.read_csv('Preprocessed_Autism_Data_child.csv')

# Preprocess the data
data.dropna(inplace=True)

# Convert the target column to binary (1 for 'YES', 0 otherwise)
data_classes = data['Class/ASD'].apply(lambda x: 1 if x == 'YES' else 0)

# Define features
features_raw = data[['age', 'gender', 'ethnicity', 'jundice', 'austim', 
                     'contry_of_res', 'result', 'relation', 'A1_Score', 
                     'A2_Score', 'A3_Score', 'A4_Score', 'A5_Score', 
                     'A6_Score', 'A7_Score', 'A8_Score', 'A9_Score', 'A10_Score']].copy()  # ✅ Copy added

# Define numerical columns
num_columns = ['age', 'result']

# Initialize scalers
minmax_scaler = MinMaxScaler()
standard_scaler = StandardScaler()

# Apply MinMaxScaler
features_minmax = features_raw.copy()  # ✅ Copy added
features_minmax[num_columns] = minmax_scaler.fit_transform(features_minmax[num_columns])

# Apply StandardScaler
features_standard = features_raw.copy()  # ✅ Copy added
features_standard[num_columns] = standard_scaler.fit_transform(features_standard[num_columns])

# One-Hot Encoding
features_final = pd.get_dummies(features_standard)

# Shuffle and split the data
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(features_final, data_classes, test_size=0.2, random_state=1)

# Train Logistic Regression model
lr_model = LogisticRegression(max_iter=200, random_state=1)
lr_model.fit(X_train, y_train)

# Make predictions
y_pred_lr = lr_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred_lr)
print(f"Logistic Regression Accuracy: {accuracy:.4f}")

# Save the trained model
with open('Child_model.pkl', 'wb') as f:
    pickle.dump(lr_model, f)

# Save the MinMax scaler
with open('minmax_scaler(Childs).pkl', 'wb') as f:
    pickle.dump(minmax_scaler, f)

# Save the Standard scaler
with open('standard_scaler(Childs).pkl', 'wb') as f:
    pickle.dump(standard_scaler, f)

print("Model and scalers have been saved to pickle files.")


Logistic Regression Accuracy: 1.0000
Model and scalers have been saved to pickle files.


In [None]:
import pickle
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Load the trained Logistic Regression model from the pickle file
with open('Child_model.pkl', 'rb') as f:
    lr_model = pickle.load(f)

# Load the MinMaxScaler
with open('minmax_scaler(Childs).pkl', 'rb') as f:
    minmax_scaler = pickle.load(f)

# Load the StandardScaler
with open('standard_scaler(Childs).pkl', 'rb') as f:
    standard_scaler = pickle.load(f)

# Load the dataset to get unique values for categorical features
data = pd.read_csv('Preprocessed_Autism_Data_child.csv')

# Get unique values for categorical features
unique_genders = data['gender'].unique()
unique_ethnicities = data['ethnicity'].unique()
unique_jundice = data['jundice'].unique()
unique_austim = data['austim'].unique()
unique_countries = data['contry_of_res'].unique()
unique_relations = data['relation'].unique()

# Define feature columns
feature_columns = ['age', 'gender', 'ethnicity', 'jundice', 'austim', 'contry_of_res', 'result',
                   'relation', 'A1_Score', 'A2_Score', 'A3_Score', 'A4_Score', 'A5_Score', 'A6_Score',
                   'A7_Score', 'A8_Score', 'A9_Score', 'A10_Score']

# Preprocess the training data to get reference columns
data.dropna(inplace=True)
features_raw = data[feature_columns].copy()
features_transformed = features_raw.copy()

# Apply both scalers
features_transformed[['age', 'result']] = minmax_scaler.transform(features_raw[['age', 'result']])
features_transformed[['age', 'result']] = standard_scaler.transform(features_transformed[['age', 'result']])

# One-hot encode categorical variables
features_final = pd.get_dummies(features_transformed)

# Store reference columns
reference_columns = features_final.columns

# Function to preprocess user input
def preprocess_input(user_input):
    # Convert user input to DataFrame
    input_df = pd.DataFrame([user_input], columns=feature_columns)

    # Apply both scalers
    input_df[['age', 'result']] = minmax_scaler.transform(input_df[['age', 'result']])
    input_df[['age', 'result']] = standard_scaler.transform(input_df[['age', 'result']])

    # One-hot encode categorical variables
    input_df = pd.get_dummies(input_df)

    # Ensure all columns are present
    for col in reference_columns:
        if col not in input_df.columns:
            input_df[col] = 0

    # Reorder columns to match training data
    input_df = input_df[reference_columns]

    return input_df

# Function to get user input and make prediction
def get_user_input_and_predict():
    # Get user input
    user_input = {
        'age': float(input("Enter age (e.g., 4-11): ")),
        'gender': input(f"Enter gender ({'/'.join(unique_genders)}): ").strip().upper(),
        'ethnicity': input(f"Enter ethnicity ({'/'.join(unique_ethnicities)}): ").strip(),
        'jundice': input(f"Had jaundice ({'/'.join(unique_jundice)}): ").strip().lower(),
        'austim': input(f"Family member with autism ({'/'.join(unique_austim)}): ").strip().lower(),
        'contry_of_res': input(f"Enter country of residence ({'/'.join(unique_countries)}): ").strip(),
        'result': float(input("Enter result (e.g., 10.0): ")),
        'relation': input(f"Enter relation ({'/'.join(unique_relations)}): ").strip(),
        'A1_Score': int(input("Enter A1_Score (0 or 1): ")),
        'A2_Score': int(input("Enter A2_Score (0 or 1): ")),
        'A3_Score': int(input("Enter A3_Score (0 or 1): ")),
        'A4_Score': int(input("Enter A4_Score (0 or 1): ")),
        'A5_Score': int(input("Enter A5_Score (0 or 1): ")),
        'A6_Score': int(input("Enter A6_Score (0 or 1): ")),
        'A7_Score': int(input("Enter A7_Score (0 or 1): ")),
        'A8_Score': int(input("Enter A8_Score (0 or 1): ")),
        'A9_Score': int(input("Enter A9_Score (0 or 1): ")),
        'A10_Score': int(input("Enter A10_Score (0 or 1): "))
    }

    # Preprocess the input
    input_df = preprocess_input(user_input)

    # Make prediction
    prediction = lr_model.predict(input_df)

    # Print the prediction
    if prediction[0] == 1:
        print("The model predicts that the individual has ASD.")
    else:
        print("The model predicts that the individual does not have ASD.")

# Call the function to get user input and make prediction
get_user_input_and_predict()


The model predicts that the individual does not have ASD.


In [None]:
import pickle
import pandas as pd
import streamlit as st
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Load the trained Logistic Regression model
with open('Child_model.pkl', 'rb') as f:
    lr_model = pickle.load(f)

# Load the MinMaxScaler
with open('minmax_scaler(Childs).pkl', 'rb') as f:
    minmax_scaler = pickle.load(f)

# Load the StandardScaler
with open('standard_scaler(Childs).pkl', 'rb') as f:
    standard_scaler = pickle.load(f)

# Load dataset to get unique values for categorical features
data = pd.read_excel("Autism_Child_Data1.xlsx")
data.dropna(inplace=True)

# Extract unique values for categorical inputs
unique_genders = data['gender'].unique()
unique_ethnicities = data['ethnicity'].unique()
unique_jundice = data['jundice'].unique()
unique_austim = data['austim'].unique()
unique_countries = data['contry_of_res'].unique()
unique_relations = data['relation'].unique()

# Define feature columns
feature_columns = ['age', 'gender', 'ethnicity', 'jundice', 'austim', 'contry_of_res', 'result',
                   'relation', 'A1_Score', 'A2_Score', 'A3_Score', 'A4_Score', 'A5_Score', 'A6_Score',
                   'A7_Score', 'A8_Score', 'A9_Score', 'A10_Score']

# Process training data for reference columns
features_raw = data[feature_columns].copy()
features_transformed = features_raw.copy()
features_transformed[['age', 'result']] = minmax_scaler.transform(features_raw[['age', 'result']])
features_transformed[['age', 'result']] = standard_scaler.transform(features_transformed[['age', 'result']])
features_final = pd.get_dummies(features_transformed)
reference_columns = features_final.columns

def preprocess_input(user_input):
    input_df = pd.DataFrame([user_input], columns=feature_columns)
    input_df[['age', 'result']] = minmax_scaler.transform(input_df[['age', 'result']])
    input_df[['age', 'result']] = standard_scaler.transform(input_df[['age', 'result']])
    input_df = pd.get_dummies(input_df)
    
    for col in reference_columns:
        if col not in input_df.columns:
            input_df[col] = 0
    
    return input_df[reference_columns]

# Streamlit UI
st.title("Autism Spectrum Disorder (ASD) Prediction")
st.write("Fill in the details below to get a prediction.")

age = st.number_input("Age", min_value=1, max_value=100, value=5)
gender = st.selectbox("Gender", unique_genders)
ethnicity = st.selectbox("Ethnicity", unique_ethnicities)
jundice = st.selectbox("Had jaundice?", unique_jundice)
austim = st.selectbox("Family member with autism?", unique_austim)
country = st.selectbox("Country of residence", unique_countries)
result = st.number_input("Test Result (e.g., 10.0)", min_value=0.0, value=10.0)
relation = st.selectbox("Relation", unique_relations)

# Autism test scores (0 or 1)
a1 = st.radio("A1 Score", [0, 1])
a2 = st.radio("A2 Score", [0, 1])
a3 = st.radio("A3 Score", [0, 1])
a4 = st.radio("A4 Score", [0, 1])
a5 = st.radio("A5 Score", [0, 1])
a6 = st.radio("A6 Score", [0, 1])
a7 = st.radio("A7 Score", [0, 1])
a8 = st.radio("A8 Score", [0, 1])
a9 = st.radio("A9 Score", [0, 1])
a10 = st.radio("A10 Score", [0, 1])

if st.button("Predict ASD Status"):
    user_input = {
        'age': age,
        'gender': gender,
        'ethnicity': ethnicity,
        'jundice': jundice,
        'austim': austim,
        'contry_of_res': country,
        'result': result,
        'relation': relation,
        'A1_Score': a1,
        'A2_Score': a2,
        'A3_Score': a3,
        'A4_Score': a4,
        'A5_Score': a5,
        'A6_Score': a6,
        'A7_Score': a7,
        'A8_Score': a8,
        'A9_Score': a9,
        'A10_Score': a10
    }
    
    input_df = preprocess_input(user_input)
    prediction = lr_model.predict(input_df)
    result_text = "The model predicts that the individual has ASD." if prediction[0] == 1 else "The model predicts that the individual does not have ASD."
    
    st.subheader("Prediction Result")
    st.write(result_text)


2025-02-27 00:11:12.123 
  command:

    streamlit run c:\Users\saiha\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-02-27 00:11:12.127 Session state does not function when running a script without `streamlit run`
