In [4]:
import data_preprocess as dp
from dearpygui import dearpygui as dpg
import pandas as pd
import os

if __name__ == '__main__':
    # Train the classifier that the UI callback `create_result` reads as the
    # module-level name `regressor`.
    #
    # The ARFF file is opened in a `with` block so the handle is closed as soon
    # as loading finishes instead of staying open for the life of the kernel.
    # The name `path_arff` is kept for compatibility; note that it is bound to
    # a file object, not to a path string.
    with open('data/Autism-Adult-Data.arff') as path_arff:
        df = dp.load_data(path_arff)

    # Preprocessing: each helper returns the updated frame; dropped columns are
    # collected in `df_additional`.
    df, df_additional = dp.drop_columns(df, 'used_app_before', 'country_of_res', 'age_desc', 'result', 'jundice')
    df = dp.binary_encoding(df, 'autism_relation', 'ASD', 'gender')
    df, df_additional = dp.remove_categorical(df, df_additional, 'ethnicity', 'relation')
    df = dp.locate_na_index(df)
    df = dp.correct_types(df)

    # Build features/labels, split them, then fit and evaluate the model.
    X, x, y = dp.create_training_variables(df)
    x_train, x_test, y_train, y_test = dp.split_train_test(x, y)
    # x_train, x_test, scaler = dp.create_scaler(x_train, x_test)
    regressor = dp.create_model(x_train, y_train)

    y_pred, score = dp.test_model(regressor, x_test, y_test)



## UI

dpg.create_context()  # Create the Dear PyGui context before any items are built


# Shared value registry: widgets created later bind to these tags through
# their `source=` argument, and callbacks read them with dpg.get_value().
with dpg.value_registry():
    # One string value per AQ-10 question: 'q1' ... 'q10'.
    tags = [f'q{i}' for i in range(1, 11)]
    for tag in tags:
        dpg.add_string_value(tag=tag)
    # Integer values that start at zero.
    for int_tag in ('score', 'age'):
        dpg.add_int_value(tag=int_tag, default_value=0)
    # Radio-button answers are stored as the selected option's text.
    for text_tag in ('relation', 'gender'):
        dpg.add_string_value(tag=text_tag)
    # Holds the model's prediction once results have been requested.
    dpg.add_int_value(tag='result')


# TODO: Work out how to check answers (some questions are inverted; maybe `if [inverted question] ... else: score normally`?)
def store_results():
    """Score the ten AQ-10 answers and publish the total to the UI.

    Reads the stored answers 'q1' ... 'q10'. Questions 1, 7, 8 and 10 earn a
    point for either "Agree" option; every other question earns a point for
    either "Disagree" option. The total is written to the 'score' value and a
    summary line is written to the 'aq_score' item.
    """
    agree_answers = ('Definitely Agree', 'Slightly Agree')
    disagree_answers = ('Definitely Disagree', 'Slightly Disagree')
    agree_scored = {'q1', 'q7', 'q8', 'q10'}

    total = 0
    for number in range(1, 11):
        question = f'q{number}'
        answer = dpg.get_value(question)
        # Pick which pair of answers earns the point for this question.
        scoring_answers = agree_answers if question in agree_scored else disagree_answers
        if answer in scoring_answers:
            total += 1

    dpg.set_value(value=total, item='score')
    dpg.set_value(value=f'AQ-10 score: {dpg.get_value("score")}', item='aq_score')


def AQ10():
    """Open the AQ-10 questionnaire window.

    For each of the ten questions a text prompt is added, followed by a
    four-option radio group labelled with the question number and bound to the
    stored value 'q<number>'. A Submit button with `store_results` as its
    callback is added at the bottom.
    """
    options = ['Definitely Agree', 'Slightly Agree', 'Slightly Disagree', 'Definitely Disagree']
    prompts = (
        'Question 1: I often notice small sounds when others do not',
        'Question 2: I usually concentrate more on the whole picture, rather than the small details',
        'Question 3: I find it easy to do more than one thing at once',
        'Question 4: If there is an interruption, I can switch back to what I was doing very quickly',
        'Question 5: I find it easy to "read between the lines" when someone is talking to me',
        'Question 6: I know how to tell if someone listening to me is getting bored',
        'Question 7: When I am reading a story I find it difficult to work out the characters intentions',
        'Question 8: I like to collect information about categories of things (e.g. types of car, types of bird, types of train, types of plant etc)',
        'Question 9: I find it easy to work out what someone is thinking or feeling just by looking at their face',
        'Question 10: I find it difficult to work out people’s intentions',
    )
    with dpg.window(label='AQ-10 Test'):
        for number, prompt in enumerate(prompts, start=1):
            dpg.add_text(prompt)
            dpg.add_radio_button(items=options, label=str(number), source=f'q{number}')
        dpg.add_button(label='Submit', callback=store_results)


def save_csv(value_df):
    """Write `value_df` to 'saved_results.csv' via its `to_csv` method.

    The file name is passed as the only argument, so `to_csv` runs with its
    own defaults.
    """
    target = 'saved_results.csv'
    value_df.to_csv(target)


def _encode_aq10_answer(question, answer):
    """Return 1 if `answer` earns a point for `question` under AQ-10 marking, else 0.

    Questions 1, 7, 8 and 10 score on either "Agree" option; all other
    questions score on either "Disagree" option.
    """
    if question in ('q1', 'q7', 'q8', 'q10'):
        scoring_answers = ('Definitely Agree', 'Slightly Agree')
    else:
        scoring_answers = ('Definitely Disagree', 'Slightly Disagree')
    return 1 if answer in scoring_answers else 0


def create_result(model, transformer):
    """Open the results window, classify the entered answers and save them.

    Reads 'q1'..'q10', 'age', 'relation' and 'gender' from the value registry,
    encodes them numerically, runs the module-level `regressor` on the encoded
    row, shows the outcome in a 'Results Page' window and appends the row
    (plus the prediction) to 'saved_results.csv'.

    Parameters
    ----------
    model, transformer :
        Not read by this function; the prediction uses the global `regressor`.
        NOTE(review): this function is registered as a Dear PyGui button
        callback, so these two positions presumably receive the callback's
        sender/app_data arguments rather than a model and a transformer;
        confirm before relying on them.
    """
    with dpg.window(label='Results Page'):
        questions = [f'q{i}' for i in range(1, 11)]
        # score removed as high correlation not adding much to model
        items = questions + ['age', 'relation', 'gender']
        answers = dict(zip(items, dpg.get_values(items)))

        # Encode each questionnaire answer as 0/1 and log the conversion.
        encoded = {}
        for question in questions:
            value = answers[question]
            encoded[question] = _encode_aq10_answer(question, value)
            print(f'{value} changed to {encoded[question]} in {question}')

        encoded['age'] = answers['age']
        # NOTE(review): 'Yes' -> 1 and 'Male' -> 1 are assumed to match the
        # encoding and column order used when the model was trained; confirm
        # against data_preprocess.
        encoded['relation'] = 1 if answers['relation'] == 'Yes' else 0
        encoded['gender'] = 1 if answers['gender'] == 'Male' else 0

        values_df = pd.DataFrame([encoded], columns=items)
        value_array = values_df.values
        # value_array = scaler.transform(value_array)
        result = regressor.predict(value_array)
        # predict() returns an array; take its single element explicitly.
        # int() on a whole 1-element array is deprecated since NumPy 1.25.
        prediction = int(result[0])
        dpg.set_value(value=prediction, item='result')
        values_df['result'] = result

        if prediction == 0:
            dpg.add_text(f'Likely do not have ASD')
            dpg.add_text(f'{values_df}')
            if dpg.get_value("score") >= 6:
                dpg.add_text(f'As the AQ-10 score is 6 or greater, it is still recommended to test further.')
        else:
            dpg.add_text(f'Likely have ASD - further testing is recommended.')
            dpg.add_text(f'{values_df}')

        file_name = 'saved_results.csv'
        # Append without a header when the file already exists, to avoid
        # writing headers repeatedly.
        file_exists = os.path.isfile(file_name)
        values_df.to_csv(
            file_name,
            mode='a' if file_exists else 'w',
            header=not file_exists,
            index=False,
        )
        # Announce the save only after the write has succeeded.
        dpg.add_text(f'Results saved to {file_name}')



def classify():
    """Open the 'Classifier' window used to collect inputs and request results.

    The window offers a button that opens the AQ-10 test, a status line tagged
    'aq_score', inputs bound to the 'age', 'relation' and 'gender' values, and
    a 'Get Results' button whose callback is `create_result`.
    """
    relation_choices = ['Yes', 'No']
    gender_choices = ['Male', 'Female']

    with dpg.window(label='Classifier'):
        # Questionnaire launcher and the line `store_results` updates afterwards.
        dpg.add_button(label='AQ-10 Test', callback=AQ10)
        dpg.add_text(tag='aq_score', default_value='AQ-10 not yet completed')

        # Additional inputs collected alongside the questionnaire answers.
        dpg.add_input_int(label='Age', source='age')
        dpg.add_text('Relation with autism?')
        dpg.add_radio_button(items=relation_choices, label='Relation', source='relation')
        dpg.add_text('Gender')
        dpg.add_radio_button(items=gender_choices, label='Gender', source='gender')

        dpg.add_button(label='Get Results', callback=create_result)




def instructions():
    """Open the 'Instructions' window listing the four steps, one text line each."""
    steps = (
        '1. Click Classify Button.',
        '2.Complete AQ-10 Test and click submit at the bottom',
        '3. Enter age, relation and gender',
        '4. Click Get Results!',
    )
    with dpg.window(label='Instructions'):
        for step in steps:
            dpg.add_text(step)


# Main menu: a non-closable window sized to match the viewport, with one
# button per sub-window (`instructions` and `classify` are the callbacks).
with dpg.window(label='Main Menu', tag='main_menu', width=1280, height=720, no_close=True):
    dpg.add_text('Please select an option!')
    dpg.add_button(label='Instructions', callback=instructions)
    dpg.add_button(label='Classify', callback=classify)


# Application lifecycle: create and show the viewport, run the UI, then tear
# the context down.
# NOTE(review): start_dearpygui() presumably blocks until the viewport is
# closed, so in a notebook this cell would not finish while the UI is open;
# confirm against the Dear PyGui docs.
dpg.create_viewport(title='Healthcare Diagnosis Support', width=1280, height=720) # Creates overall window
dpg.setup_dearpygui() # Assign Viewport
dpg.show_viewport() # Shows overall window
dpg.start_dearpygui() # Starts window
dpg.destroy_context() # Destroys context





Loading
Data loaded of length 704!
Index(['A1_Score', 'A2_Score', 'A3_Score', 'A4_Score', 'A5_Score', 'A6_Score',
       'A7_Score', 'A8_Score', 'A9_Score', 'A10_Score', 'age', 'gender',
       'ethnicity', 'jundice', 'autism_relation', 'country_of_res',
       'used_app_before', 'result', 'age_desc', 'relation', 'ASD'],
      dtype='object')

Created additional dataframe from dropped

Preprocessing:
Dropped ('used_app_before', 'country_of_res', 'age_desc', 'result', 'jundice') columns

Encoding:
Running encoding on ('autism_relation', 'ASD', 'gender')
Encoded autism_relation
Dropped autism_relation
Encoded ASD
Dropped ASD
Encoded gender
Dropped gender
ethnicity added to additional
ethnicity dropped from dataframe
relation added to additional
relation dropped from dataframe
index 10 has 2 null values
max slice:
   A1_Score A2_Score A3_Score A4_Score A5_Score A6_Score A7_Score A8_Score   
52        1        0        0        0        0        0        0        0  \

   A9_Score A10_Sco

####

### Testing Techniques and Model Analysis

In [2]:
import data_preprocess as dp

# Load and preprocess the dataset into `data` for the analysis cells.
# The ARFF file is opened in a `with` block so the handle is closed once
# loading finishes; `path_arff` keeps its name but is a file object, not a path.
with open('data/Autism-Adult-Data.arff') as path_arff:
    data = dp.load_data(path_arff)

# Each helper returns the updated frame; dropped columns are collected in
# `df_additional`.
data, df_additional = dp.drop_columns(data, 'used_app_before', 'country_of_res', 'age_desc', 'result', 'jundice')
data = dp.binary_encoding(data, 'autism_relation', 'ASD', 'gender')
data, df_additional = dp.remove_categorical(data, df_additional, 'ethnicity', 'relation')
data = dp.locate_na_index(data)
data = dp.correct_types(data)

NameError: name 'dpg' is not defined

In [None]:
# Bar chart of the fitted model's coefficients, one bar per training feature.
# Relies on `pd`, `X` and `regressor` having been defined by an earlier cell.
# The frame is built in a single constructor call (features as the index, one
# column labelled 0) rather than growing an empty frame and transposing it.
dframe = pd.DataFrame({0: regressor.coef_[0]}, index=X.columns)

ax = dframe.plot.bar(legend=False)
ax.set(title='Model coefficients by feature', xlabel='Feature', ylabel='Coefficient')

In [7]:
import numpy as np


# Count how many test labels fall in each class.
# Relies on `y_test` from an earlier cell.
unique, counts = np.unique(y_test, return_counts=True)
# The `=` specifier prints each variable's name alongside its value.
print(f'{unique=}, {counts=}')

unique=array([0, 1]), counts=array([101,  40], dtype=int64)


## SMOTE Testing

In [41]:
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, recall_score

# Oversample the training split with SMOTE; the test split is left untouched.
# Relies on x_train, y_train, x_test and y_test from an earlier cell.
sm = SMOTE(random_state=42)
x_train_res, y_train_res = sm.fit_resample(x_train, y_train)

# Fit a logistic regression on the resampled data and predict the test split.
lr2 = LogisticRegression(max_iter=1000)
lr2.fit(x_train_res, y_train_res)
y_pred_res = lr2.predict(x_test)

score2 = accuracy_score(y_test, y_pred_res)
print(score2)
rscore2 = recall_score(y_test, y_pred_res)

# List every test sample whose prediction differs from the true label.
for i, val in enumerate(y_test):
    if val != y_pred_res[i]:
        print(f'actual={val}, {y_pred_res[i]=}')

print(rscore2)

0.9574468085106383
actual=0, y_pred_res[i]=1
actual=0, y_pred_res[i]=1
actual=0, y_pred_res[i]=1
actual=0, y_pred_res[i]=1
actual=0, y_pred_res[i]=1
actual=0, y_pred_res[i]=1
1.0


In [42]:
# Number of training labels equal to 1 (before resampling).
count = sum(1 for label in y_train if label == 1)
print(count)

160


In [43]:
# Number of resampled training labels equal to 1.
count = sum(1 for label in y_train_res if label == 1)
print(count)

400


In [21]:
# Total number of labels in the training split.
len(y_train)

562