<a href="https://colab.research.google.com/github/mathjams/AAAI_2024/blob/main/Region_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Predicting which of the $k^2$ regions the hand fixation lies in

# Labeling each hand fixation coordinate with its region in the $k\times k$ grid

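Each hand fixation is assigned one of $k^2$ region labels, numbered $1$ to $k^2$; the label $0$ is reserved for padding shorter sequences.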

In [None]:
import pandas as pd
import numpy as np
import math as math
from tensorflow.keras.preprocessing.sequence import pad_sequences


def _region_label(x, y, k):
    """Return the 1-based label of the k x k grid cell containing (x, y)."""
    # Assumes x and y are normalised to [0, 1] -- TODO confirm against the
    # data set. Clamping keeps a coordinate that sits exactly on the upper
    # edge (x == 1.0 or y == 1.0) in the last column/row instead of letting it
    # wrap into the next row or exceed k * k.
    col = min(max(math.floor(k * x), 0), k - 1)
    row = min(max(math.floor(k * y), 0), k - 1)
    return col + row * k + 1


def discretesequences2(user_type,k):
    """Load eye/hand fixation recordings and label hand fixations by region.

    For every user of the requested group and for both ``Active_0`` and
    ``Active_1`` sessions, the eye and hand Excel files are read. Eye
    fixations are kept as ``[x, y, start, end]`` rows, with ``start``/``end``
    shifted so that the earliest eye-or-hand ``start`` of the session is 0.
    Each hand fixation is replaced by the label of the ``k x k`` grid cell
    that contains it. Sessions whose files cannot be read are skipped.

    Args:
        user_type: ``'ASD'`` selects the ASD files (9 users); any other value
            selects the TD files (17 users).
        k: Number of grid cells along each axis.

    Returns:
        A tuple ``(resulteye, resulthand, maxlen, maxlenh)`` where
        ``resulteye`` is a list holding one ``(n_rows, 4)`` array per loaded
        session, ``resulthand`` is an int32 array of region labels (1 to
        ``k * k``) with one row per loaded session, padded at the end with 0
        up to ``maxlenh`` columns, ``maxlen`` is the largest row count seen in
        any eye or hand table, and ``maxlenh`` is the longest hand sequence.
    """
    eye_basic_url = '/Users/qyuvks/emily/AAAI/data_set/Eye_'
    hand_basic_url = '/Users/qyuvks/emily/AAAI/data_set/Hand_'
    if user_type == 'ASD':
        num_users = 9
        eye_basic_url += "ASD_"
        hand_basic_url += 'ASD_'
    else:
        num_users = 17
        eye_basic_url += "TD_"
        hand_basic_url += 'TD_'

    resulteye = []
    resulthand = []
    maxlen = 0
    maxlenh = 0
    for user in range(1, num_users + 1):
        for session in range(2):
            # The user index must not be reused by any inner loop: it is
            # needed again to build the file names of the second session.
            suffix = f"U{user}_Active_{session}.xlsx"
            c_eye_url = eye_basic_url + suffix
            c_hand_url = hand_basic_url + suffix
            try:
                eye_data = pd.read_excel(c_eye_url)
                hand_data = pd.read_excel(c_hand_url)
            except IOError as err:
                # Best effort: a missing or unreadable session is skipped,
                # but say which one so gaps in the data set are visible.
                print(f"Skipping {suffix}: {err}")
                continue

            # Re-base the eye timestamps on the earliest event of the session.
            starttime = min(np.min(hand_data['start']), np.min(eye_data['start']))
            eye_data['start'] -= starttime
            eye_data['end'] -= starttime
            resulteye.append(eye_data[['x', 'y', 'start', 'end']].to_numpy())

            regions = [
                _region_label(x, y, k)
                for x, y in zip(hand_data['x'], hand_data['y'])
            ]
            resulthand.append(regions)

            maxlen = max(maxlen, eye_data.shape[0], hand_data.shape[0])
            maxlenh = max(maxlenh, len(regions))

    # Label 0 is never produced by _region_label, so it is safe as padding.
    resulthand2 = pad_sequences(resulthand, maxlen=maxlenh, padding='post', dtype='int32', value=0)
    resulthand3 = np.array(resulthand2)
    return resulteye, resulthand3, maxlen, maxlenh

#LSTM Model Used

In [None]:
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, TimeDistributed
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

def LSTM_model(input_data, output_data, input_max_len, output_max_len, input_features, output_classes, latent_dim, number_of_epochs):
    """Train an LSTM encoder-decoder mapping input sequences to label sequences.

    The encoder LSTM reads the (zero-padded) input sequence and hands its
    final hidden/cell state to the decoder LSTM. The decoder is fed the
    one-hot output sequence and is trained to emit the same sequence shifted
    one step earlier (i.e. the next label at every step); the target of the
    last step is all zeros.

    Args:
        input_data: Sequences of feature rows; padded at the end with zeros
            to ``input_max_len`` steps.
        output_data: Integer class indices. Every value is used to index
            ``np.eye(output_classes)``, so it must be smaller than
            ``output_classes``.
        input_max_len: Number of time steps the inputs are padded to.
        output_max_len: Number of time steps the outputs are padded to.
        input_features: Number of features per input time step.
        output_classes: Width of the one-hot encoding / softmax output.
        latent_dim: Number of units of the encoder and decoder LSTMs.
        number_of_epochs: Number of training epochs.

    Returns:
        ``(model, encoder_model, decoder_model, val_loss, val_accuracy)``:
        the trained training model, the encoder and decoder models built
        from the same layers for step-wise inference, and the per-epoch
        validation loss and validation accuracy taken from the fit history.
    """
    # Zero-pad (at the end) every input sequence to input_max_len steps.
    input_data_padded = pad_sequences(input_data, maxlen=input_max_len, padding='post', dtype='float32', value=0)
    # Row i of the identity matrix is the one-hot vector of class i, so
    # indexing it with the label array one-hot encodes every label at once.
    output_data_one_hot = np.eye(output_classes)[output_data]

    # A 1-D label array yields a 2-D encoding; add a trailing axis so the
    # padding below receives a 3-D array.
    if output_data_one_hot.ndim == 2:
        output_data_one_hot = np.expand_dims(output_data_one_hot, axis=-1)
        print(output_data_one_hot.ndim)

    # Pad the output sequences
    output_data_padded = pad_sequences(output_data_one_hot, maxlen=output_max_len, padding='post', dtype='float32', value=0)

    # Targets are the decoder inputs shifted one step to the left. np.roll
    # wraps the first step around to the end, so that last step is cleared.
    decoder_target_data = np.roll(output_data_padded, shift=-1, axis=1)
    decoder_target_data[:, -1, :] = 0

    # Encoder: only the final states are used, the output is discarded.
    encoder_inputs = Input(shape=(None, input_features), name='encoder_inputs')
    encoder_lstm = LSTM(latent_dim, return_state=True, name='encoder_LSTM')
    _, state_h, state_c = encoder_lstm(encoder_inputs)
    encoder_states = [state_h, state_c]

    # Decoder: starts from the encoder states and emits a class
    # distribution for every time step.
    decoder_inputs = Input(shape=(None, output_classes), name='decoder_inputs')
    decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True, name='decoder_LSTM')
    decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
    decoder_dense = TimeDistributed(Dense(output_classes, activation='softmax'))
    decoder_outputs = decoder_dense(decoder_outputs)

    # Define the model that will turn encoder_inputs and decoder_inputs into decoder_outputs
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

    # Fit the model
#    lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001)
#    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    result = model.fit([input_data_padded, output_data_padded], decoder_target_data,
                      batch_size=50, epochs=number_of_epochs, validation_split=0.2)

    final_loss = result.history['val_loss'][-1]
    print(f'Final validation loss: {final_loss}')

    # Define the encoder model (used for encoding input sequences to their states)
    encoder_model = Model(encoder_inputs, encoder_states)

    # Define the decoder model (used for generating output sequences given the encoded states).
    # It reuses the trained decoder layers but takes its initial states as explicit inputs.
    decoder_state_input_h = Input(shape=(latent_dim,))
    decoder_state_input_c = Input(shape=(latent_dim,))
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

    decoder_outputs, state_h_dec, state_c_dec = decoder_lstm(
        decoder_inputs, initial_state=decoder_states_inputs)
    decoder_states = [state_h_dec, state_c_dec]
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = Model(
        [decoder_inputs] + decoder_states_inputs,
        [decoder_outputs] + decoder_states)

    # Per-epoch validation metrics recorded during training.
    val_loss = result.history['val_loss']
    val_accuracy = result.history['val_accuracy']
    return model, encoder_model, decoder_model, val_loss, val_accuracy

#Statistical Tests

In [None]:
import statistics
import scipy.stats as stats
# For every grid resolution, train the ASD and TD models several times and
# compare their best validation losses with an independent two-sample t-test.
Statistics = []
testing = [10,20,50,100]
for i in testing:
    asdfit=[]
    tdfit=[]
    asdresulteye, asdresulthand, asdmaxlen, asdmaxlenh=discretesequences2('ASD', i)
    tdresulteye, tdresulthand, tdmaxlen, tdmaxlenh=discretesequences2('TD', i)
    # discretesequences2 labels regions 1..i**2 and pads with 0, so the
    # one-hot encoding needs i**2 + 1 classes. With only i**2, a fixation in
    # the last region would index past the end of np.eye(i**2).
    num_classes = i**2 + 1
    # Repeat the training to get a sample of losses per group; the repeat
    # counter gets its own name so it cannot clash with the outer loop.
    for run in range(5):
        modelasd, encoder_modelasd, decoder_modelasd, val_lossasd, val_accuracyasd = LSTM_model(asdresulteye, asdresulthand, asdmaxlen, asdmaxlenh, 4, num_classes, 150, 1000)
        modeltd, encoder_modeltd, decoder_modeltd, val_losstd, val_accuracytd = LSTM_model(tdresulteye, tdresulthand, tdmaxlen, tdmaxlenh, 4, num_classes, 150, 1000)
        # Keep the best (lowest) validation loss reached in each run.
        asdfit.append(min(val_lossasd))
        tdfit.append(min(val_losstd))
    t_statistic, p_value = stats.ttest_ind(asdfit, tdfit)
    meanASD = statistics.mean(asdfit)
    meanTD = statistics.mean(tdfit)
    # NOTE(review): pstdev is the population standard deviation, while the
    # t-test works with sample variances -- confirm this is intended.
    std_asd = statistics.pstdev(asdfit)
    std_td = statistics.pstdev(tdfit)
    print([i, t_statistic, p_value, meanASD, meanTD, std_asd, std_td])
    Statistics.append([i, t_statistic, p_value, meanASD, meanTD, std_asd, std_td])
print(Statistics)