In [2]:
# Import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn

In [3]:
# Time series dataset
data = pd.read_csv('Caldwell_Manip_Images_10-14_TimeSeries.csv')

# Dataset containing participant vote and image manipulation information
# (read from the sheet named "data")
label_vote = pd.read_excel(
    'Caldwell_ImageManipulation-EyeGaze_DataSetCombined.xlsx',
    sheet_name='data',
)

# Every column that may be offered to the model as an input feature
FEATURES = [
    'Fixations_ID',
    'Participant_ID',
    'Image_ID',
    'X Pos',
    'Y Pos',
    'Start Time',
    'Stop Time',
    'Duration',
    'Samples in Fixation',
]


In [4]:
# Merge the time series with the per-(participant, image) label table.
# A left join keeps every time-series row even when no label row matches.
df = pd.merge(data, label_vote, left_on=['Participant_ID', 'Image_ID'],
              right_on=['participant', 'image'], how='left')
df = df[FEATURES + ['vote', 'image manipulated']]

# Split train and test data: rows of image 14 are held out for testing.
# .copy() makes both splits independent frames, so the column assignments
# below neither raise SettingWithCopyWarning nor touch `df`.
msk = df['Image_ID'] == 14
train_data = df[~msk].copy()
test_data = df[msk].copy()

# Standardise the feature columns (zero mean, unit variance).
#  * A min-max rescale before the z-score is redundant: the z-score is
#    invariant to that affine transform, so the result is the same.
#  * The statistics come from the TRAINING split only and are re-used for the
#    test split. Scaling the test split with its own statistics puts it on a
#    different scale than the model was trained on, and turns any column that
#    is constant there (Image_ID is always 14) into NaN via 0/0.
#  * A zero standard deviation is replaced by 1 so a constant column becomes 0
#    instead of NaN.
feature_mean = train_data[FEATURES].mean()
feature_std = train_data[FEATURES].std().replace(0, 1)
train_data[FEATURES] = (train_data[FEATURES] - feature_mean) / feature_std
test_data[FEATURES] = (test_data[FEATURES] - feature_mean) / feature_std

train_data.shape, test_data.shape

((25686, 11), (5428, 11))

In [5]:
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear read-out layer.

    Based on the LSTM article posted by Jessica Yung:
    https://www.jessicayung.com/lstms-for-time-series-in-pytorch/

    Note: the ``batch_first`` constructor argument is accepted but not used;
    the recurrent layer is always built with ``batch_first=True``.
    """

    def __init__(self, input_dim, hidden_dim, batch_size, output_dim,
                    num_layers, batch_first=True):
        super().__init__()
        self.input_dim, self.hidden_dim = input_dim, hidden_dim
        self.batch_size, self.num_layers = batch_size, num_layers

        # Recurrent part (created first so parameter initialisation order
        # is unchanged)
        self.lstm = nn.LSTM(
            input_size=self.input_dim,
            hidden_size=self.hidden_dim,
            num_layers=self.num_layers,
            batch_first=True,
        )

        # Read-out: hidden features -> output_dim scores
        self.linear = nn.Linear(in_features=self.hidden_dim, out_features=output_dim)

    def init_hidden(self):
        """Return a fresh ``(h, c)`` pair of zero tensors.

        Each tensor has shape ``(num_layers, batch_size, hidden_dim)``.
        ``forward`` does not pass this state to the LSTM layer.
        """
        state_shape = (self.num_layers, self.batch_size, self.hidden_dim)
        return (torch.zeros(*state_shape), torch.zeros(*state_shape))

    def forward(self, input):
        """Run ``input`` through the LSTM and the linear read-out.

        ``input`` is reshaped to ``(len(input), batch_size, -1)``. Because the
        LSTM layer is ``batch_first``, the first axis is treated as the batch
        and the second as the time axis. The LSTM's final ``(h, c)`` state is
        stored on ``self.hidden`` as a side effect.

        Returns the linear layer applied to the output at the last index of
        the second axis, i.e. a tensor of shape ``(len(input), output_dim)``.
        """
        reshaped = input.view(len(input), self.batch_size, -1)
        recurrent_out, self.hidden = self.lstm(reshaped)

        # Only the last step along the second axis feeds the read-out layer
        last_step = recurrent_out[:, -1, :]
        return self.linear(last_step)

    

In [6]:

# --- Network architecture ---
hidden_dim = 5      # size of the LSTM hidden state
num_layers = 2      # number of stacked LSTM layers
output_dim = 3      # number of output scores (classes)
batch = 1           # value handed to the model as its batch_size

# --- Optimisation ---
learning_rate = 0.1
num_epochs = 200

# Classification loss shared by every training run
loss_fn = torch.nn.CrossEntropyLoss()

In [7]:
#####################
# Train model
#####################

def modeling(num_features, x, y):
    """Train a fresh LSTM classifier on ``(x, y)`` and report its final fit.

    The network size and training schedule come from the module-level
    settings ``hidden_dim``, ``batch``, ``output_dim``, ``num_layers``,
    ``learning_rate``, ``num_epochs`` and ``loss_fn``.

    Args:
        num_features: input dimension handed to the LSTM constructor.
        x: input tensor fed to the model as a whole on every epoch
            (assumed to be ``(n_rows, num_features)`` -- TODO confirm).
        y: target tensor passed to ``loss_fn`` together with the predictions.

    Returns:
        ``(loss, accuracy)``: the loss of the last epoch and the training
        accuracy in percent of the last epoch. Both are measured on the
        forward pass that precedes the final parameter update.

    Raises:
        IndexError: if ``num_epochs`` is 0 (nothing was recorded).
    """
    model = LSTM(num_features, hidden_dim, batch, output_dim, num_layers)
    optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)

    losses = []
    accuracies = []

    for t in range(num_epochs):
        # Clear gradients once per epoch. The optimiser owns every model
        # parameter, so a separate model.zero_grad() is redundant.
        optimiser.zero_grad()

        # Reset the stored hidden state (the forward pass overwrites it)
        model.hidden = model.init_hidden()

        # Forward pass
        y_pred = model(x)

        # Calculate the loss
        loss = loss_fn(y_pred, y)
        losses.append(loss.item())

        if t % 50 == 0 or t == num_epochs - 1:
            # Collapse the class scores to one predicted label per row
            _, predicted = torch.max(y_pred, 1)
            total = predicted.size(0)
            # Vectorised hit count, computed once and reused for both the
            # history and the log line
            correct = predicted.detach().numpy() == y.detach().numpy()
            accuracy = 100 * correct.sum() / total
            accuracies.append(accuracy)
            # Print loss and accuracy
            print('Epoch [%d/%d] Loss: %.4f  Accuracy: %.2f %%' % (t + 1, num_epochs, loss.item(), accuracy))

        # Backward pass
        loss.backward()

        # Update parameters
        optimiser.step()

    return losses[-1], accuracies[-1]


In [8]:
# Genetic-algorithm settings
POP_SIZE = 4             # population size
DNA_SIZE = 9             # number of bits in one DNA string
N_GENERATIONS = 50       # number of generations to run
CROSS_RATE = 0.8         # probability that crossover is applied to a parent
MUTATION_RATE = 0.02     # per-bit mutation probability


# define population select function based on fitness value
# population with higher fitness value has higher chance to be selected
def select(pop, fitness):
    """Resample the population with probability proportional to fitness.

    Args:
        pop: array with one individual per row.
        fitness: one non-negative weight per row of ``pop`` (array-like);
            the weights must have a positive sum. Larger weight means a
            higher chance of being drawn.

    Returns:
        A new array with as many rows as ``pop``, drawn with replacement.

    Raises:
        ValueError: propagated from ``np.random.choice`` when the weights do
            not form a valid probability vector or do not match ``len(pop)``.
    """
    # Size the draw from the population itself rather than from a global
    # constant, so it stays correct whatever the number of individuals.
    n_individuals = len(pop)
    weights = np.asarray(fitness, dtype=float)
    idx = np.random.choice(np.arange(n_individuals), size=n_individuals, replace=True,
                           p=weights / weights.sum())
    return pop[idx]

# define mutation function
def mutate(child, mutation_rate=None):
    """Flip each bit of ``child`` independently with a fixed probability.

    Args:
        child: mutable sequence of bits (modified in place).
        mutation_rate: per-bit flip probability; defaults to the
            module-level ``MUTATION_RATE`` when omitted.

    Returns:
        The same ``child`` object, after mutation.
    """
    rate = MUTATION_RATE if mutation_rate is None else mutation_rate
    # Walk the chromosome's own length instead of a global size, so shorter
    # or longer chromosomes are handled without IndexError or skipped bits.
    for point in range(len(child)):
        if np.random.rand() < rate:
            child[point] = 1 if child[point] == 0 else 0
    return child

# define gene crossover function
def crossover(parent, pop, cross_rate=None):
    """Possibly mix ``parent`` with a random mate taken from ``pop``.

    With probability ``cross_rate`` a mate is drawn uniformly from all rows
    of ``pop`` and a random subset of the parent's bits is overwritten with
    the mate's bits at the same positions.

    Args:
        parent: 1-D numpy array of bits (modified in place).
        pop: 2-D numpy array, one individual per row, from which the mate
            is drawn.
        cross_rate: crossover probability; defaults to the module-level
            ``CROSS_RATE`` when omitted.

    Returns:
        The same ``parent`` array, after the optional crossover.
    """
    rate = CROSS_RATE if cross_rate is None else cross_rate
    if np.random.rand() < rate:
        # Draw the mate from the whole population actually passed in, so
        # every individual can be chosen whatever the population size.
        mate = pop[np.random.randint(0, len(pop))]
        # Random boolean mask of crossover points. The builtin `bool` is
        # used because the `np.bool` alias was removed in NumPy 1.24.
        cross_points = np.random.randint(0, 2, size=len(parent)).astype(bool)
        # produce one child
        parent[cross_points] = mate[cross_points]
    return parent

# Get selected features by DNA
def selected_features(features, dna):
    """Return the entries of ``features`` whose matching DNA bit equals 1.

    When ``features`` and ``dna`` differ in length an empty list is returned.
    """
    if len(features) != len(dna):
        return []
    return [name for name, bit in zip(features, dna) if bit == 1]

# Get X and Y
def input_target(df,subset, target):
    """Build the model input and target tensors from a data frame.

    Args:
        df: source data frame.
        subset: list of column names used as input features.
        target: name of the column holding the labels.

    Returns:
        ``(X, Y)``: ``X`` is a float tensor made from ``df[subset]``; ``Y`` is
        a long (integer) tensor made from ``df[target]``.
    """
    feature_values = df[subset].values
    target_values = df[target].values
    return torch.Tensor(feature_values).float(), torch.Tensor(target_values).long()
    

In [9]:
# Initialise the population DNA: POP_SIZE random individuals plus one
# individual with every feature switched on.
pop = np.random.randint(2, size=(POP_SIZE, DNA_SIZE))
pop = np.append(pop, [[1] * DNA_SIZE], axis=0)
print(pop)

# Every individual is scored on the same 1-in-10 row subsample of the
# training data, so build it once instead of once per individual.
train_subset = train_data.iloc[::10]

for t in range(N_GENERATIONS):
    print("------------- Generation ", t, "-------------")
    # (loss, accuracy) of every individual in the current population
    fitness = []
    for p in pop:
        print("The selected parent: ", p)
        features = selected_features(FEATURES, p)

        if not features:
            # An all-zero chromosome selects no input column; there is
            # nothing to train on, so rank it as the worst possible result.
            loss, accuracy = float('inf'), 0.0
        else:
            x, y = input_target(train_subset, features, "vote")
            # use selected features to train the model
            loss, accuracy = modeling(len(features), x, y)

        fitness.append((loss, accuracy))

    # index of the best individual (lowest loss)
    p1 = fitness.index(min(fitness))

    # Stop when the best loss is good enough or the generation budget is used
    # up. Breaking before breeding keeps `pop` equal to the population that
    # was actually evaluated.
    if fitness[p1][0] < 0.3 or t == N_GENERATIONS - 1:
        print('End-----------', pop[p1], "fit: ", fitness[p1])
        break

    # Roulette-wheel weights: selection probability must grow as the loss
    # shrinks, so invert the loss. The small floor keeps the weights a valid
    # distribution even if every individual was un-trainable.
    losses = np.array([individual_loss for individual_loss, _ in fitness], dtype=float)
    scores = np.maximum(1.0 / (1.0 + losses), 1e-6)

    selected_pop = select(pop, scores)
    print("number selected:  ", len(selected_pop))
    selected_pop_copy = selected_pop.copy()
    print("selected pop", selected_pop)
    for parent in selected_pop:
        child = crossover(parent, selected_pop_copy)

        print("child", child)
        child = mutate(child)
        parent[:] = child

    # The offspring become the next generation. Without this assignment the
    # same initial population would be re-evaluated in every generation.
    pop = selected_pop

print(pop)
    

[[1 1 1 1 1 1 1 0 1]
 [0 1 0 1 0 1 1 1 1]
 [0 0 1 0 0 1 0 1 0]
 [1 0 1 0 1 1 0 1 0]
 [1 1 1 1 1 1 1 1 1]]
------------- Generation  0 -------------
The selected parent:  [1 1 1 1 1 1 1 0 1]
Epoch [1/200] Loss: 1.0654  Accuracy: 51.75 %
Epoch [51/200] Loss: 0.8916  Accuracy: 51.75 %
Epoch [101/200] Loss: 0.8683  Accuracy: 51.75 %
Epoch [151/200] Loss: 0.8622  Accuracy: 51.75 %
Epoch [200/200] Loss: 0.8602  Accuracy: 51.75 %
The selected parent:  [0 1 0 1 0 1 1 1 1]
Epoch [1/200] Loss: 1.1975  Accuracy: 5.27 %
Epoch [51/200] Loss: 0.9015  Accuracy: 51.75 %
Epoch [101/200] Loss: 0.8696  Accuracy: 51.75 %
Epoch [151/200] Loss: 0.8620  Accuracy: 51.75 %


KeyboardInterrupt: 

In [None]:
# Baseline: train once with all features on every tenth row of the merged frame
X, Y = input_target(df.iloc[::10], FEATURES, 'vote')
x = modeling(9, X, Y)
x