In [None]:
### Necessary Packages and Functions

import pandas as pd
import numpy as np
from keras import models
from keras import layers
from keras import utils
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
import random
from keras.models import model_from_json


# function used to create unique IDs
def concatenate_row(row):
    """Build an ID string by joining the ``str()`` of every value in ``row``.

    Values are joined with no separator, so ``[1, 0, 2]`` becomes ``'102'``.
    """
    return ''.join(map(str, row))
    
# split train/test data intelligently
def split_train_test(joint_data):
    """Partition ``joint_data`` on its ``'Test'`` column.

    Returns a ``(train, test)`` tuple: the rows whose ``'Test'`` value equals
    False, followed by the rows whose ``'Test'`` value equals True.
    """
    test_flag = joint_data['Test']
    train_rows = joint_data[test_flag == False]
    test_rows = joint_data[test_flag == True]
    return train_rows, test_rows

# split input_output for neural network (X, y)
def split_input_output(df):
    """Separate the ``'Left_Saved'`` label column from the feature columns.

    The label is popped from ``df`` in place, so the returned ``X`` is the
    very same frame object the caller passed in, minus the label column.

    Returns:
        ``(X, y)`` where ``y`` is the popped ``'Left_Saved'`` Series.
    """
    labels = df.pop('Left_Saved')
    return df, labels


In [None]:
### Importing and Cleaning Full Moral Machine Dataset Responses

pd.set_option('display.max_columns', None)
df_path = './SharedResponses.csv.tar.gz'
df_columns = [
    'Man', 'Woman', 'Pregnant', 'Stroller', 'OldMan', 'OldWoman', 'Boy', 'Girl', 'Homeless','LargeWoman', 'LargeMan', 'Criminal', 'MaleExecutive', 'FemaleExecutive', 'FemaleAthlete', 'MaleAthlete',
    'FemaleDoctor', 'MaleDoctor', 'Dog', 'Cat', 'LeftHand', 'Saved', 'CrossingSignal', 'Unnamed: 0', 'PedPed', 'ExtendedSessionID'
]

# Dtypes applied to every chunk: cast to smaller data types to save memory.
# The builtin ``bool`` replaces the ``np.bool`` alias, which was deprecated in
# NumPy 1.20 and removed in NumPy 1.24.
chunk_dtypes = {
    'Man': 'int8',
    'Woman': 'int8',
    'Pregnant': 'int8',
    'Stroller': 'int8',
    'OldMan': 'int8',
    'OldWoman': 'int8',
    'Boy': 'int8',
    'Girl': 'int8',
    'Homeless': 'int8',
    'LargeWoman': 'int8',
    'LargeMan': 'int8',
    'Criminal': 'int8',
    'MaleExecutive': 'int8',
    'FemaleExecutive': 'int8',
    'FemaleAthlete': 'int8',
    'MaleAthlete': 'int8',
    'FemaleDoctor': 'int8',
    'MaleDoctor': 'int8',
    'Dog': 'int8',
    'Cat': 'int8',
    'Saved': bool,
    'LeftHand': bool,
    'CrossingSignal': 'int8',
    'Unnamed: 0': 'string',
}

# Collect the per-chunk pieces in lists and concatenate once after the loop;
# concatenating onto a growing frame every iteration re-copies all rows read
# so far on each pass.
left_chunks = []
right_chunks = []
df_reader = pd.read_csv(df_path, skiprows=3, usecols=df_columns, skipinitialspace=True, chunksize=10**6)
counter = 0
for chunk in df_reader:
    counter += 1
    chunk = chunk.dropna()

    # only considering pedestrian vs. pedestrian accident scenarios
    chunk = chunk[chunk['PedPed'] != 0]

    chunk = chunk.astype(chunk_dtypes)

    # Each row describes one side of a scenario; rows with LeftHand set go to
    # the left table, the others to the right table.
    left_chunks.append(chunk[chunk['LeftHand'] == 1].add_prefix('Left_'))
    right_chunks.append(chunk[chunk['LeftHand'] == 0].add_prefix('Right_'))
    print(counter)

left_data = pd.concat(left_chunks, ignore_index=True)
right_data = pd.concat(right_chunks, ignore_index=True)

# Pair the left and right rows on the shared 'Unnamed: 0' key, then discard
# the join keys and the right-side duplicates of per-scenario columns.
joint_data = left_data.merge(right_data, left_on=['Left_Unnamed: 0'], right_on=['Right_Unnamed: 0'], how='inner').drop(columns=['Left_Unnamed: 0', 'Right_Unnamed: 0', 'Right_ExtendedSessionID','Left_LeftHand', 'Right_LeftHand', 'Left_PedPed', 'Right_PedPed', 'Right_Saved', 'Right_CrossingSignal'])

del left_data
del right_data
del left_chunks
del right_chunks
# Column order of the merged frame: session ID, left-side columns (crossing
# signal in third place), right-side columns, then the respondent's answer.
character_names = [
    'Man', 'Woman', 'Pregnant', 'Stroller', 'OldMan', 'OldWoman', 'Boy', 'Girl',
    'Homeless', 'LargeWoman', 'LargeMan', 'Criminal', 'MaleExecutive',
    'FemaleExecutive', 'FemaleAthlete', 'MaleAthlete', 'FemaleDoctor',
    'MaleDoctor', 'Dog', 'Cat',
]
left_cols = ['Left_' + character for character in character_names]
left_cols.insert(2, 'Left_CrossingSignal')
right_cols = ['Right_' + character for character in character_names]
joint_cols = ['Left_ExtendedSessionID'] + left_cols + right_cols + ['Left_Saved']

joint_data = joint_data[joint_cols]
print(joint_data.info(memory_usage='deep'))

# Insert Bias Column: a constant 1 stored as int8 at column position 2
all_ones = np.ones(len(joint_data), dtype='int8')
joint_data.insert(2, "Bias", all_ones)

# Version 1 dataframe (raw saved)
joint_data.to_hdf("hdfMoralMachineDataNN1", key='dataframe')
print('version 1 data saved')
del joint_data

In [None]:
### Build and Save Version 2 of Dataset

# Version 2 dataframe (v1 + scenario type IDs)
joint_data = pd.read_hdf("./hdfMoralMachineDataNN1")

# Create IDs for each Scenario Type Category: concatenate every column except
# the respondent's answer and the session identifier.
scenario_only = joint_data.drop(columns=['Left_Saved', 'Left_ExtendedSessionID'])
id_creator = scenario_only.apply(concatenate_row, axis=1)
del scenario_only

joint_data.insert(0, 'ID', id_creator)
joint_data.to_hdf("hdfMoralMachineDataNN2", key='dataframe')
print('version 2 data saved')

del joint_data

In [None]:
### ONLY USE if training neural network with naive splitting approach

# Preparing test_train data with naive approach
# RANDOM_SPLIT is read later by the model-saving code to choose the output file names.
RANDOM_SPLIT = True
joint_data = pd.read_hdf("./hdfMoralMachineDataNN1")
# NOTE(review): X and y are only bound when 'Left_Saved' is a column; if it is
# missing, the train_test_split call below fails with NameError.
if 'Left_Saved' in joint_data:
    joint_data.pop('Left_ExtendedSessionID')  # session ID is removed before building X
    y = joint_data.pop('Left_Saved')
    X = joint_data
# Random 80/20 row split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=0)
del X, y, joint_data

In [None]:
### Dataset Status Check

# Reload the version 1 dataset and show it as the cell's output
joint_data = pd.read_hdf("./hdfMoralMachineDataNN1")
joint_data

In [None]:
### Intelligently Prepare Data for Splitting by Grouping and Sorting by Scenario Type Categories

# Read in data from version 1 and group by scenario type category (not including the respondent's answer)
joint_data = pd.read_hdf("./hdfMoralMachineDataNN1")
joint_data.pop('Left_ExtendedSessionID')
# Every remaining column except the last one (the answer, 'Left_Saved') defines a scenario type
scenario_cols = joint_data.columns.tolist()[:-1]
grouped = joint_data.groupby(scenario_cols).size().reset_index()
del joint_data
sorted_g = grouped.sort_values([0], ascending=False)
del grouped
sorted_g = sorted_g.rename(columns={0: "ScenarioType_Count"})

# Randomize split for which scenario type category is in train and which is in test:
# walk the count-sorted scenario types in consecutive blocks of 5 and mark one
# randomly chosen member of each block as test. A dedicated, seeded generator
# makes the assignment reproducible across kernel restarts. As before, the
# trailing (up to 5) rows are never assigned to test.
split_rng = random.Random(0)
train_test = [False] * len(sorted_g)
for block_start in range(0, len(train_test) - 5, 5):
    train_test[block_start + split_rng.randrange(5)] = True

# Place the flag just before the trailing count column
sorted_g.insert(len(sorted_g.columns) - 1, 'Test', train_test)
del train_test

# Create the IDs used for each unique scenario type category by combining all column values that make up a scenario
ids = sorted_g[scenario_cols].apply(concatenate_row, axis=1)

# Map from IDs to Train/Test set based on the random assignment from the grouped data above
# (ids and sorted_g['Test'] share the same row order, so they can be zipped directly)
mapping = dict(zip(ids, sorted_g['Test']))

del sorted_g
del ids

joint_data = pd.read_hdf("./hdfMoralMachineDataNN2")
joint_data.pop('Left_ExtendedSessionID')

# Look up every response's scenario ID; a missing ID raises KeyError
train_test_in_order = [mapping[scenario_id] for scenario_id in joint_data['ID']]

joint_data.insert(42, "Test", train_test_in_order)

# Re-code crossing signal value 2 as -1
joint_data['Left_CrossingSignal'] = joint_data['Left_CrossingSignal'].replace(2, -1)

In [None]:
### Save Version 3 (Full Dataset prepared for intelligent split)

# Write the current joint_data frame to the version 3 HDF file
joint_data.to_hdf("hdfMoralMachineDataNN3", key='dataframe')
print('version 3 saved')

In [None]:
### Intelligently Split Prepared Dataset for Neural Network

# This cell prepares the intelligent split, so tell the model-saving step not
# to use the random-split file names. The naive-split cell sets this flag to
# True, and without this reset a stale value (or no value at all) would be
# picked up when the model is saved.
RANDOM_SPLIT = False

# Read in data with true/false labels (prepared for intelligent split)
joint_data = pd.read_hdf("./hdfMoralMachineDataNN3")

# Add a second constant-1 column, stored as int8, at column position 2
all_ones = [1 for i in range(len(joint_data))]
joint_data.insert(2, "Bias2", all_ones)
joint_data = joint_data.astype({'Bias2': 'int8'})

#  split into train/test data
train, test = split_train_test(joint_data)

#  split into X, y (input, output) data for neural network
X_train, y_train = split_input_output(train)
X_test, y_test = split_input_output(test)

#  remove columns used for data preparation (not to be used for neural network training/testing)
X_train.pop('ID')
X_train.pop('Test')
X_test.pop('ID')
X_test.pop('Test')

del train
del test
del joint_data

In [None]:
### Build, Train, and Save Neural Network for Full Moral Machine Dataset

#  Build neural network
# Size the input layer from the prepared data instead of hard-coding it: the
# naive split yields 42 feature columns, while the intelligent split adds a
# "Bias2" column and yields one more, which a fixed input_shape=(42,) rejects.
num_features = X_train.shape[1]

network = models.Sequential()
network.add(layers.Dense(32, activation='relu', input_shape=(num_features,)))
network.add(layers.Dense(32, activation='relu'))
network.add(layers.Dense(32, activation='relu'))
network.add(layers.Dense(32, activation='relu'))
# NOTE(review): a 2-unit sigmoid output is paired with categorical_crossentropy;
# softmax is the conventional choice. Left unchanged so results stay comparable
# with previously trained models.
network.add(layers.Dense(2, activation='sigmoid'))
network.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'])

# Stop training early based on the validation loss
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)

#  Fit neural network (labels are one-hot encoded to match the 2-unit output)
history = network.fit(X_train, utils.to_categorical(y_train), 
                      epochs=100, batch_size=1024, validation_data=(X_test, utils.to_categorical(y_test)), callbacks=[es])

#  Save neural network
# RANDOM_SPLIT is only assigned by the naive-split cell; treat "never set" as
# the intelligent split so a finished training run is not lost to a NameError.
split_name = "random" if globals().get('RANDOM_SPLIT', False) else "intelligent"
model_json = network.to_json()
with open(split_name + "_split_model.json", "w") as json_file:
    json_file.write(model_json)
network.save_weights(split_name + "_split_model.h5")
print("Saved " + split_name + " split model to disk")

del X_train, y_train, X_test, y_test, model_json, history, es, network

In [None]:
###  Load and Test Standard Neural Network

# Read the saved architecture; the context manager closes the file even if reading fails
with open('intelligent_split_model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights("intelligent_split_model.h5")
print("Loaded intelligent split model from disk")

# Rebuild the held-out set. Earlier cells delete X, y, X_test and y_test, so
# nothing is left in the kernel to evaluate on. This mirrors the intelligent
# split preparation: load version 3, add the "Bias2" column, keep the rows
# flagged as test, and strip the label and bookkeeping columns.
# NOTE(review): assumes the saved model was trained on this feature layout -- confirm.
eval_data = pd.read_hdf("./hdfMoralMachineDataNN3")
eval_data.insert(2, "Bias2", np.ones(len(eval_data), dtype='int8'))
held_out = eval_data[eval_data['Test'] == True]
del eval_data
y = held_out['Left_Saved']
X = held_out.drop(columns=['Left_Saved', 'ID', 'Test'])
del held_out

loaded_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
score = loaded_model.evaluate(X, utils.to_categorical(y), verbose=0)

print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))

In [None]:
### Importing and Cleaning Survey Data for Available Respondents

# Dtype used when parsing each survey column
data_types = {
    'SharedResponsesSurvey.csv': str,
    'ExtendedSessionID': str,
    'UserID': 'float64',
    'LeftHand': 'float16',
    'UserCountry3': str,
    'Review_age': 'float16',
    'Review_education': str,
    'Review_gender': str,
    'Review_income': str,
    'Review_political': 'float16',
    'Review_religious': 'float16',
}

# Exactly the columns that have a declared dtype are read
columns = list(data_types)
survey_path = "./SharedResponsesSurvey.csv.tar.gz"

# Load, turn infinities into missing values, drop incomplete rows, and keep
# the first row seen for each UserID.
survey = (
    pd.read_csv(survey_path, usecols=columns, dtype=data_types)
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
    .drop_duplicates(subset='UserID')
    .reset_index(drop=True)
)

category_columns = ['Review_gender', 'Review_education', 'Review_income', 'UserCountry3']
int_columns = ['Review_age', 'LeftHand']

# Compact storage types: categoricals for the text answers, small ints for
# age and handedness, and an integer user ID.
final_types = {col: 'category' for col in category_columns}
final_types.update({col: 'int8' for col in int_columns})
final_types['UserID'] = 'int64'
survey = survey.astype(final_types)
print(survey.info(memory_usage='deep'))

survey.to_hdf("hdfMoralMachineDataNNSurvey", key='dataframe', format='table')
print('version survey data saved')

del survey

In [None]:
### Combine Respondent data with Survey data

joint_data = pd.read_hdf("./hdfMoralMachineDataNN1")
survey = pd.read_hdf("./hdfMoralMachineDataNNSurvey")

# Attach the survey answers to every response from the same session
survey_and_full = joint_data.merge(
    survey,
    left_on=['Left_ExtendedSessionID'],
    right_on=['ExtendedSessionID'],
    how='inner',
)
survey_and_full = survey_and_full.drop(columns=['Left_ExtendedSessionID', 'SharedResponsesSurvey.csv'])
del survey
del joint_data

# Final column order: respondent columns, left-side scenario columns (crossing
# signal in third place), right-side scenario columns, then the answer.
respondent_cols = [
    'UserID', 'Bias', 'LeftHand', 'UserCountry3', 'Review_age',
    'Review_education', 'Review_gender', 'Review_income',
    'Review_political', 'Review_religious',
]
character_names = [
    'Man', 'Woman', 'Pregnant', 'Stroller', 'OldMan', 'OldWoman', 'Boy', 'Girl',
    'Homeless', 'LargeWoman', 'LargeMan', 'Criminal', 'MaleExecutive',
    'FemaleExecutive', 'FemaleAthlete', 'MaleAthlete', 'FemaleDoctor',
    'MaleDoctor', 'Dog', 'Cat',
]
left_cols = ['Left_' + character for character in character_names]
left_cols.insert(2, 'Left_CrossingSignal')
right_cols = ['Right_' + character for character in character_names]
reorder_cols = respondent_cols + left_cols + right_cols + ['Left_Saved']
survey_and_full = survey_and_full[reorder_cols]

survey_and_full.to_hdf("hdfMoralMachineDataNNCombined", key='dataframe', format='table')
print('survey and full combined saved')

In [None]:
### Split Combined Accident Scenario Responses and Demographic Information Dataset into Training and Testing Sets

# Preparing combined survey respondent Data for Neural Network
survey_and_full = pd.read_hdf("./hdfMoralMachineDataNNCombined")

# Substitute strings for ints: within each column, every distinct value is
# replaced by the position of its first appearance.
for text_col in ['UserCountry3', 'Review_education', 'Review_gender', 'Review_income']:
    distinct_values = survey_and_full[text_col].unique().tolist()
    code_for = {value: code for code, value in enumerate(distinct_values)}
    survey_and_full[text_col] = [code_for[value] for value in survey_and_full[text_col]]

# Selectively include certain demographic information to see impact on accuracy
LEFTHAND = False
COUNTRY = False
AGE = False
EDU = False
GENDER = False
INCOME = False
POLITICAL = False
RELIGIOUS = False
ONES = False
DOUBLE_ONES = False
STANDARD = False

demographic_cols = ['LeftHand', 'UserCountry3', 'Review_age', 'Review_education',
                    'Review_gender', 'Review_income', 'Review_political', 'Review_religious']

# Flags are checked in this order and the first one that is set wins; each
# keeps exactly one demographic column.
single_column_flags = [
    (LEFTHAND, 'LeftHand'),
    (COUNTRY, 'UserCountry3'),
    (AGE, 'Review_age'),
    (EDU, 'Review_education'),
    (GENDER, 'Review_gender'),
    (INCOME, 'Review_income'),
    (POLITICAL, 'Review_political'),
    (RELIGIOUS, 'Review_religious'),
]
kept_col = next((col for flag, col in single_column_flags if flag), None)

if kept_col is not None:
    survey_and_full = survey_and_full.drop(columns=[col for col in demographic_cols if col != kept_col])
elif ONES or DOUBLE_ONES or STANDARD:
    # These three variants use no demographic columns at all
    survey_and_full = survey_and_full.drop(columns=demographic_cols)
    if ONES:
        # one extra constant-1 column
        all_ones = [1] * len(survey_and_full)
        survey_and_full.insert(2, "Ones", all_ones)
        survey_and_full = survey_and_full.astype({'Ones': 'int8'})
    elif DOUBLE_ONES:
        # two extra constant-1 columns
        all_ones = [1] * len(survey_and_full)
        survey_and_full.insert(2, "Ones", all_ones)
        survey_and_full.insert(2, "Ones2", all_ones)
        survey_and_full = survey_and_full.astype({'Ones': 'int8', 'Ones2': 'int8'})
# With no flag set, every demographic column is kept.


In [None]:
### Status Check on Combined Response + Demographic Dataset

# Show survey_and_full as the cell's output
survey_and_full

In [None]:
### Split Data with Naive Approach to Prepare for Neural Network

# NOTE(review): X and y are only bound when 'Left_Saved' is still a column;
# otherwise the train_test_split call below raises NameError.
if 'Left_Saved' in survey_and_full:
    survey_and_full.pop('UserID')  # respondent identifier is removed before building X
    y = survey_and_full.pop('Left_Saved')
    X = survey_and_full

# Random 80/20 row split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=0)

del X
del y
del survey_and_full
# Show the training inputs as the cell's output
X_train

In [None]:
### Build, Train, and Save Neural Network on Expanded Dataset

# Size the input layer from the prepared data. The former hard-coded choice
# (43 when a single-feature flag or ONES is set, 50 otherwise) gave 50 for
# DOUBLE_ONES and STANDARD, although both drop all eight demographic columns.
num_cols = X_train.shape[1]

network = models.Sequential()
network.add(layers.Dense(32, activation='relu', input_shape=(num_cols,)))
network.add(layers.Dense(32, activation='relu'))
network.add(layers.Dense(32, activation='relu'))
network.add(layers.Dense(2, activation='sigmoid'))
network.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

# Stop training early based on the validation loss
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)

#  Fit neural network (labels are one-hot encoded to match the 2-unit output)
history = network.fit(X_train, utils.to_categorical(y_train), epochs=100, batch_size=512, validation_data=(X_test, utils.to_categorical(y_test)), callbacks=[es])

# File-name suffix for the variant that was trained: the first flag that is
# set decides, and 'all' is used when no flag is set.
variant_flags = [
    (LEFTHAND, 'left'),
    (COUNTRY, 'country'),
    (AGE, 'age'),
    (EDU, 'edu'),
    (GENDER, 'gender'),
    (INCOME, 'income'),
    (POLITICAL, 'political'),
    (RELIGIOUS, 'religious'),
    (ONES, 'ones'),
    (DOUBLE_ONES, 'double_ones'),
    (STANDARD, 'standard'),
]
name = next((label for flag, label in variant_flags if flag), 'all')

#  Save combined neural network
model_json = network.to_json()
json_filename = "survey_model_" + name + ".json"

with open(json_filename, "w") as json_file:
    json_file.write(model_json)

h5_filename = "survey_model_" + name + ".h5"
network.save_weights(h5_filename)
print("Saved survey", name, "intelligent split model to disk")

In [None]:
#  Load and Test Combined Neural Networks
names = ['political', 'religious', 'ones', 'all', 'left', 'country', 'age', 'edu', 'gender', 'income']

# Preparing combined survey respondent Data for Neural Network.
# Loading and encoding do not depend on the model being tested, so they are
# done once here instead of once per model.
encoded = pd.read_hdf("./hdfMoralMachineDataNNCombined")

# Substitute strings for ints: within each column, every distinct value is
# replaced by the position of its first appearance.
for text_col in ['UserCountry3', 'Review_education', 'Review_gender', 'Review_income']:
    distinct_values = encoded[text_col].unique().tolist()
    code_for = {value: code for code, value in enumerate(distinct_values)}
    encoded[text_col] = [code_for[value] for value in encoded[text_col]]

demographic_cols = ['LeftHand', 'UserCountry3', 'Review_age', 'Review_education',
                    'Review_gender', 'Review_income', 'Review_political', 'Review_religious']

# The single demographic column kept by each one-feature model
kept_col_for = {
    'left': 'LeftHand',
    'country': 'UserCountry3',
    'age': 'Review_age',
    'edu': 'Review_education',
    'gender': 'Review_gender',
    'income': 'Review_income',
    'political': 'Review_political',
    'religious': 'Review_religious',
}

for name in names:
    json_filename = "survey_model_" + name + ".json"
    # The context manager closes the file even if reading fails
    with open(json_filename, 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)
    h5_filename = "survey_model_" + name + ".h5"
    loaded_model.load_weights(h5_filename)
    print("Loaded", name, "survey model from disk")

    # Pick the columns this model sees; any other name (here: 'all') keeps
    # every demographic column.
    if name in kept_col_for:
        dropped_cols = [col for col in demographic_cols if col != kept_col_for[name]]
    elif name == 'ones':
        dropped_cols = demographic_cols
    else:
        dropped_cols = []

    # drop() returns a new frame, so the shared encoded frame is never modified
    X = encoded.drop(columns=dropped_cols)
    if name == 'ones':
        # Constant-1 column at position 2, inserted before UserID is removed
        X.insert(2, "Ones", np.ones(len(X), dtype='int8'))
    X.pop('UserID')
    y = X.pop('Left_Saved')

    # NOTE(review): the model is scored on the whole combined dataset, not on
    # a held-out split -- confirm this is the intended evaluation.
    loaded_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    score = loaded_model.evaluate(X, utils.to_categorical(y), verbose=0)

    print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))
    del X, y

In [None]:
### Build and Test Neural Network on Respondents without Survey Data

joint_data = pd.read_hdf("./hdfMoralMachineDataNN1")
survey = pd.read_hdf("./hdfMoralMachineDataNNSurvey")

# Keep only the responses whose session ID does not occur in the survey table,
# then drop the session ID column.
no_survey = joint_data[~joint_data['Left_ExtendedSessionID'].isin(survey['ExtendedSessionID'])].drop(columns=['Left_ExtendedSessionID'])
del survey
del joint_data

In [None]:
# Split Dataset into Input and Output
# NOTE(review): X and y are only bound when 'Left_Saved' is still a column;
# otherwise the train_test_split call below raises NameError.
if 'Left_Saved' in no_survey:
    y = no_survey.pop('Left_Saved')
    X = no_survey
    
# Split Dataset into Test and Train (random 80/20 row split with a fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=0)

del X
del y
del no_survey

In [None]:

# Three hidden layers of 32 ReLU units on a 42-column input, followed by a
# 2-unit sigmoid output.
network = models.Sequential([
    layers.Dense(32, activation='relu', input_shape=(42,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(2, activation='sigmoid'),
])
network.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop training early based on the validation loss
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)

#  Fit neural network (labels are one-hot encoded to match the 2-unit output)
history = network.fit(
    X_train,
    utils.to_categorical(y_train),
    epochs=100,
    batch_size=512,
    validation_data=(X_test, utils.to_categorical(y_test)),
    callbacks=[es],
)


In [None]:
# Create MAML dataset for Mayank (NOT RELEVANT FOR FALL 2020 WRITTEN REPORT)

joint_data = pd.read_hdf("./hdfMoralMachineDataNN1")

# Number of responses recorded for each session, looked up for every row
session_sizes = joint_data.groupby('Left_ExtendedSessionID').size()
count = joint_data['Left_ExtendedSessionID'].map(session_sizes)
del session_sizes

joint_data.insert(44, 'Count', count)

del count

# Sessions with fewer than 6 responses are discarded
joint_data = joint_data[joint_data['Count'] >= 6]

# Sessions with exactly 6 responses need no sampling; park them on disk
joint_data[joint_data['Count'] == 6].to_hdf("hdfMoralMachineDataNN6", key='dataframe')

joint_data = joint_data[joint_data['Count'] > 6]

# Sort so each session's rows are contiguous, and renumber rows 0..n-1 so that
# index labels equal row positions (the loop below relies on this).
joint_data = joint_data.sort_values(by=['Left_ExtendedSessionID'])

joint_data = joint_data.reset_index(drop=True)

# Walk the frame one session at a time and draw 6 random rows from each
start = 0
end = 0
all_indices = []
while end < len(joint_data):
    if end % 100000 < 9:
        print(end/len(joint_data)*100,'%')
    count = joint_data['Count'][start]
    end += count
    all_indices.extend(list(joint_data[start:end].sample(n=6).index.values))
    start = end
    
joint_data = joint_data.iloc[all_indices,:]

# Add back the sessions that already had exactly 6 responses.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
joint_data6 = pd.read_hdf("./hdfMoralMachineDataNN6")
joint_data = pd.concat([joint_data, joint_data6])

joint_data = joint_data.sort_values(by=['Left_ExtendedSessionID']).drop(columns=['Count'])

joint_data = joint_data.reset_index(drop=True)

joint_data.to_hdf("hdfMoralMachineDataNNMAML", key='dataframe')

del joint_data

joint_data = pd.read_hdf("./hdfMoralMachineDataNNMAML").to_numpy()

# One slab of 6 responses x 44 columns per session; the number of sessions is
# inferred from the data rather than hard-coded.
joint_data = joint_data.reshape(-1, 6, 44)

np.save('jcurlmamldata.npy', joint_data)