In [1]:
import pandas as pd
import numpy as np
from keras.models import Sequential, save_model, load_model
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from keras.metrics import MeanSquaredError
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import math

In [2]:
def merge_table(trait, data, traits_df):
    temp_data = data.copy()
    temp_traits = traits_df.copy()
    temp_data.insert(0, 'Response_ID', range(0, 0 + len(temp_data)))
    temp_traits.insert(0, 'Result_ID', range(0, 0 + len(temp_traits)))
    temp_traits = temp_traits[['Result_ID',trait]]
    reg_df = temp_data.merge(temp_traits, left_on='Response_ID', right_on='Result_ID')
    # reg_df.drop(['Result_ID', 'Response_ID'])
    return reg_df

In [3]:
def correlated(trait, reg_df):
    corr = reg_df.corr()
    corr = corr[trait]
    high_relation_qs = []
    for i in range(len(corr)-1):
        if abs(corr[i]) > 0.4:
            high_relation_qs.append(corr.index[i])
    return high_relation_qs

In [4]:
def NN_model(inputs):
	# create model
	model = Sequential()

	model.add(Dense(inputs, input_dim=inputs, kernel_initializer='normal', activation='relu'))
	model.add(Dense(20, activation='relu'))
	model.add(Dense(15, activation='relu'))
	model.add(Dense(10, activation='relu'))
	model.add(Dense(1, kernel_initializer='normal'))
	# Compile model
	model.compile(loss='mean_squared_error', optimizer='adam', metrics=[MeanSquaredError()])
	return model

In [5]:
def create_parameters(trait, high_relation_qs, data, traits_df):
    input_parameter = data[high_relation_qs].shape[1]
    X = data[high_relation_qs].to_numpy()
    y = traits_df[trait]
    return X, y, input_parameter

In [6]:
def plot_regression(y_test, y_predict):
    plt.figure(figsize=(16,16))
    plt.plot(range(100), y_test[:100], color="r", label="True "+ trait)
    plt.plot(range(100), y_predict[:100], color="b", label="Predicted "+ trait)
    plt.xlabel("Response Number")
    plt.ylabel(trait + " Value")
    plt.legend()

In [7]:
def bringing_it_together(trait):
    print("Trait: ", trait)

    data = pd.read_csv('questions.csv')
    traits_df = pd.read_csv('traits.csv')

    reg_df = merge_table(trait, data, traits_df)
    print(reg_df.head(5))
    high_relation_qs = correlated(trait, reg_df)
    X, y, input_parameter = create_parameters(trait, high_relation_qs, data, traits_df)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    model = NN_model(input_parameter)
    history = model.fit(X_train, y_train, batch_size=32, epochs=100, validation_data=[X_test, y_test])
    y_predict = model.predict(X_test)
    print("The r-sqaured score:", r2_score(y_test, y_predict))
    plot_regression(y_test, y_predict)
    
    model.save(trait+".h5")
    return (trait, high_relation_qs)


In [8]:
traits = ["Openness", "Agreeableness", "Persistence", "Cooperativeness", "Openness to Experience", "Adjustment", "Ambition", "Learning Approach"]

In [9]:
questions_dict = {}
for trait in traits:
    t, q = bringing_it_together(trait)
    questions_dict[t] = q

Trait:  Openness
   Response_ID  Unnamed: 0  Q1A  Q2A  Q3A  Q4A  Q5A  Q6A  Q7A  Q8A  ...  \
0            0           0    5    2    1    2    5    2    4    4  ...   
1            1           1    6    2    2    6    2    2    6    3  ...   
2            2           2    1    1    1    7    5    4    1    7  ...   
3            3           3    2    6    6    7    2    7    2    5  ...   
4            4           4    1    7    5    7    7    3    5    1  ...   

   Q174A  Q175A  Q176A  Q177A  Q178A  Q179A  Q180A  Q181A  Result_ID  Openness  
0      2      6      7      6      1      1      3      4          0        53  
1      6      2      3      3      5      1      2      5          1        61  
2      7      7      3      7      7      7      7      7          2        85  
3      6      5      1      6      1      1      5      6          3        68  
4      6      7      2      6      7      2      7      7          4        87  

[5 rows x 185 columns]
Epoch 1/100
 1/43 [...

ValueError: in user code:

    c:\users\dell\anaconda3\lib\site-packages\keras\engine\training.py:1330 test_function  *
        return step_function(self, iterator)
    c:\users\dell\anaconda3\lib\site-packages\keras\engine\training.py:1320 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    c:\users\dell\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    c:\users\dell\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    c:\users\dell\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    c:\users\dell\anaconda3\lib\site-packages\keras\engine\training.py:1313 run_step  **
        outputs = model.test_step(data)
    c:\users\dell\anaconda3\lib\site-packages\keras\engine\training.py:1267 test_step
        y_pred = self(x, training=False)
    c:\users\dell\anaconda3\lib\site-packages\keras\engine\base_layer.py:1020 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    c:\users\dell\anaconda3\lib\site-packages\keras\engine\input_spec.py:199 assert_input_compatibility
        raise ValueError('Layer ' + layer_name + ' expects ' +

    ValueError: Layer sequential expects 1 input(s), but it received 2 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, 16) dtype=int64>, <tf.Tensor 'ExpandDims:0' shape=(None, 1) dtype=int64>]


In [None]:
print(questions_dict)

{}
