In [45]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, GlobalMaxPooling1D, Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Sample data: eight sentences, each tagged with one letter per trait column.
data = {
    "Sentence": [
        "I love meeting new people and enjoy lively discussions.",
        "I prefer written communication and need time alone to recharge.",
        "I trust facts and data more than feelings.",
        "I often think about the future and imagine the possibilities.",
        "I make decisions based on my values and how it affects others.",
        "I am very organized and like to plan things in advance.",
        "I enjoy exploring details and practical applications.",
        "I like to keep my options open and enjoy spontaneity."
    ],
    "E_I": ["E", "I", "I", "I", "E", "I", "E", "E"],  # two classes: "E" / "I"
    "S_N": ["S", "N", "S", "N", "S", "N", "S", "N"],  # two classes: "S" / "N"
    "T_F": ["T", "F", "T", "F", "T", "F", "T", "F"],  # two classes: "T" / "F"
    "J_P": ["J", "P", "J", "P", "J", "P", "J", "P"]   # two classes: "J" / "P"
}

df = pd.DataFrame(data)

# Preprocessing: turn every sentence into a fixed-length sequence of word indices.
vocab_size = 1000    # upper bound on the word indices the Tokenizer emits
embedding_dim = 32   # width of the word vectors learned by the Embedding layer
max_length = 20      # every sequence is padded/truncated to this many tokens
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(df['Sentence'])
sequences = tokenizer.texts_to_sequences(df['Sentence'])
padded = pad_sequences(sequences, maxlen=max_length)

# Label columns, in the order they appear as columns of `labels`.
TRAIT_COLUMNS = ["E_I", "S_N", "T_F", "J_P"]

# Fit a separate encoder per trait. Re-fitting one shared encoder overwrites its
# `classes_` on every call, so only the last trait could be decoded afterwards.
label_encoders = {trait: LabelEncoder() for trait in TRAIT_COLUMNS}
encoded = {
    trait: encoder.fit_transform(df[trait])
    for trait, encoder in label_encoders.items()
}

# binary_crossentropy expects 0/1 targets, so no trait may have more than two classes.
assert all(len(encoder.classes_) <= 2 for encoder in label_encoders.values()), \
    "every trait column must be binary"

# Names kept for any cell that still refers to the per-trait arrays or to the
# single encoder (which, as before, ends up fitted on J_P).
E_I_labels, S_N_labels, T_F_labels, J_P_labels = (encoded[trait] for trait in TRAIT_COLUMNS)
label_encoder = label_encoders["J_P"]

# Combine the labels into one (n_samples, n_traits) matrix.
labels = np.vstack([encoded[trait] for trait in TRAIT_COLUMNS]).T
assert labels.shape == (len(df), len(TRAIT_COLUMNS))

# Split the data (fixed random_state keeps the train/test partition repeatable).
X_train, X_test, y_train, y_test = train_test_split(padded, labels, test_size=0.2, random_state=42)

array([0, 1, 1, 1, 0, 1, 0, 0])

## CNN

In [42]:
"""
Here we build our baseline model, following the structure of the CNN approach in the paper.
The task is framed as multilabel classification: 4 binary classifiers.
The output layer consists of 4 neurons with a sigmoid activation function.
A 1D convolution is applied to the word embeddings, and the result is fed as input to the rest of the neural network.
The rest of the network consists of max pooling, a dense layer, and the sigmoid output layer, which yields the multilabel classification results of the 4 binary classifiers.
"""
# Seed Python, NumPy and TensorFlow so that weight initialisation (and therefore
# the reported metrics) is repeatable when this cell is re-run.
tf.keras.utils.set_random_seed(42)

model = Sequential([
    # Declare the input shape explicitly: Embedding's `input_length` argument is
    # deprecated in recent Keras releases.
    tf.keras.Input(shape=(max_length,), dtype='int32'),
    Embedding(vocab_size, embedding_dim),
    Conv1D(256, 3, activation='relu'),  # 256 filters, each spanning 3 consecutive tokens
    MaxPooling1D(3),                    # local max over windows of 3 time steps
    GlobalMaxPooling1D(),               # collapse the time axis to one value per filter
    Dense(16, activation='relu'),
    Dense(4, activation='sigmoid'),     # one sigmoid output per binary label
])

# Adam with an explicit learning rate. The metric is named explicitly so the
# keys in `history.history` stay "auc"/"val_auc" even if this cell is re-run
# (unnamed metrics get an auto-incremented suffix on every new instance).
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.AUC(name='auc')],
)

AttributeError: module 'tensorflow' has no attribute 'accuracy'

In [39]:
# Training run: fit on the training split and score the held-out split after
# every epoch; the per-epoch metrics are collected in `history`.
num_epochs = 10
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=num_epochs,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [41]:
# The model is compiled with AUC as its only metric, so evaluate() returns
# [loss, AUC]; the second value is not an accuracy.
test_loss, test_auc = model.evaluate(X_test, y_test)
test_accuracy = test_auc  # legacy name, kept so any later cell that reads it keeps working
print("Test loss", test_loss)
print("Test AUC:", test_auc)

Test loss 0.7478247880935669
Test accuracy: 0.0
