In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found anywhere under the
# read-only Kaggle input directory, then print the paths one per line.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/2-recommended-reads-conversion-of-data-to-num/__results__.html
/kaggle/input/2-recommended-reads-conversion-of-data-to-num/vectorizedData.csv
/kaggle/input/2-recommended-reads-conversion-of-data-to-num/__notebook__.ipynb
/kaggle/input/2-recommended-reads-conversion-of-data-to-num/__output__.json
/kaggle/input/2-recommended-reads-conversion-of-data-to-num/custom.css
/kaggle/input/2-recommended-reads-conversion-of-data-to-num/__results___files/__results___9_0.png
/kaggle/input/googlenewsvectors/GoogleNews-vectors-negative300.bin


Model layers used below: Embedding, LSTM and SimpleRNN (one Embedding + recurrent branch per text field)


In [2]:
from tensorflow import keras
from tensorflow.keras.layers import Input, Embedding, LSTM, SimpleRNN, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Step 1: Prepare the data
# Fixed lengths (in tokens) that each text field is padded/truncated to.
max_title_length = 50
max_author_length = 50
max_desc_length = 200
# Width of the learned word vectors, shared by the three Embedding layers.
embedding_dim = 100
# One seed for both the train/test split and Keras, so that a
# Restart & Run All is repeatable (as far as the backend allows).
random_seed = 42
keras.utils.set_random_seed(random_seed)

# Load the data and keep a single row per (title, author) pair
dataset = pd.read_csv("/kaggle/input/2-recommended-reads-conversion-of-data-to-num/vectorizedData.csv")
dataset = dataset.drop_duplicates(subset=['booktitle', 'authorname'], keep='first')

# Split the data into input (X) and output (y) variables.
# Missing text becomes an empty string and every cell is coerced to str:
# the tokenizer expects str values, which NaN (a float) is not.
text_columns = ['booktitle', 'authorname', 'bookdescription']
X = dataset[text_columns].fillna('').astype(str).values
y = dataset['type'].values

# Tokenize and pad the input sequences.
# A single tokenizer is fitted on title, author and description in turn,
# so all three inputs share one vocabulary.
tokenizer = keras.preprocessing.text.Tokenizer()
for column_index in range(len(text_columns)):
    tokenizer.fit_on_texts(X[:, column_index])

# Word indices start at 1; index 0 is what pad_sequences fills with,
# hence the extra slot in the embedding tables.
vocab_size = len(tokenizer.word_index) + 1

title_sequences = tokenizer.texts_to_sequences(X[:, 0])
title_sequences = pad_sequences(title_sequences, maxlen=max_title_length)
author_sequences = tokenizer.texts_to_sequences(X[:, 1])
author_sequences = pad_sequences(author_sequences, maxlen=max_author_length)
desc_sequences = tokenizer.texts_to_sequences(X[:, 2])
desc_sequences = pad_sequences(desc_sequences, maxlen=max_desc_length)

# Encode the target labels: string label -> integer id -> one-hot row.
# num_classes is derived from the data rather than hard-coded.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(y)
num_classes = len(label_encoder.classes_)
encoded_labels = to_categorical(encoded_labels, num_classes=num_classes)

# Split the data into training and testing sets (80% / 20%)
train_title, test_title, train_author, test_author, train_desc, test_desc, train_labels, test_labels = \
    train_test_split(title_sequences, author_sequences, desc_sequences, encoded_labels,
                     test_size=0.2, random_state=random_seed)

# Step 2: Build the model
# Three independent branches, one per text field, each with its own
# Embedding layer followed by a recurrent layer.
title_input = Input(shape=(max_title_length,))
title_embedded = Embedding(vocab_size, embedding_dim)(title_input)
title_lstm = LSTM(128)(title_embedded)

author_input = Input(shape=(max_author_length,))
author_embedded = Embedding(vocab_size, embedding_dim)(author_input)
author_rnn = SimpleRNN(64)(author_embedded)

desc_input = Input(shape=(max_desc_length,))
desc_embedded = Embedding(vocab_size, embedding_dim)(desc_input)
desc_lstm = LSTM(128)(desc_embedded)

# Concatenate the three branch outputs and classify with a softmax head.
merged = keras.layers.concatenate([title_lstm, author_rnn, desc_lstm])
dense = Dense(128, activation='relu')(merged)
output = Dense(num_classes, activation='softmax')(dense)

model = Model(inputs=[title_input, author_input, desc_input], outputs=output)

# Compile the model (one-hot targets -> categorical cross-entropy)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model.
# NOTE: the held-out 20% split is also passed as validation data, so the
# val_* metrics reported during training are measured on the test set.
# The History object is kept so the curves can be inspected afterwards
# (and so the cell does not end with a bare object repr).
history = model.fit(
    [train_title, train_author, train_desc],
    train_labels,
    validation_data=([test_title, test_author, test_desc], test_labels),
    epochs=10,
    batch_size=64,
)




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x79f7b0dc59c0>