In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Model
from keras.layers import Input, Embedding, LSTM, Dense, Dropout
from keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder
import numpy as np




In [2]:
file = 'book_data.csv'
red = pd.read_csv(file, encoding='ISO-8859-1')
red.head()

Unnamed: 0,book_authors,book_desc,book_edition,book_format,book_isbn,book_pages,book_rating,book_rating_count,book_review_count,book_title,genres,image_url,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16
0,Suzanne Collins,Winning will make you famous. Losing means cer...,,Hardcover,9780000000000.0,374 pages,4.33,5519135,160706,The Hunger Games,Young Adult|Fiction|Science Fiction|Dystopia|F...,https://images.gr-assets.com/books/1447303603l...,,,,,
1,J.K. Rowling|Mary GrandPrÃ©,There is a door at the end of a silent corrido...,US Edition,Paperback,9780000000000.0,870 pages,4.48,2041594,33264,Harry Potter and the Order of the Phoenix,Fantasy|Young Adult|Fiction,https://images.gr-assets.com/books/1255614970l...,,,,,
2,Harper Lee,The unforgettable novel of a childhood in a sl...,50th Anniversary,Paperback,9780000000000.0,324 pages,4.27,3745197,79450,To Kill a Mockingbird,Classics|Fiction|Historical|Historical Fiction...,https://images.gr-assets.com/books/1361975680l...,,,,,
3,Jane Austen|Anna Quindlen|Mrs. Oliphant|George...,Â«Ã cosa ormai risaputa che a uno scapolo in ...,"Modern Library Classics, USA / CAN",Paperback,9780000000000.0,279 pages,4.25,2453620,54322,Pride and Prejudice,Classics|Fiction|Romance,https://images.gr-assets.com/books/1320399351l...,,,,,
4,Stephenie Meyer,About three things I was absolutely positive.F...,,Paperback,9780000000000.0,498 pages,3.58,4281268,97991,Twilight,Young Adult|Fantasy|Romance|Paranormal|Vampire...,https://images.gr-assets.com/books/1361039443l...,,,,,


In [3]:
red.dropna(subset=['book_desc'], inplace=True)

In [4]:
red['book_rating'] = pd.to_numeric(red['book_rating'], errors='coerce')
red.dropna(subset=['book_rating'], inplace=True)

In [5]:
max_words = 3000
max_len = 100
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(red['book_desc'])
sequences = tokenizer.texts_to_sequences(red['book_desc'])
X = pad_sequences(sequences, maxlen=max_len)

In [6]:
label_encoder_title = LabelEncoder()
label_encoder_author = LabelEncoder()

red['book_title_encoded'] = label_encoder_title.fit_transform(red['book_title'])
red['book_author_encoded'] = label_encoder_author.fit_transform(red['book_authors'])

In [7]:
y_title = pd.get_dummies(red['book_title_encoded']).values
y_author = pd.get_dummies(red['book_author_encoded']).values
y_rating = red['book_rating'].astype(float).values

In [8]:
X_train, X_test, y_title_train, y_title_test, y_author_train, y_author_test, y_rating_train, y_rating_test = train_test_split(
    X, y_title, y_author, y_rating, test_size=0.3, random_state=42)

In [9]:
input_layer = Input(shape=(max_len,))
embedding_layer = Embedding(max_words, 64)(input_layer)
lstm_layer = LSTM(64, dropout=0.2, recurrent_dropout=0.2, recurrent_initializer='glorot_uniform')(embedding_layer)
dropout_layer = Dropout(0.2)(lstm_layer)




In [10]:
# title
title_output = Dense(y_title.shape[1], activation='softmax', name='title_output')(dropout_layer)
# author
author_output = Dense(y_author.shape[1], activation='softmax', name='author_output')(dropout_layer)
# rating
rating_output = Dense(1, activation='linear', name='rating_output')(dropout_layer)

In [11]:
model = Model(inputs=input_layer, outputs=[title_output, author_output, rating_output])

In [12]:
model.compile(loss=['categorical_crossentropy', 'categorical_crossentropy', 'mean_squared_error'], 
              optimizer=Adam(), 
              metrics=['accuracy'])


In [13]:
model.fit(X_train, [y_title_train, y_author_train, y_rating_train], epochs=1 , batch_size=32)





<keras.src.callbacks.History at 0x1644851d6d0>

In [14]:
results = model.evaluate(X_test, [y_title_test, y_author_test, y_rating_test])
print(f"Loss: {results[0]}, Title Accuracy: {results[1]}, Author Accuracy: {results[3]}, Rating MSE: {results[5]}")

Loss: 20.309818267822266, Title Accuracy: 10.701712608337402, Author Accuracy: 0.1629408299922943, Rating MSE: 0.002121063880622387


In [15]:
def get_title_author_rating(pred):
    title_index = pred[0].argmax()
    author_index = pred[1].argmax()
    rating = pred[2]

    title = label_encoder_title.inverse_transform([title_index])[0]
    author = label_encoder_author.inverse_transform([author_index])[0]

    return title, author, rating

In [16]:
sample_text = red['book_desc'].iloc[0]
sample_seq = tokenizer.texts_to_sequences([sample_text])
sample_padded = pad_sequences(sample_seq, maxlen=max_len)
pred = model.predict(sample_padded)

title, author, rating = get_title_author_rating(pred)
print(f"Title: {title}, Author: {author}, Rating: {rating}")

Title: The Book Thief, Author: Meg Cabot, Rating: [[3.820393]]


In [17]:
model.save("my_model1.h5")

  saving_api.save_model(


## <center> Create API 

In [None]:
import pandas as pd
from flask import Flask, request, jsonify
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder
import numpy as np
import os

app = Flask(__name__)
app.secret_key = 'e332c75bc8de5a684596e55242f9beb5c1cff28d8dc771618a90e82af17a2610'

# Check if model and data files exist
model_path = 'my_model.h5'
data_path = 'book_data.csv'

if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model file not found: {model_path}")

if not os.path.exists(data_path):
    raise FileNotFoundError(f"Data file not found: {data_path}")

# Load the trained model
try:
    model = keras.models.load_model(model_path)
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")
    raise

# Load book data
try:
    data = pd.read_csv(data_path, encoding='ISO-8859-1')
    data.dropna(subset=['book_desc'], inplace=True)
    print("Data loaded successfully.")
except Exception as e:
    print(f"Error loading data: {e}")
    raise

# Prepare tokenizer
try:
    max_words = 1000
    max_len = 100
    tokenizer = Tokenizer(num_words=max_words)
    tokenizer.fit_on_texts(data['book_desc'])
    print("Tokenizer prepared successfully.")
except Exception as e:
    print(f"Error preparing tokenizer: {e}")
    raise

# Prepare label encoders
label_encoder_title = LabelEncoder()
label_encoder_author = LabelEncoder()
try:
    data['book_title_encoded'] = label_encoder_title.fit_transform(data['book_title'])
    data['book_author_encoded'] = label_encoder_author.fit_transform(data['book_authors'])
    print("Label encoders prepared successfully.")
except Exception as e:
    print(f"Error preparing label encoders: {e}")
    raise

@app.route("/get-response", methods=["POST"])
def get_response():
    user_input = request.json.get('description')
    if not user_input:
        return jsonify({"error": "No description provided"}), 400
    
    try:
        # Tokenize and pad the user input
        user_sequence = tokenizer.texts_to_sequences([user_input])
        user_padded = pad_sequences(user_sequence, maxlen=max_len)
        
        # Predict using the model
        predictions = model.predict(user_padded)
        
        # Get top 10 predictions for title and author
        top_title_indices = predictions[0][0].argsort()[-10:][::-1]
        top_author_indices = predictions[1][0].argsort()[-10:][::-1]
        
        response = []
        for i in range(10):
            title_index = top_title_indices[i]
            author_index = top_author_indices[i]
            rating = float(predictions[2][0][0])  # Use the same rating for simplicity
            
            title = label_encoder_title.inverse_transform([title_index])[0]
            author = label_encoder_author.inverse_transform([author_index])[0]
            
            response.append({
                "title": title,
                "author": author,
                "rating": rating
            })
        
        return jsonify(response)
    except Exception as e:
        print(f"Error processing request: {e}")
        return jsonify({"error": "Internal server error", "details": str(e)}), 500

if __name__ == "__main__":
    try:
        app.run(debug=True, use_reloader=False)  # Added use_reloader=False to prevent multiple instances
    except Exception as e:
        print(f"Error starting Flask app: {e}")
