# Data Pre-processing
Data source: https://www.kaggle.com/datasets/mdhamani/goodreads-books-100k/

In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
!ls .

Dataset  drive	Models	sample_data


In [3]:
# Load dataset into a Pandas DataFrame
data = pd.read_csv("./Dataset/GoodReads_100k_books.csv")
data.size

1300000

In [4]:
data.head()

Unnamed: 0,author,bookformat,desc,genre,img,isbn,isbn13,link,pages,rating,reviews,title,totalratings
0,Laurence M. Hauptman,Hardcover,Reveals that several hundred thousand Indians ...,"History,Military History,Civil War,American Hi...",https://i.gr-assets.com/images/S/compressed.ph...,002914180X,9780000000000.0,https://goodreads.com/book/show/1001053.Betwee...,0,3.52,5,Between Two Fires: American Indians in the Civ...,33
1,"Charlotte Fiell,Emmanuelle Dirix",Paperback,Fashion Sourcebook - 1920s is the first book i...,"Couture,Fashion,Historical,Art,Nonfiction",https://i.gr-assets.com/images/S/compressed.ph...,1906863482,9780000000000.0,https://goodreads.com/book/show/10010552-fashi...,576,4.51,6,Fashion Sourcebook 1920s,41
2,Andy Anderson,Paperback,The seminal history and analysis of the Hungar...,"Politics,History",https://i.gr-assets.com/images/S/compressed.ph...,948984147,9780000000000.0,https://goodreads.com/book/show/1001077.Hungar...,124,4.15,2,Hungary 56,26
3,Carlotta R. Anderson,Hardcover,"""All-American Anarchist"" chronicles the life a...","Labor,History",https://i.gr-assets.com/images/S/compressed.ph...,814327079,9780000000000.0,https://goodreads.com/book/show/1001079.All_Am...,324,3.83,1,All-American Anarchist: Joseph A. Labadie and ...,6
4,Jean Leveille,,"Aujourdâ€™hui, lâ€™oiseau nous invite Ã sa ta...",,https://i.gr-assets.com/images/S/compressed.ph...,2761920813,,https://goodreads.com/book/show/10010880-les-o...,177,4.0,1,Les oiseaux gourmands,1


In [5]:
# Remove duplicates from df
data = data.drop_duplicates()

In [6]:
# Handle outliers (e.g., filter unrealistic ratings)
data = data[(data['rating'] >= 1) & (data['rating'] <= 10)]

# Remove columns that are not needed
data = data[['isbn','title','author','rating','reviews','img','desc','genre','pages']]

In [7]:
# Rename columns
data.rename(columns={'isbn':'ISBN','title':'Title','link':'Link','author':'Author','rating':'Rating','reviews':'No. of ratings','img':'Image','desc':'Desc','genre':'Genre','pages':'Pages'},inplace=True)


In [8]:
data.isnull().sum() # no. of null values
data = data.dropna(subset=['Genre'])
data = data.dropna(subset=['Desc'])
data = data.dropna(subset=['Image'])
data = data.dropna(subset=['ISBN'])
data = data.reset_index(drop=True)

In [9]:
# Filter out books with greater than 50 ratings
final_data = data[data['No. of ratings'] >= 50]
final_data = final_data.reset_index(drop=True)

In [10]:
final_data.head()

Unnamed: 0,ISBN,Title,Author,Rating,No. of ratings,Image,Desc,Genre,Pages
0,143037013,Happiness: Lessons from a New Science,Richard Layard,3.73,85,https://i.gr-assets.com/images/S/compressed.ph...,There is a paradox at the heart of our lives. ...,"Psychology,Nonfiction,Economics,Science,Philos...",320
1,385333706,After Long Silence,Helen Fremont,4.0,328,https://i.gr-assets.com/images/S/compressed.ph...,"""To this day, I don't even know what my mother...","Autobiography,Memoir,Nonfiction,World War II,H...",368
2,1414270658,Parnassus on Wheels,Christopher Morley,4.03,1214,https://i.gr-assets.com/images/S/compressed.ph...,"Roger Mifflin is part pixie, part sage, part n...","Fiction,Writing,Books About Books,Classics,Hum...",152
3,767923634,Irrational Exuberance,Robert J. Shiller,3.98,275,https://i.gr-assets.com/images/S/compressed.ph...,As Robert Shillerâ€™s new 2009 preface to his ...,"Economics,Economics,Finance,Business,Nonfictio...",304
4,765315696,Halo: Contact Harvest,Joseph Staten,3.98,271,https://i.gr-assets.com/images/S/compressed.ph...,"This is how it began...,It is the year 2524. H...","Science Fiction,Fiction,Games,Video Games,Spor...",396


In [11]:
# Save the cleaned dataset
final_data.to_csv('./Dataset/final_data.csv',index=False)

# TF-IDF Vectorization

In [12]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [13]:
# Load the dataset
df = pd.read_csv('./Dataset/final_data.csv')

In [14]:
df.head()

Unnamed: 0,ISBN,Title,Author,Rating,No. of ratings,Image,Desc,Genre,Pages
0,143037013,Happiness: Lessons from a New Science,Richard Layard,3.73,85,https://i.gr-assets.com/images/S/compressed.ph...,There is a paradox at the heart of our lives. ...,"Psychology,Nonfiction,Economics,Science,Philos...",320
1,385333706,After Long Silence,Helen Fremont,4.0,328,https://i.gr-assets.com/images/S/compressed.ph...,"""To this day, I don't even know what my mother...","Autobiography,Memoir,Nonfiction,World War II,H...",368
2,1414270658,Parnassus on Wheels,Christopher Morley,4.03,1214,https://i.gr-assets.com/images/S/compressed.ph...,"Roger Mifflin is part pixie, part sage, part n...","Fiction,Writing,Books About Books,Classics,Hum...",152
3,767923634,Irrational Exuberance,Robert J. Shiller,3.98,275,https://i.gr-assets.com/images/S/compressed.ph...,As Robert Shillerâ€™s new 2009 preface to his ...,"Economics,Economics,Finance,Business,Nonfictio...",304
4,765315696,Halo: Contact Harvest,Joseph Staten,3.98,271,https://i.gr-assets.com/images/S/compressed.ph...,"This is how it began...,It is the year 2524. H...","Science Fiction,Fiction,Games,Video Games,Spor...",396


In [15]:
# Preprocess the data
df.fillna('', inplace=True)
df['Desc'] = df['Desc'].str.lower()
df['Title'] = df['Title'].str.lower()

In [16]:
# Create a tf-idf vectorizer
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
tfidf_matrix = tfidf_vectorizer.fit_transform(df['Desc'] + ' ' + df['Title'] + ' ' + df['Genre'])
tfidf_matrix = tfidf_matrix.astype(np.float32)

In [17]:
tfidf_matrix.shape

(25151, 5000)

In [18]:
# Compute the cosine similarity matrix
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [19]:
# Save the cosine similarity matrix
np.save('./Models/cosine_sim.npy', cosine_sim)

In [20]:
def recommend_books_cosine(book_title, final_data, cosine_sim):
    # Check if the final_data DataFrame is empty
    if not final_data.empty:
        # Get the index of the book title
        idx = final_data[final_data['Title'] == book_title].index
        print(f"idx: {idx}")
        if len(idx) > 0:
            idx = idx[0]
            sim_scores = list(enumerate(cosine_sim[idx]))
            print(f"sim_scores: {sim_scores}")
            sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
            sim_scores = sim_scores[1:11]
            print(f"sim_scores top 10: {sim_scores}")
            book_indices = [i[0] for i in sim_scores]
            print(f"book_indices: {book_indices}")
            # return book title with image url and author
            return final_data[['Title', 'Image', 'Author']].iloc[book_indices]
        else:
            return "Book not found"
    else:
        return "No data available"

In [21]:
# Example usage
df = pd.read_csv('./Dataset/final_data.csv')
cosine_sim = np.load('./Models/cosine_sim.npy')
recommend_books_cosine('Python for Data Analysis', df, cosine_sim)

idx: Index([17273], dtype='int64')
sim_scores: [(0, 0.052499466), (1, 0.006910086), (2, 0.014754174), (3, 0.1083718), (4, 0.023390446), (5, 0.0), (6, 0.0), (7, 0.035820235), (8, 0.02284322), (9, 0.0403712), (10, 0.011541335), (11, 0.010319863), (12, 0.011790952), (13, 0.0), (14, 0.0770881), (15, 0.01734209), (16, 0.024195436), (17, 0.011981061), (18, 0.02952066), (19, 0.002775732), (20, 0.015381871), (21, 0.037851065), (22, 0.01493819), (23, 0.016667154), (24, 0.016730107), (25, 0.0), (26, 0.08717003), (27, 0.024586525), (28, 0.0059973733), (29, 0.016020594), (30, 0.012602329), (31, 0.011102149), (32, 0.0), (33, 0.020326976), (34, 0.04692241), (35, 0.012366842), (36, 0.061682064), (37, 0.002117056), (38, 0.015397036), (39, 0.003181843), (40, 0.0), (41, 0.0040365355), (42, 0.013023352), (43, 0.06450635), (44, 0.042030945), (45, 0.012059403), (46, 0.008196373), (47, 0.016456507), (48, 0.01817279), (49, 0.009498764), (50, 0.0033322682), (51, 0.028735735), (52, 0.03082693), (53, 0.01306932

Unnamed: 0,Title,Image,Author
7587,Think Stats,https://i.gr-assets.com/images/S/compressed.ph...,Allen B. Downey
21710,Interactive Data Visualization for the Web,https://i.gr-assets.com/images/S/compressed.ph...,Scott Murray
4176,Visualize This: The FlowingData Guide to Desig...,https://i.gr-assets.com/images/S/compressed.ph...,Nathan Yau
17397,The Elements of Statistical Learning: Data Min...,https://i.gr-assets.com/images/S/compressed.ph...,"Trevor Hastie,Robert Tibshirani,Jerome Friedman"
11727,Seven Databases in Seven Weeks: A Guide to Mod...,https://i.gr-assets.com/images/S/compressed.ph...,"Eric Redmond,Jim R. Wilson"
14950,NoSQL Distilled: A Brief Guide to the Emerging...,https://i.gr-assets.com/images/S/compressed.ph...,"Pramod J. Sadalage,Martin Fowler"
16922,Think Python,https://i.gr-assets.com/images/S/compressed.ph...,Allen B. Downey
20103,Big Data: A Revolution That Will Transform How...,https://i.gr-assets.com/images/S/compressed.ph...,"Viktor Mayer-SchÃ¶nberger,Kenneth Cukier"
20745,Predictive Analytics: The Power to Predict Who...,https://i.gr-assets.com/images/S/compressed.ph...,Eric Siegel
23440,Computer Networks,https://i.gr-assets.com/images/S/compressed.ph...,Andrew S. Tanenbaum


# NCF

Pre-process

In [26]:
# Assume 1000 users and assign random ratings between 1 and 5 to each book by users
num_users = 1000
num_ratings = len(df)

# Generate random user IDs
user_ids = np.random.randint(1, num_users + 1, num_ratings)

# Generate random ratings
ratings = np.random.randint(1, 6, num_ratings)

# Add user IDs and ratings to the dataset
df['user_id'] = user_ids
df['rating'] = ratings

In [27]:
# Prepare the data for NCF
df['user_id'] = df['user_id'].astype('category').cat.codes.values
df['ISBN'] = df['ISBN'].astype('category').cat.codes.values
df.to_csv("./Dataset/final_data_with_ratings.csv", index=False)

Model Build

In [28]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

In [29]:
# Select necessary columns for NCF
user_item_data = df[['user_id', 'ISBN', 'Rating']]

In [30]:
# Split the data into training and testing sets
train, test = train_test_split(user_item_data, test_size=0.2, random_state=42)

In [31]:
# Define a PyTorch Dataset class
class NCFDataset(Dataset):
    def __init__(self, data):
        self.users = data['user_id'].values
        self.items = data['ISBN'].values
        self.ratings = data['Rating'].values

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        return {
            'user': torch.tensor(self.users[idx], dtype=torch.long),
            'item': torch.tensor(self.items[idx], dtype=torch.long),
            'rating': torch.tensor(self.ratings[idx], dtype=torch.float)
        }

In [32]:
# Create DataLoaders
train_dataset = NCFDataset(train)
test_dataset = NCFDataset(test)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [33]:
# Define the NCF Model
class NCF(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim=50):
        super(NCF, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)
        self.fc1 = nn.Linear(embedding_dim * 2, 128)
        self.dropout = nn.Dropout(0.3)
        self.output = nn.Linear(128, 1)

    def forward(self, user, item):
        user_emb = self.user_embedding(user)
        item_emb = self.item_embedding(item)
        x = torch.cat([user_emb, item_emb], dim=-1)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.output(x)
        return x

In [34]:
# Initialize the model
num_users = user_item_data['user_id'].nunique()
num_items = user_item_data['ISBN'].nunique()

model = NCF(num_users, num_items)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [35]:
# Train the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

NCF(
  (user_embedding): Embedding(1000, 50)
  (item_embedding): Embedding(25151, 50)
  (fc1): Linear(in_features=100, out_features=128, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (output): Linear(in_features=128, out_features=1, bias=True)
)

In [36]:
for epoch in range(10):
    model.train()
    train_loss = 0
    for batch in train_loader:
        user = batch['user'].to(device)
        item = batch['item'].to(device)
        rating = batch['rating'].to(device)

        optimizer.zero_grad()
        predictions = model(user, item).squeeze()
        loss = criterion(predictions, rating)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    print(f"Epoch {epoch + 1}, Train Loss: {train_loss / len(train_loader):.4f}")

Epoch 1, Train Loss: 2.4135
Epoch 2, Train Loss: 0.3882
Epoch 3, Train Loss: 0.3207
Epoch 4, Train Loss: 0.2792
Epoch 5, Train Loss: 0.2436
Epoch 6, Train Loss: 0.2202
Epoch 7, Train Loss: 0.2010
Epoch 8, Train Loss: 0.1907
Epoch 9, Train Loss: 0.1753
Epoch 10, Train Loss: 0.1688


In [37]:
# Evaluate the model
model.eval()
test_loss = 0
with torch.no_grad():
    for batch in test_loader:
        user = batch['user'].to(device)
        item = batch['item'].to(device)
        rating = batch['rating'].to(device)

        predictions = model(user, item).squeeze()
        loss = criterion(predictions, rating)
        test_loss += loss.item()

print(f"Test Loss: {test_loss / len(test_loader):.4f}")

Test Loss: 0.1201


In [39]:
# Save the model
torch.save(model.state_dict(), './Models/ncf_model.pth')

# CNN

In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

In [3]:
# Prepare the data for text CNN
df = pd.read_csv('./Dataset/final_data_with_ratings.csv')

In [4]:
df.head()

Unnamed: 0,ISBN,Title,Author,Rating,No. of ratings,Image,Desc,Genre,Pages,user_id,rating
0,3730,Happiness: Lessons from a New Science,Richard Layard,3.73,85,https://i.gr-assets.com/images/S/compressed.ph...,There is a paradox at the heart of our lives. ...,"Psychology,Nonfiction,Economics,Science,Philos...",320,437,1
1,13926,After Long Silence,Helen Fremont,4.0,328,https://i.gr-assets.com/images/S/compressed.ph...,"""To this day, I don't even know what my mother...","Autobiography,Memoir,Nonfiction,World War II,H...",368,867,3
2,2792,Parnassus on Wheels,Christopher Morley,4.03,1214,https://i.gr-assets.com/images/S/compressed.ph...,"Roger Mifflin is part pixie, part sage, part n...","Fiction,Writing,Books About Books,Classics,Hum...",152,229,1
3,21717,Irrational Exuberance,Robert J. Shiller,3.98,275,https://i.gr-assets.com/images/S/compressed.ph...,As Robert Shillerâ€™s new 2009 preface to his ...,"Economics,Economics,Finance,Business,Nonfictio...",304,196,4
4,21565,Halo: Contact Harvest,Joseph Staten,3.98,271,https://i.gr-assets.com/images/S/compressed.ph...,"This is how it began...,It is the year 2524. H...","Science Fiction,Fiction,Games,Video Games,Spor...",396,619,3


In [42]:
# Tokenize text data
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder

max_words = 5000
max_len = 200

vectorizer = CountVectorizer(max_features=max_words)
text_sequences = vectorizer.fit_transform(df['Desc']).toarray()

In [44]:
# Pad sequences to ensure consistent length
padded_data = np.zeros((len(text_sequences), max_len))
# Change the slice to accommodate the actual shape of text_sequences
padded_data[:, :min(text_sequences.shape[1], max_len)] = text_sequences[:, :max_len]

In [45]:
# Prepare labels
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(df['Rating'])

In [46]:
# Split data into training and testing sets
train_size = int(0.8 * len(padded_data))
x_train, x_test = padded_data[:train_size], padded_data[train_size:]
y_train, y_test = labels[:train_size], labels[train_size:]

In [47]:
# Convert data to PyTorch tensors
x_train = torch.tensor(x_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.float32)
x_test = torch.tensor(x_test, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.float32)

train_dataset = torch.utils.data.TensorDataset(x_train, y_train)
test_dataset = torch.utils.data.TensorDataset(x_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [48]:
# Define the CNN Model
class TextCNN(nn.Module):
    def __init__(self, max_words, embedding_dim=128, max_len=max_len):
        super(TextCNN, self).__init__()
        self.embedding = nn.Embedding(max_words, embedding_dim)
        self.conv1 = nn.Conv1d(in_channels=embedding_dim, out_channels=128, kernel_size=5, stride=1)
        self.pool = nn.AdaptiveMaxPool1d(1)
        self.fc1 = nn.Linear(128, 128)
        self.dropout = nn.Dropout(0.5)
        self.output = nn.Linear(128, 1)

    def forward(self, x):
        x = self.embedding(x)
        x = x.permute(0, 2, 1)  # Change shape for Conv1d
        x = torch.relu(self.conv1(x))
        x = self.pool(x).squeeze(-1)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.output(x)
        return x

In [49]:
# Initialize the model
model = TextCNN(max_words)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [50]:
# Train the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for epoch in range(10):
    model.train()
    train_loss = 0
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()
        predictions = model(x_batch).squeeze()
        loss = criterion(predictions, y_batch)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    print(f"Epoch {epoch + 1}, Train Loss: {train_loss / len(train_loader):.4f}")


Epoch 1, Train Loss: 2110.7787
Epoch 2, Train Loss: 1114.0943
Epoch 3, Train Loss: 1114.6508
Epoch 4, Train Loss: 1119.4654
Epoch 5, Train Loss: 1125.0291
Epoch 6, Train Loss: 1117.6807
Epoch 7, Train Loss: 1117.1409
Epoch 8, Train Loss: 1101.8723
Epoch 9, Train Loss: 1125.1422
Epoch 10, Train Loss: 1088.7910


In [51]:
# Evaluate the model
model.eval()
test_loss = 0
with torch.no_grad():
    for x_batch, y_batch in test_loader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        predictions = model(x_batch).squeeze()
        loss = criterion(predictions, y_batch)
        test_loss += loss.item()

print(f"Test Loss: {test_loss / len(test_loader):.4f}")

Test Loss: 829.0306


In [53]:
# Save the model
torch.save(model.state_dict(), './Models/text_cnn_model.pth')

# Hybrid System

In [54]:
from sklearn.metrics.pairwise import cosine_similarity

def recommend_books_cosine(book_title, final_data, cosine_sim):
    if not final_data.empty:
        idx = final_data[final_data['Title'] == book_title].index
        if len(idx) > 0:
            idx = idx[0]
            sim_scores = list(enumerate(cosine_sim[idx]))
            sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
            sim_scores = sim_scores[1:11]
            book_indices = [i[0] for i in sim_scores]
            return final_data[['Title', 'Image', 'Author']].iloc[book_indices]
        else:
            return "Book not found"
    else:
        return "No data available"

In [64]:
def hybrid_recommendation(book_title, df, cosine_sim, text_cnn_model, ncf_model, tokenizer, max_len=200):
    cosine_recs = recommend_books_cosine(book_title, df, cosine_sim)
    cnn_recs = []
    ncf_recs = []

    if not isinstance(cosine_recs, str):
        for idx, row in cosine_recs.iterrows():
            # Get the book title from the cosine_recs DataFrame
            book_title_from_cosine_recs = row['Title']

            # Use the book title to get the description from the original DataFrame (df)
            book_desc = df.loc[df['Title'] == book_title_from_cosine_recs, 'Desc'].values[0]

            desc_seq = vectorizer.transform([book_desc]).toarray()
            desc_tensor = torch.tensor(desc_seq, dtype=torch.long).to(device)
            with torch.no_grad():
                cnn_rating_pred = text_cnn_model(desc_tensor).item()
            cnn_recs.append((row['Title'], row['Author'], cnn_rating_pred))

            book_id = df[df['Title'] == row['Title']]['ISBN'].values[0]
            user_id = df['user_id'].iloc[0]
            user_tensor = torch.tensor([int(user_id)], dtype=torch.long).to(device)
            book_tensor = torch.tensor([int(book_id)], dtype=torch.long).to(device)
            with torch.no_grad():
                ncf_rating_pred = ncf_model(user_tensor, book_tensor).item()
            ncf_recs.append((row['Title'], row['Author'], ncf_rating_pred))

        cnn_recs = sorted(cnn_recs, key=lambda x: x[2], reverse=True)
        ncf_recs = sorted(ncf_recs, key=lambda x: x[2], reverse=True)

    return {'CNN': cnn_recs, 'NCF': ncf_recs}

In [61]:
# Load cosine similarity matrix
cosine_sim = np.load('./Models/cosine_sim.npy')

In [60]:
# Load pre-trained NCF model
ncf_model = NCF(num_users=1000, num_items=25151, embedding_dim=50)
ncf_model.load_state_dict(torch.load('./Models/ncf_model.pth'))
ncf_model.to(device)
ncf_model.eval()

  ncf_model.load_state_dict(torch.load('./Models/ncf_model.pth'))


NCF(
  (user_embedding): Embedding(1000, 50)
  (item_embedding): Embedding(25151, 50)
  (fc1): Linear(in_features=100, out_features=128, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (output): Linear(in_features=128, out_features=1, bias=True)
)

In [62]:
# Load the trained TextCNN model
text_cnn_model = TextCNN(max_words)
text_cnn_model.load_state_dict(torch.load('./Models/text_cnn_model.pth'))
text_cnn_model.to(device)
text_cnn_model.eval()

  text_cnn_model.load_state_dict(torch.load('./Models/text_cnn_model.pth'))


TextCNN(
  (embedding): Embedding(5000, 128)
  (conv1): Conv1d(128, 128, kernel_size=(5,), stride=(1,))
  (pool): AdaptiveMaxPool1d(output_size=1)
  (fc1): Linear(in_features=128, out_features=128, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (output): Linear(in_features=128, out_features=1, bias=True)
)

In [65]:
# Make hybrid recommendations
recommendations = hybrid_recommendation('Python for Data Analysis', df, cosine_sim, text_cnn_model, ncf_model, vectorizer)
print(recommendations)

{'CNN': [('Seven Databases in Seven Weeks: A Guide to Modern Databases and the NoSQL Movement', 'Eric    Redmond,Jim R. Wilson', 135.17738342285156), ('Big Data: A Revolution That Will Transform How We Live, Work, and Think', 'Viktor Mayer-SchÃ¶nberger,Kenneth Cukier', 134.354736328125), ('NoSQL Distilled: A Brief Guide to the Emerging World of Polyglot Persistence', 'Pramod J. Sadalage,Martin Fowler', 134.32518005371094), ('Predictive Analytics: The Power to Predict Who Will Click, Buy, Lie, or Die', 'Eric Siegel', 134.3062286376953), ('Visualize This: The FlowingData Guide to Design, Visualization, and Statistics', 'Nathan Yau', 134.06350708007812), ('The Elements of Statistical Learning: Data Mining, Inference, and Prediction', 'Trevor Hastie,Robert Tibshirani,Jerome Friedman', 133.98851013183594), ('Interactive Data Visualization for the Web', 'Scott Murray', 133.86668395996094), ('Think Python', 'Allen B. Downey', 131.8311004638672), ('Think Stats', 'Allen B. Downey', 131.71662902