In [6]:
from flask import Flask, render_template, session, redirect, request, jsonify,url_for
from functools import wraps
import os
from pymongo import MongoClient
from flask_cors import CORS
import logging
from flask import Flask, request, jsonify
# from flask_pymongo import PyMongo
from bson.json_util import dumps
from bson.objectid import ObjectId
from datetime import datetime
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import smtplib

# from Recommender_System.recommender import (
#     get_content_based_recommendations, 
#     get_collaborative_filtering_recommendations,
#     get_hybrid_recommendations,
#     cosine_sim,
#     books
# )

import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity



In [8]:
# Flask application with CORS applied to the whole app.
app = Flask(__name__)
CORS(app)

# Session-signing key. It is read from the environment so the real value never
# has to live in the notebook; the literal fallback keeps local runs working
# but must not be used for a deployed instance.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "your_secret_key")

# Database connection
# The connection string (which may embed credentials) is taken from MONGO_URI;
# without it we fall back to the local development server.
# SECURITY: a commented-out Atlas URI containing a username and password used
# to sit here. It has been removed; those credentials should be rotated.
client = MongoClient(os.environ.get("MONGO_URI", "mongodb://localhost:27017"))


In [10]:

# MongoDB collections backing the recommender.
books_collection = client['all_books']['books2']
user_interactions_collection = client['all_books']['user_rating']

# Materialise both collections as DataFrames.
books = pd.DataFrame(list(books_collection.find()))
# Sequential integer ids 1..N in collection order. The earlier
# "zero-pad to a 6-char string, then cast back to int" step was a no-op
# (the padding is lost by the cast) and has been dropped.
# NOTE(review): the user_rating sample shown below uses book_ids such as
# 101-105, while these ids start at 1 - confirm the two id spaces line up.
books['book_id'] = range(1, len(books) + 1)
user_interactions = pd.DataFrame(list(user_interactions_collection.find()))



In [12]:
user_interactions

Unnamed: 0,_id,user_id,book_id,rating,borrow_count
0,6686ccf9e765d4b7afa42d78,1,101,5,10
1,6686ccf9e765d4b7afa42d79,1,102,3,5
2,6686ccf9e765d4b7afa42d7a,1,103,4,7
3,6686ccf9e765d4b7afa42d7b,2,101,4,2
4,6686ccf9e765d4b7afa42d7c,2,104,2,3
5,6686ccf9e765d4b7afa42d7d,3,102,5,6
6,6686ccf9e765d4b7afa42d7e,3,103,4,4
7,6686ccf9e765d4b7afa42d7f,3,104,3,1
8,6686ccf9e765d4b7afa42d80,3,105,1,2


In [14]:
# Build one text blob per book from its descriptive columns.
# Missing values become '' and every value is explicitly cast to str, so a
# non-string cell (e.g. a numeric title) cannot break the concatenation.
books['combined_features'] = (books['title'].fillna('').astype(str) + ' ' +
                              books['description'].fillna('').astype(str) + ' ' +
                              books['author'].fillna('').astype(str) + ' ' +
                              books['genre'].fillna('').astype(str) + ' ' +
                              books['department'].fillna('').astype(str))

# TF-IDF vectors (English stop words removed) and the book-by-book cosine
# similarity matrix; row/column i corresponds to row position i of `books`.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(books['combined_features'])
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [36]:
# Collaborative filtering inputs.
# Users x books rating matrix; (user, book) pairs with no rating become 0.
user_book_ratings = (
    user_interactions
    .pivot(index='user_id', columns='book_id', values='rating')
    .fillna(0)
)
R = user_book_ratings.values
# Per-user mean over every column (zero-filled cells included), then centre
# each user's row on that mean.
user_ratings_mean = R.mean(axis=1)
R_demeaned = R - user_ratings_mean[:, np.newaxis]

In [38]:
# Users x books borrow-count matrix; pairs with no record become 0.
user_borrow_count = (
    user_interactions
    .pivot(index='user_id', columns='book_id', values='borrow_count')
    .fillna(0)
)

In [40]:
user_book_ratings

book_id,101,102,103,104,105
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,5.0,3.0,4.0,0.0,0.0
2,4.0,0.0,0.0,2.0,0.0
3,0.0,5.0,4.0,3.0,1.0


In [44]:
user_borrow_count

book_id,101,102,103,104,105
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,10.0,5.0,7.0,0.0,0.0
2,2.0,0.0,0.0,3.0,0.0
3,0.0,6.0,4.0,1.0,2.0


In [46]:
user_book_ratings+user_borrow_count

book_id,101,102,103,104,105
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,15.0,8.0,11.0,0.0,0.0
2,6.0,0.0,0.0,5.0,0.0
3,0.0,11.0,8.0,4.0,3.0


In [30]:
user_ratings_mean

array([2.4, 1.2, 2.6])

In [34]:
R


array([[5., 3., 4., 0., 0.],
       [4., 0., 0., 2., 0.],
       [0., 5., 4., 3., 1.]])

In [171]:
# Rank of the truncated SVD. svds requires 1 <= k < min(R_demeaned.shape),
# so use the largest admissible value and fail with a clear message when the
# matrix is too small to factorise at all.
num_users, num_books = R_demeaned.shape
k = min(num_users, num_books) - 1
if k < 1:
    raise ValueError(
        "Need at least 2 users and 2 books to factorise the rating matrix; "
        f"got shape {R_demeaned.shape}"
    )

# Truncated SVD of the mean-centred ratings, then rebuild the (low-rank)
# rating estimates and add each user's mean back.
U, sigma, Vt = svds(R_demeaned, k=k)
sigma = np.diag(sigma)
predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_ratings_mean.reshape(-1, 1)
# Keep the user_id row labels (not just the book_id columns) so predictions
# can be looked up by user id; positional .iloc access is unaffected.
predicted_ratings_books = pd.DataFrame(
    predicted_ratings,
    index=user_book_ratings.index,
    columns=user_book_ratings.columns,
)


In [175]:
# Define recommendation functions
def get_content_based_recommendations(title, cosine_sim=cosine_sim, num_recommendations=2):
    """Return the books whose content is most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title to look up in the module-level ``books`` frame. If several
        rows share the title, the first one is used.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row/column ``i`` must correspond to row
        *position* ``i`` of ``books``.
    num_recommendations : int
        Number of similar books to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``title``, ``author``, ``genre``, most similar first. Empty
        (with those columns) when the title is not found.
    """
    # Work with row positions, not index labels, because cosine_sim is
    # addressed positionally.
    title_positions = np.flatnonzero((books['title'] == title).to_numpy())
    if title_positions.size == 0:
        logging.error(f"Title '{title}' not found in books dataset")
        return pd.DataFrame(columns=['title', 'author', 'genre'])
    idx = int(title_positions[0])

    scores = np.asarray(cosine_sim[idx])
    # Stable descending order (ties keep their original row order).
    ranked = np.argsort(-scores, kind='stable')
    # Drop the query book explicitly instead of assuming it sorts first, then
    # take exactly num_recommendations rows (the old [1:n] slice returned n-1).
    ranked = ranked[ranked != idx][:max(int(num_recommendations), 0)]
    return books.iloc[ranked][['title', 'author', 'genre']]

def get_collaborative_filtering_recommendations(user_id, num_recommendations=2):
    """Return the books with the highest predicted rating for ``user_id``.

    Parameters
    ----------
    user_id : int
        A user id present in the index of the module-level ``user_book_ratings``.
    num_recommendations : int
        Maximum number of books to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``title``, ``author``, ``genre``, ordered from highest to lowest
        predicted rating. Empty (with those columns) when the user is unknown.
        Only books whose ``book_id`` appears in ``books`` can be returned.
    """
    columns = ['title', 'author', 'genre']

    # Rows of predicted_ratings_books are in the same order as the rows of
    # user_book_ratings, so resolve the user's row through that index rather
    # than assuming ids are the contiguous sequence 1..N.
    known_users = user_book_ratings.index
    if user_id not in known_users:
        logging.error(f"User ID '{user_id}' not found in user interactions dataset")
        return pd.DataFrame(columns=columns)
    user_pos = known_users.get_loc(user_id)

    user_ratings = predicted_ratings_books.iloc[user_pos]
    limit = max(int(num_recommendations), 0)
    top_book_ids = list(user_ratings.sort_values(ascending=False).index[:limit])

    # isin() alone would return rows in `books` order; re-sort by rank so the
    # best prediction comes first.
    rank_of = {book_id: position for position, book_id in enumerate(top_book_ids)}
    matches = books[books['book_id'].isin(top_book_ids)]
    ranked = matches.assign(_rank=matches['book_id'].map(rank_of)).sort_values('_rank', kind='stable')
    return ranked[columns]

def get_hybrid_recommendations(title, user_id, cosine_sim, num_recommendations=3):
    """Combine content-based and collaborative recommendations.

    Content-based results for ``title`` come first, followed by collaborative
    results for ``user_id``; exact duplicate rows are dropped and the list is
    cut to ``num_recommendations`` rows.

    Parameters
    ----------
    title : str
        Title passed to the content-based recommender.
    user_id : int
        User passed to the collaborative recommender.
    cosine_sim : 2-D array-like
        Similarity matrix forwarded to the content-based recommender.
    num_recommendations : int
        Maximum number of rows in the result.

    Returns
    -------
    pandas.DataFrame
        The combined recommendations (columns as produced by the two
        underlying recommenders).
    """
    # Forward num_recommendations to BOTH recommenders; previously the
    # content-based call always used its own default regardless of the request.
    content_based_recs = get_content_based_recommendations(
        title, cosine_sim, num_recommendations=num_recommendations
    )
    collaborative_recs = get_collaborative_filtering_recommendations(user_id, num_recommendations)
    combined_recs = pd.concat([content_based_recs, collaborative_recs]).drop_duplicates().head(num_recommendations)
    return combined_recs

In [177]:
@app.route('/recommend', methods=['GET'])
def recommend():
    """Serve hybrid book recommendations as a JSON array.

    Query parameters:
    - title: exact book title (required)
    - user_id: integer user id (required)
    - num_recommendations: positive integer, defaults to 2

    Responses:
    - 200: JSON array of recommendation records
    - 400: a parameter is missing or not a valid integer
    - 404: the title or the user id is unknown
    - 500: unexpected failure while generating recommendations
    """
    # Malformed input is a client error (400), not a server error, so it is
    # validated before entering the catch-all block below.
    title = request.args.get('title')
    if not title:
        return jsonify({"error": "Missing required parameter: title"}), 400

    try:
        # A missing user_id becomes '' and fails int() like any other bad value.
        user_id = int(request.args.get('user_id', ''))
        num_recommendations = int(request.args.get('num_recommendations', 2))
    except ValueError:
        return jsonify({"error": "user_id and num_recommendations must be integers"}), 400

    if num_recommendations < 1:
        return jsonify({"error": "num_recommendations must be at least 1"}), 400

    try:
        logging.debug(f"Received recommendation request for title: {title}, user_id: {user_id}, num_recommendations: {num_recommendations}")

        # Validate parameters against the loaded data
        if title not in books['title'].values:
            logging.error(f"Title '{title}' not found in books dataset")
            return jsonify({"error": "Title not found"}), 404

        if user_id not in user_interactions['user_id'].values:
            logging.error(f"User ID '{user_id}' not found in user interactions dataset")
            return jsonify({"error": "User ID not found"}), 404

        recommendations = get_hybrid_recommendations(title, user_id, cosine_sim, num_recommendations)

        logging.debug(f"Generated recommendations: {recommendations}")

        # to_json() already yields a JSON string; send it with the JSON
        # content type (a bare str return would be served as text/html).
        return app.response_class(
            recommendations.to_json(orient='records'),
            mimetype='application/json',
        )
    except Exception as e:
        logging.error(f"Error in recommendation process: {e}")
        return jsonify({"error": f"Failed to generate recommendations: {str(e)}"}), 500

In [49]:
# Handle to the 'accepted' collection in the 'admi' database.
accepted_book=client['admi']['accepted']

In [51]:
pd.DataFrame(accepted_book.find())

Unnamed: 0,_id,req,timestamp
0,668635909376052f5eb611e4,"{'_id': 6686342ef1b52ff5f11c5098, 'Name': 'dum...",2024-07-04T05:39:28.744980
1,668635979376052f5eb611e7,"{'_id': 6686341cf1b52ff5f11c5096, 'Name': 'dum...",2024-07-04T05:39:35.827938
2,6686359e9376052f5eb611e8,"{'_id': 6686340bf1b52ff5f11c5095, 'Name': 'dum...",2024-07-04T05:39:42.228735
3,668635e19376052f5eb611e9,"{'_id': 66863408f1b52ff5f11c5094, 'Name': 'dum...",2024-07-04T05:40:49.488179
4,668635e89376052f5eb611ea,"{'_id': 668633e8f1b52ff5f11c5092, 'Name': 'dum...",2024-07-04T05:40:56.549702
5,668635ec9376052f5eb611eb,"{'_id': 668633e1f1b52ff5f11c5091, 'Name': 'dum...",2024-07-04T05:41:00.621922
6,668635f19376052f5eb611ec,"{'_id': 668633d7f1b52ff5f11c5090, 'Name': 'dum...",2024-07-04T05:41:05.326994
7,668635f59376052f5eb611ed,"{'_id': 668633cff1b52ff5f11c508f, 'Name': 'dum...",2024-07-04T05:41:09.999823
8,6686360e9376052f5eb611ee,"{'_id': 66863404f1b52ff5f11c5093, 'Name': 'dum...",2024-07-04T05:41:34.013601
9,668636139376052f5eb611ef,"{'_id': 668633c9f1b52ff5f11c508e, 'Name': 'dum...",2024-07-04T05:41:39.352847


In [55]:
def most_issued_books(dataset, branch_year, top_n=2):
    """
    Return the most issued books for a given branch and year.

    NOTE: a later cell in this notebook redefines ``most_issued_books`` against
    the 'bookname' / 'IssueCount' columns; this version expects the
    'BookID' / 'IssuedCount' schema described below.

    Parameters:
    - dataset: DataFrame containing book issue data with columns 'BookID', 'BranchYear', and 'IssuedCount'
    - branch_year: String prefix of 'BranchYear' to match, e.g., 'EE22BT'
    - top_n: Number of books to return (defaults to 2, the original behaviour)

    Returns:
    - List of the BookID values of the most issued books, most issued first
      (empty when nothing matches)
    """

    # Rows whose BranchYear starts with the prefix. na=False makes missing
    # BranchYear values count as "no match" instead of producing a NaN mask
    # that cannot be used for boolean indexing.
    in_branch = dataset['BranchYear'].str.startswith(branch_year, na=False)
    filtered_data = dataset[in_branch]

    # Highest IssuedCount first
    sorted_data = filtered_data.sort_values(by='IssuedCount', ascending=False)

    # Keep the ids of the top_n rows
    top_books = sorted_data.head(top_n)['BookID'].tolist()

    return top_books



In [57]:
# Location of the JSON export of the 'accepted' collection. Set
# ACCEPTED_JSON_PATH to point at your own copy; the default is the location
# that was previously hard-coded here.
accepted_json_path = os.environ.get(
    "ACCEPTED_JSON_PATH", "/home/sachin-yadav/Downloads/admi.accepted.json"
)
df = pd.read_json(accepted_json_path)
df.head()

Unnamed: 0,_id,req,timestamp
0,{'$oid': '668635909376052f5eb611e4'},"{'_id': {'$oid': '6686342ef1b52ff5f11c5098'}, ...",2024-07-04 05:39:28.744980
1,{'$oid': '668635979376052f5eb611e7'},"{'_id': {'$oid': '6686341cf1b52ff5f11c5096'}, ...",2024-07-04 05:39:35.827938
2,{'$oid': '6686359e9376052f5eb611e8'},"{'_id': {'$oid': '6686340bf1b52ff5f11c5095'}, ...",2024-07-04 05:39:42.228735
3,{'$oid': '668635e19376052f5eb611e9'},"{'_id': {'$oid': '66863408f1b52ff5f11c5094'}, ...",2024-07-04 05:40:49.488179
4,{'$oid': '668635e89376052f5eb611ea'},"{'_id': {'$oid': '668633e8f1b52ff5f11c5092'}, ...",2024-07-04 05:40:56.549702


In [65]:
df['req']

0     {'_id': {'$oid': '6686342ef1b52ff5f11c5098'}, ...
1     {'_id': {'$oid': '6686341cf1b52ff5f11c5096'}, ...
2     {'_id': {'$oid': '6686340bf1b52ff5f11c5095'}, ...
3     {'_id': {'$oid': '66863408f1b52ff5f11c5094'}, ...
4     {'_id': {'$oid': '668633e8f1b52ff5f11c5092'}, ...
5     {'_id': {'$oid': '668633e1f1b52ff5f11c5091'}, ...
6     {'_id': {'$oid': '668633d7f1b52ff5f11c5090'}, ...
7     {'_id': {'$oid': '668633cff1b52ff5f11c508f'}, ...
8     {'_id': {'$oid': '66863404f1b52ff5f11c5093'}, ...
9     {'_id': {'$oid': '668633c9f1b52ff5f11c508e'}, ...
10    {'_id': {'$oid': '668633b1f1b52ff5f11c508d'}, ...
11    {'_id': {'$oid': '66863398f1b52ff5f11c508c'}, ...
12    {'_id': {'$oid': '6686338bf1b52ff5f11c508b'}, ...
13    {'_id': {'$oid': '6686336af1b52ff5f11c508a'}, ...
14    {'_id': {'$oid': '66863359f1b52ff5f11c5089'}, ...
15    {'_id': {'$oid': '668632e7f1b52ff5f11c5088'}, ...
16    {'_id': {'$oid': '668632d7f1b52ff5f11c5086'}, ...
17    {'_id': {'$oid': '668632baf1b52ff5f11c5085

In [115]:
import pandas as pd
import json

# Load the JSON data
# The path can be overridden with ACCEPTED_JSON_PATH; the default is the
# location that was previously hard-coded here. The encoding is given
# explicitly so the read does not depend on the platform's default encoding.
accepted_json_path = os.environ.get(
    "ACCEPTED_JSON_PATH", "/home/sachin-yadav/Downloads/admi.accepted.json"
)
with open(accepted_json_path, 'r', encoding='utf-8') as file:
    data = json.load(file)


In [121]:
# (Re)create the MongoDB client against the local server.
client = MongoClient("mongodb://localhost:27017")
# NOTE: this rebinds `data` - assigned from json.load() in the previous cell -
# to the 'accepted' collection of the 'admi' database.
data = client['admi']['accepted']

In [139]:
# Pull every document out of the 'accepted' collection ...
records = [document for document in data.find()]

# ... and turn the documents into a DataFrame for inspection.
df = pd.DataFrame(records)
df.head()

Unnamed: 0,_id,req,timestamp
0,668635909376052f5eb611e4,"{'_id': 6686342ef1b52ff5f11c5098, 'Name': 'Sac...",2024-07-04T05:39:28.744980
1,668635979376052f5eb611e7,"{'_id': 6686341cf1b52ff5f11c5096, 'Name': 'Sac...",2024-07-04T05:39:35.827938
2,6686359e9376052f5eb611e8,"{'_id': 6686340bf1b52ff5f11c5095, 'Name': 'Sac...",2024-07-04T05:39:42.228735
3,668635e19376052f5eb611e9,"{'_id': 66863408f1b52ff5f11c5094, 'Name': 'Sac...",2024-07-04T05:40:49.488179
4,668635e89376052f5eb611ea,"{'_id': 668633e8f1b52ff5f11c5092, 'Name': 'Viv...",2024-07-04T05:40:56.549702


In [143]:
def extract_req_field(req, field):
    """Return ``req[field]`` from a 'req' document, or None when unavailable.

    Parameters:
    - req: mapping-like object (e.g. the dict stored in the 'req' column)
    - field: key to look up

    Returns:
    - The stored value, or None if the key is absent or ``req`` is not
      mapping-like (e.g. None or NaN in a row with a missing 'req').
    """
    getter = getattr(req, 'get', None)
    if getter is None:
        # Missing / malformed 'req' cell: treat it like an absent key.
        return None
    return getter(field, None)

In [179]:
# Lift selected keys of each 'req' document into top-level columns
# (added in this order: email, bookname, author).
for _req_key in ('email', 'bookname', 'author'):
    df[_req_key] = df['req'].apply(lambda req, _key=_req_key: extract_req_field(req, _key))

In [181]:
# # Create a DataFrame from the JSON data
# records = []
# for entry in data:
#     req = entry.get('req', {})
#     records.append({
#         'BookName': req.get('bookname'),
#         'Email': req.get('email'),
#         'Timestamp': req.get('timestamp')
#     })

# df = pd.DataFrame(records)
# df.head()

In [183]:
df.columns

Index(['_id', 'req', 'timestamp', 'email', 'bookname', 'BranchYear', 'author'], dtype='object')

In [185]:
df.head()

Unnamed: 0,_id,req,timestamp,email,bookname,BranchYear,author
0,668635909376052f5eb611e4,"{'_id': 6686342ef1b52ff5f11c5098, 'Name': 'Sac...",2024-07-04T05:39:28.744980,220120019@iitdh.ac.in,Heat and Mass Transfer: Fundamentals and Appli...,220120,
1,668635979376052f5eb611e7,"{'_id': 6686341cf1b52ff5f11c5096, 'Name': 'Sac...",2024-07-04T05:39:35.827938,220120019@iitdh.ac.in,Mechanics of Materials,220120,
2,6686359e9376052f5eb611e8,"{'_id': 6686340bf1b52ff5f11c5095, 'Name': 'Sac...",2024-07-04T05:39:42.228735,220120019@iitdh.ac.in,The C Programming Language,220120,
3,668635e19376052f5eb611e9,"{'_id': 66863408f1b52ff5f11c5094, 'Name': 'Sac...",2024-07-04T05:40:49.488179,220120019@iitdh.ac.in,Engineering Mechanics: Statics,220120,
4,668635e89376052f5eb611ea,"{'_id': 668633e8f1b52ff5f11c5092, 'Name': 'Viv...",2024-07-04T05:40:56.549702,CE22BT012@iitdh.ac.in,Principles of Chemical Engineering Processes: ...,CE22BT,


In [187]:
def extract_branch_year(email):
    """Derive the branch/year code from an email address.

    The code is the part before '@' with its last three characters removed,
    e.g. 'CE22BT012@iitdh.ac.in' -> 'CE22BT'.
    (Presumably the dropped characters are a per-student serial number -
    confirm against the address format.)

    Parameters:
    - email: email address string

    Returns:
    - The branch/year string, or None when ``email`` is not a string
      (e.g. None/NaN for a record without an email).
    """
    if not isinstance(email, str):
        return None
    local_part = email.split('@', 1)[0]
    return local_part[:-3]

# Derive each row's BranchYear from its email address.
df['BranchYear'] = df['email'].map(extract_branch_year)


In [189]:
df.head()

Unnamed: 0,_id,req,timestamp,email,bookname,BranchYear,author
0,668635909376052f5eb611e4,"{'_id': 6686342ef1b52ff5f11c5098, 'Name': 'Sac...",2024-07-04T05:39:28.744980,220120019@iitdh.ac.in,Heat and Mass Transfer: Fundamentals and Appli...,220120,
1,668635979376052f5eb611e7,"{'_id': 6686341cf1b52ff5f11c5096, 'Name': 'Sac...",2024-07-04T05:39:35.827938,220120019@iitdh.ac.in,Mechanics of Materials,220120,
2,6686359e9376052f5eb611e8,"{'_id': 6686340bf1b52ff5f11c5095, 'Name': 'Sac...",2024-07-04T05:39:42.228735,220120019@iitdh.ac.in,The C Programming Language,220120,
3,668635e19376052f5eb611e9,"{'_id': 66863408f1b52ff5f11c5094, 'Name': 'Sac...",2024-07-04T05:40:49.488179,220120019@iitdh.ac.in,Engineering Mechanics: Statics,220120,
4,668635e89376052f5eb611ea,"{'_id': 668633e8f1b52ff5f11c5092, 'Name': 'Viv...",2024-07-04T05:40:56.549702,CE22BT012@iitdh.ac.in,Principles of Chemical Engineering Processes: ...,CE22BT,


In [195]:
# Number of accepted requests per (bookname, BranchYear) pair, flattened back
# into ordinary columns with the count stored as 'IssueCount'.
issue_counts = (
    df.groupby(['bookname', 'BranchYear'])
    .size()
    .rename('IssueCount')
    .reset_index()
)


In [197]:
# The cell previously ended in a dangling `issue_counts['author']=`, which is
# a SyntaxError. Show the table instead, matching the recorded output below.
# TODO: if an author column is wanted, assign it from a bookname -> author
# lookup once the intended source of authors is decided.
issue_counts.head(10)

Unnamed: 0,bookname,BranchYear,IssueCount
0,Algorithms Unlocked,220010,2
1,Analog Integrated Circuit Design,220020,21
2,Analog Integrated Circuit Design,220120,1
3,Chemical Process Safety: Fundamentals with App...,CE22BT,1
4,Clean Code: A Handbook of Agile Software Craft...,220010,1
5,Clean Code: A Handbook of Agile Software Craft...,220020,1
6,Communication Systems,220020,1
7,"Compilers: Principles, Techniques, and Tools",220020,1
8,Computer Architecture: A Quantitative Approach,220010,1
9,Computer Architecture: A Quantitative Approach,CE22BT,1


In [167]:
def most_issued_books(dataset, branch_year, top_n=2):
    """Return the titles issued most often for a branch/year prefix.

    Parameters:
    - dataset: DataFrame with columns 'bookname', 'BranchYear' and 'IssueCount'
    - branch_year: String prefix of 'BranchYear' to match, e.g. '220120'
    - top_n: Number of titles to return (defaults to 2, the original behaviour)

    Returns:
    - List of 'bookname' values, highest 'IssueCount' first (empty when
      nothing matches)
    """
    # na=False: rows with a missing BranchYear count as "no match" instead of
    # yielding a NaN mask that cannot be used for boolean indexing.
    in_branch = dataset['BranchYear'].str.startswith(branch_year, na=False)
    filtered_data = dataset[in_branch]
    sorted_data = filtered_data.sort_values(by='IssueCount', ascending=False)
    top_books = sorted_data.head(top_n)['bookname'].tolist()
    return top_books

In [169]:
# Example usage: print the most issued titles for one BranchYear prefix,
# using the per-(bookname, BranchYear) counts in `issue_counts`.
branch_year = '220120'  # Replace with the desired branch year
print(most_issued_books(issue_counts, branch_year))

['Analog Integrated Circuit Design', 'Engineering Mechanics: Statics']
