In [None]:
import pymongo
from dotenv import load_dotenv
import os
import logging

In [None]:
# Logging configuration.
# filemode='w' truncates the log file on every run; drop that argument to
# append to the existing file instead.
logging.basicConfig(
    level=logging.DEBUG,
    filename='mongoDBHandler.log',
    filemode='w',
)

In [None]:
class MongoReco:
    """Helper around the MongoDB collections used by the Goodreads scraper.

    Constructing an instance loads the credentials, opens a client and keeps
    handles to the ``Books``, ``BookReviews`` and ``BookList`` collections of
    the ``Goodreads`` database.
    """

    def __init__(self):
        self.collections = self.start_mongo()
        self.collection_books = self.collections[0]
        self.collection_book_reviews = self.collections[1]
        self.collection_book_list = self.collections[2]

    @staticmethod
    def load_env_vars():
        """Load the MongoDB credentials from the ``.env`` file.

        Returns:
            A ``(password, user)`` tuple. Either element is ``None`` when the
            corresponding variable is not defined.
        """
        # load_dotenv() keeps variables that are already set, so a previously
        # loaded pair is removed first to pick up changes made to .env.
        if os.environ.get('password') and os.environ.get('user'):
            del os.environ["password"]
            del os.environ["user"]

        load_dotenv()
        password = os.environ.get('password')
        user = os.environ.get('user')
        if password and user:
            logging.info("Loaded MongoDB credentials from .env file")
        else:
            logging.warning("MongoDB credentials are missing or incomplete after loading .env file")
        return password, user

    @staticmethod
    def load_mongo(password, user):
        """Open the MongoDB connection and return the collection handles.

        ``user`` and ``password`` are interpolated into the connection URI
        verbatim, so special characters must already be percent-encoded
        (``%`` followed by the ASCII code) by the caller.

        Args:
            password: Password of the MongoDB user.
            user: Name of the MongoDB user.

        Returns:
            ``[books, book_reviews, book_list]`` collection handles.
        """
        SOURCE_DB = 'Goodreads'
        SOURCE_COLLECTION = 'Books'
        SOURCE_COLLECTION_REVIEWS = 'BookReviews'
        SOURCE_COLLECTION_BOOKLIST = 'BookList'
        HOST = "recosystems.hyjorhd.mongodb.net/?retryWrites=true&w=majority"
        MONGO_URI = f"mongodb+srv://{user}:{password}@{HOST}"
        logging.info("Initializing MongoDB connection")
        # Never write the password to the log file: log a masked URI instead.
        logging.info("MongoDB URI: mongodb+srv://%s:***@%s", user, HOST)
        logging.info("MongoDB Database: %s", SOURCE_DB)
        logging.info("User: %s", user)
        client = pymongo.MongoClient(MONGO_URI)
        db = client[SOURCE_DB]
        collections = [
            db[SOURCE_COLLECTION],
            db[SOURCE_COLLECTION_REVIEWS],
            db[SOURCE_COLLECTION_BOOKLIST],
        ]
        logging.info("MongoDB connection initialized, collections loaded")
        return collections

    def start_mongo(self):
        """Load the credentials and return the connected collection handles."""
        password, user = self.load_env_vars()
        return self.load_mongo(password, user)

    # ALL METHODS RELATED TO THE SCRAPED STATUS OF BOOKS, WORK IN CONJUNCTION
    # WITH THE BOOKLIST COLLECTION IN MONGODB

    @staticmethod
    def href_exists(collection, href):
        """Return True when a document for ``href`` exists in ``collection``.

        Args:
            collection: Collection to search (must provide ``find_one``).
            href: The href string to look up. A dict is used as the query
                filter unchanged.
        """
        # A bare string passed to find_one() is matched against "_id", which
        # never equals the stored href, so query the "href" field explicitly.
        query = href if isinstance(href, dict) else {"href": href}
        return collection.find_one(query) is not None

    @staticmethod
    def href_init(href):
        """Wrap ``href`` in a document with ``scraped`` set to 0 (not scraped yet)."""
        return {"href": href, "scraped": 0}

    def insert_href_into_book_list(self, href, many=False):
        """Insert one href, or several, into the book list if not present yet.

        Args:
            href: A single href, or an iterable of hrefs when ``many`` is True.
            many: Treat ``href`` as an iterable of hrefs.

        Returns:
            True when the operation completed without error (hrefs that are
            already stored are skipped and still count as success), False
            when an exception occurred.
        """
        target = str(self.collection_book_list)
        try:
            if many:
                new_documents = []
                seen = set()  # guards against duplicates inside the batch
                for single_href in href:
                    if single_href in seen or self.href_exists(self.collection_book_list, single_href):
                        logging.info("href %s already exists in %s", single_href, target)
                        logging.info("continuing to next href")
                        continue
                    seen.add(single_href)
                    new_documents.append(self.href_init(single_href))
                # insert_many() rejects an empty list, so only call it when
                # there is something new to store.
                if new_documents:
                    self.collection_book_list.insert_many(new_documents)
                logging.info("Inserted %d hrefs into %s", len(new_documents), target)
                return True

            if self.href_exists(self.collection_book_list, href):
                logging.info("href %s already exists in %s", href, target)
                return True
            # The href string becomes a document with scraped == 0.
            self.collection_book_list.insert_one(self.href_init(href))
            logging.info("Inserted %s into %s", href, target)
            return True
        except Exception as e:
            logging.error("Failed to insert %s into %s", href, target)
            logging.error(e)
            return False

    @staticmethod
    def _document_label(document):
        """Return a short, never-failing label for ``document`` for log lines."""
        if isinstance(document, dict):
            return str(document.get('Title', '<untitled document>'))
        return repr(document)

    def _insert_documents(self, collection, jsonResult, many):
        """Insert ``jsonResult`` into ``collection``; return True on success."""
        target = str(collection)
        try:
            if many:
                documents = list(jsonResult)
                # insert_many() rejects an empty list; nothing to do is not an error.
                if not documents:
                    logging.info("No documents to insert into %s", target)
                    return True
                collection.insert_many(documents)
                for document in documents:
                    logging.info("Inserted %s into %s", self._document_label(document), target)
            else:
                collection.insert_one(jsonResult)
                logging.info("Inserted %s into %s", self._document_label(jsonResult), target)
            return True
        except Exception as e:
            # Build the description defensively: the handler itself must not raise.
            if many:
                try:
                    description = f"{len(jsonResult)} documents"
                except TypeError:
                    description = "documents"
            else:
                description = self._document_label(jsonResult)
            logging.error("Failed to insert %s into %s", description, target)
            logging.error(e)
            return False

    def insert_into_books(self, jsonResult, many=False):
        """Insert one book document, or a list of them, into the Books collection.

        Args:
            jsonResult: A document, or a list of documents when ``many`` is True.
            many: Treat ``jsonResult`` as a list of documents.

        Returns:
            True on success, False when the insertion raised.
        """
        return self._insert_documents(self.collection_books, jsonResult, many)

    # Reviews are inserted into the BookReviews collection
    def insert_into_book_reviews(self, jsonResult, many=False):
        """Insert one review document, or a list of them, into BookReviews.

        Args:
            jsonResult: A document, or a list of documents when ``many`` is True.
            many: Treat ``jsonResult`` as a list of documents.

        Returns:
            True on success, False when the insertion raised.
        """
        return self._insert_documents(self.collection_book_reviews, jsonResult, many)
        
"""     def insert_into_review_docs(self, jsonResult, many=False):
        try: 
            if many: 
        except Exception as e: """

        
    

         
    
        
    
        