## Books Database Set Up

In [1]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint

In [2]:
# Create an instance of MongoClient.
# Only the port is given, so the client uses pymongo's default host; the
# mongoimport output in this notebook reports mongodb://localhost/.
mongo = MongoClient(port=27017)

In [3]:
# Load the three CSV files into the books_db database, one collection per file.
#   --headerline : take the field names from the first line of each CSV
#   --drop       : drop the target collection first, so re-running this cell
#                  replaces the data instead of appending to it
# NOTE(review): the Resources/ paths are relative, so this presumably has to be
# run from the notebook's own directory with mongoimport on the PATH -- confirm.
!mongoimport --type csv -d books_db -c books --headerline --drop Resources/Books.csv
!mongoimport --type csv -d books_db -c users --headerline --drop Resources/Users.csv
!mongoimport --type csv -d books_db -c ratings --headerline --drop Resources/Ratings.csv

2023-12-05T23:27:21.525-0500	connected to: mongodb://localhost/
2023-12-05T23:27:21.525-0500	dropping: books_db.books
2023-12-05T23:27:21.557-0500	2 document(s) imported successfully. 0 document(s) failed to import.
2023-12-05T23:27:21.691-0500	connected to: mongodb://localhost/
2023-12-05T23:27:21.692-0500	dropping: books_db.users
2023-12-05T23:27:21.737-0500	2 document(s) imported successfully. 0 document(s) failed to import.
2023-12-05T23:27:21.885-0500	connected to: mongodb://localhost/
2023-12-05T23:27:21.885-0500	dropping: books_db.ratings
2023-12-05T23:27:21.923-0500	2 document(s) imported successfully. 0 document(s) failed to import.


In [4]:
# Confirm that our new database was created: 'books_db' should appear in the
# list of database names returned by the server.
mongo.list_database_names()

['admin', 'books_db', 'config', 'local', 'met', 'uk_food']

In [5]:
# assign the books_db database to a variable name
db = mongo['books_db']

In [6]:
# Review the collections in our new database; after the import above we expect
# 'books', 'users' and 'ratings'.
db.list_collection_names()

['books', 'users', 'ratings']

In [7]:
# Bind each imported collection to a variable of the same name.
books, ratings, users = (db[name] for name in ('books', 'ratings', 'users'))

In [8]:
# Print one document from each source collection to check the imported schema.
sample_sources = (
    ("Example of a record within 'books' collection: ", books),
    ("\n Example of a record within 'ratings' collection:", ratings),
    ("\n Example of a record within 'users' collection:", users),
)
for heading, collection in sample_sources:
    print(heading)
    pprint(collection.find_one({}))

Example of a record within 'books' collection: 
{'Book-Author': '34f5tgd',
 'Book-Title': 'sdf4stgv',
 'ISBN': 133,
 'Publisher': 'dfgdrgdrh',
 'Year-Of-Publication': 1444,
 '_id': ObjectId('656ff829c9c688c1c29988f4')}

 Example of a record within 'ratings' collection:
{'Book-Rating': 2,
 'ISBN': 195153448,
 'User-ID': 1,
 '_id': ObjectId('656ff829cddb091819d492ed')}

 Example of a record within 'users' collection:
{'Age': 12,
 'Location': 'USA',
 'User-ID': 1,
 '_id': ObjectId('656ff829bbe51ae133261f2a')}


In [9]:
# Merge ratings, users and books into a single denormalised 'merged' collection.
#
# Both target collections are dropped before being repopulated. The source
# collections are re-imported with --drop on every run, but the derived ones
# were not, so re-running this cell used to append a second copy of every
# merged document (and the $lookup below then returned duplicated ratings).


def replace_collection(target, documents):
    """Replace the entire contents of a collection with ``documents``.

    Args:
        target: pymongo Collection to overwrite.
        documents: iterable of dicts to store (e.g. an aggregation cursor).

    Returns:
        The same ``target`` collection, so the call can be used in an assignment.
    """
    target.drop()
    # insert_many rejects an empty list, so only call it when there is data.
    batch = list(documents)
    if batch:
        target.insert_many(batch)
    return target


# Step 1: attach the matching user document to every rating.
ratings_users_pipeline = [
    {
        '$lookup': {
            'from': 'users',
            'localField': 'User-ID',
            'foreignField': 'User-ID',
            'as': 'user_data',
        }
    },
    {
        '$project': {
            '_id': 0,
            'Book-Rating': 1,
            'ISBN': 1,
            'User-ID': 1,
            # $lookup yields an array; keep only its first element as 'User'.
            'User': {'$arrayElemAt': ['$user_data', 0]},
        }
    },
]

merged_ratings_users_cursor = ratings.aggregate(ratings_users_pipeline)
replace_collection(db['merged_ratings_users'], merged_ratings_users_cursor)

# Step 2: attach every rating (with its user) to the book sharing its ISBN.
books_pipeline = [
    {
        '$lookup': {
            'from': 'merged_ratings_users',
            'localField': 'ISBN',
            'foreignField': 'ISBN',
            'as': 'book_ratings_users',
        }
    },
    {
        '$project': {
            '_id': 0,
            'Book-Author': 1,
            'Book-Title': 1,
            'ISBN': 1,
            'Publisher': 1,
            'Year-Of-Publication': 1,
            'Ratings': "$book_ratings_users",
        }
    },
    # Strip the ObjectIds carried over from the intermediate collections.
    {'$unset': ['Ratings._id', 'Ratings.User._id']},
]

merged_cursor = books.aggregate(books_pipeline)
merged = replace_collection(db['merged'], merged_cursor)
    

In [10]:
# Display one merged document to verify the nested Ratings/User structure.
print("Example of a record within 'merged' collection:")
merged_sample = merged.find_one({})
pprint(merged_sample)

Example of a record within 'merged' collection:
{'Book-Author': '34f5tgd',
 'Book-Title': 'sdf4stgv',
 'ISBN': 133,
 'Publisher': 'dfgdrgdrh',
 'Ratings': [{'Book-Rating': 5,
              'ISBN': 133,
              'User': {'Age': 34, 'Location': 'Mexico', 'User-ID': 2},
              'User-ID': 2}],
 'Year-Of-Publication': 1444,
 '_id': ObjectId('656ff82a34bb1b9ccf4efc81')}


In [11]:
# List the collections again: the two derived collections
# ('merged_ratings_users' and 'merged') should now exist alongside the imports.
db.list_collection_names()

['books', 'merged_ratings_users', 'users', 'merged', 'ratings']