## Books Database Set Up

In [1]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint

In [2]:
# Create an instance of MongoClient.
# No host is passed, so PyMongo's default host is used; only the port (27017) is set explicitly.
mongo = MongoClient(port=27017)

In [3]:
# Load each CSV under Resources/ into its own collection of the books_db database.
#   --headerline : take the field names from the first row of the CSV
#   --drop       : drop the target collection before importing, so re-running this
#                  cell replaces the data instead of appending to it
# NOTE(review): mongoimport infers value types from the CSV text; the sample records
# printed later show ISBN stored as an integer, so any leading zeros in the file are
# not preserved - confirm this is acceptable for ISBN matching.
!mongoimport --type csv -d books_db -c books --headerline --drop Resources/Books.csv
!mongoimport --type csv -d books_db -c users --headerline --drop Resources/Users.csv
!mongoimport --type csv -d books_db -c ratings --headerline --drop Resources/Ratings.csv
!mongoimport --type csv -d books_db -c geodata --headerline --drop Resources/GeoLoc.csv

2023-12-07T02:12:49.890-0500	connected to: mongodb://localhost/
2023-12-07T02:12:49.891-0500	dropping: books_db.books
2023-12-07T02:12:49.927-0500	4 document(s) imported successfully. 0 document(s) failed to import.
2023-12-07T02:12:50.068-0500	connected to: mongodb://localhost/
2023-12-07T02:12:50.068-0500	dropping: books_db.users
2023-12-07T02:12:50.105-0500	3 document(s) imported successfully. 0 document(s) failed to import.
2023-12-07T02:12:50.252-0500	connected to: mongodb://localhost/
2023-12-07T02:12:50.253-0500	dropping: books_db.ratings
2023-12-07T02:12:50.284-0500	4 document(s) imported successfully. 0 document(s) failed to import.
2023-12-07T02:12:50.435-0500	connected to: mongodb://localhost/
2023-12-07T02:12:50.435-0500	dropping: books_db.geodata
2023-12-07T02:12:50.470-0500	1 document(s) imported successfully. 0 document(s) failed to import.


In [4]:
# Confirm that the new database was created: 'books_db' should appear in the list
mongo.list_database_names()

['admin', 'books_db', 'config', 'local', 'met', 'uk_food']

In [5]:
# Assign the books_db database to a variable name
db = mongo['books_db']

In [6]:
# Review the collections in the new database (one per imported CSV is expected)
db.list_collection_names()

['geodata', 'users', 'ratings', 'books']

In [7]:
# Bind one handle per imported collection so later cells can query each by name
books, ratings, users, geodata = (
    db[name] for name in ('books', 'ratings', 'users', 'geodata')
)

In [8]:
# Show one sample document from each source collection to inspect its fields.
# Each heading is paired with the collection it introduces.
sample_sources = [
    ("Example of a record within 'books' collection: ", books),
    ("\n Example of a record within 'ratings' collection:", ratings),
    ("\n Example of a record within 'users' collection:", users),
    ("\n Example of a record within 'geodata' collection:", geodata),
]

for heading, collection in sample_sources:
    print(heading)
    pprint(collection.find_one({}))

Example of a record within 'books' collection: 
{'Book-Author': 'Mark P. O. Morford',
 'Book-Title': 'Classical Mythology',
 'ISBN': 195153448,
 'Publisher': 'Oxford University',
 'Year-Of-Publication': 2002,
 '_id': ObjectId('657170711b3124e9ccadde87')}

 Example of a record within 'ratings' collection:
{'Book-Rating': 0,
 'ISBN': 144,
 'User-ID': 3,
 '_id': ObjectId('657170720024e01a50b52771')}

 Example of a record within 'users' collection:
{'Age': 34,
 'Location': 'Mexico',
 'User-ID': 2,
 '_id': ObjectId('65717072e6e46ce6bf183f2c')}

 Example of a record within 'geodata' collection:
{'Lat': 54.235921,
 'Long': -4.5147671,
 '_id': ObjectId('657170720c79923d4bffe3cb'),
 'country': 'Mexico'}


In [9]:
# Merge all the data into one nested `merged` collection using three server-side joins:
#   users   + geodata              -> merged_users_geo
#   ratings + merged_users_geo     -> merged_ratings_users
#   books   + merged_ratings_users -> merged
#
# Every pipeline ends with a `$out` stage, so MongoDB writes the result itself and
# REPLACES the target collection on each run. The previous version appended each
# result with insert_one and never cleared the targets, so re-running this cell
# duplicated every document (and multiplied the matches found by the later lookups).

# Step 1: attach the geodata record whose `country` equals the user's `Location`.
pipeline = [{
    '$lookup': {
      'from': 'geodata',
      'localField': 'Location',
      'foreignField': 'country',
      'as': 'geo_data'
    }},
    {
    '$project': {
      '_id': 0,
      'Age': 1,
      'Location': 1,
      'User-ID': 1,
      # $lookup always yields an array; keep only the first match.
      # When nothing matches, the field is simply left out of the document.
      'Geo-Data': { '$arrayElemAt': ['$geo_data', 0]}
    }},
    # Must be the last stage: replaces merged_users_geo with the pipeline output.
    {'$out': 'merged_users_geo'}]

# aggregate() runs the pipeline immediately; with `$out` the returned cursor is empty.
merged_users_geo_cursor=users.aggregate(pipeline)

# Step 2: attach the (geo-enriched) user to each rating, joined on User-ID.
pipeline = [{
    '$lookup': {
      'from': 'merged_users_geo',
      'localField': 'User-ID',
      'foreignField': 'User-ID',
      'as': 'user_data'
    }},
    {
    '$project': {
      '_id': 0,
      'Book-Rating': 1,
      'ISBN': 1,
      'User-ID': 1,
      # Keep only the first matching user document for this rating.
      'User':  { '$arrayElemAt': ['$user_data', 0] }
    }},
    {'$out': 'merged_ratings_users'}]

merged_ratings_users_cursor=ratings.aggregate(pipeline)

# Step 3: collect every rating (with its user) under the book it refers to, joined on ISBN.
pipeline = [{
    '$lookup': {
      'from': 'merged_ratings_users',
      'localField': 'ISBN',
      'foreignField': 'ISBN',
      'as': 'book_ratings_users'
    }
  },
  {
    '$project': {
      '_id': 0,
      'Book-Author': 1,
      'Book-Title': 1,
      'ISBN': 1,
      'Publisher': 1,
      'Year-Of-Publication': 1,
      # Unlike the steps above, keep the whole array: a book can have many ratings.
      'Ratings': "$book_ratings_users"
    }
  },
    # Strip the _id values carried over from the intermediate collections.
    {'$unset':['Ratings._id', 'Ratings.User._id', 'Ratings.User.Geo-Data._id']},
    {'$out': 'merged'}
]
merged_cursor=books.aggregate(pipeline)

# Handle used by the following cells to inspect the final result.
merged=db['merged']
    

In [10]:
# Display a single merged document to check the nested book -> ratings -> user shape
print("Example of a record within 'merged' collection:")
example_merged_doc = merged.find_one({})
pprint(example_merged_doc)

Example of a record within 'merged' collection:
{'Book-Author': 'Mark P. O. Morford',
 'Book-Title': 'Classical Mythology',
 'ISBN': 195153448,
 'Publisher': 'Oxford University',
 'Ratings': [{'Book-Rating': 2,
              'ISBN': 195153448,
              'User': {'Age': 12, 'Location': 'USA', 'User-ID': 1},
              'User-ID': 1}],
 'Year-Of-Publication': 2002,
 '_id': ObjectId('6571707282722cec07174c2b')}


In [11]:
# List the collections again: the collections written by the merge step
# (merged_users_geo, merged_ratings_users, merged) should now appear next to the imported ones
db.list_collection_names()

['geodata',
 'users',
 'merged_users_geo',
 'ratings',
 'books',
 'merged',
 'merged_ratings_users']