## Books Database Setup

In [1]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint

In [2]:
# Create an instance of MongoClient on port 27017 (no host is passed, so the driver's default host is used)
mongo = MongoClient(port=27017)

In [3]:
# Load each CSV under Resources/ into its own collection of the books_db database.
# --headerline takes the field names from the first CSV row; --drop removes an existing
# collection of the same name before importing, so re-running this cell replaces the data.
!mongoimport --type csv -d books_db -c books --headerline --drop Resources/Books.csv
!mongoimport --type csv -d books_db -c users --headerline --drop Resources/Users.csv
!mongoimport --type csv -d books_db -c ratings --headerline --drop Resources/Ratings.csv
!mongoimport --type csv -d books_db -c geodata --headerline --drop Resources/GeoLoc.csv

2023-12-10T20:51:17.695-0500	connected to: mongodb://localhost/
2023-12-10T20:51:17.696-0500	dropping: books_db.books
2023-12-10T20:51:17.727-0500	2000 document(s) imported successfully. 0 document(s) failed to import.
2023-12-10T20:51:18.383-0500	connected to: mongodb://localhost/
2023-12-10T20:51:18.384-0500	dropping: books_db.users
2023-12-10T20:51:18.423-0500	3511 document(s) imported successfully. 0 document(s) failed to import.
2023-12-10T20:51:19.088-0500	connected to: mongodb://localhost/
2023-12-10T20:51:19.089-0500	dropping: books_db.ratings
2023-12-10T20:51:19.130-0500	4678 document(s) imported successfully. 0 document(s) failed to import.
2023-12-10T20:51:19.792-0500	connected to: mongodb://localhost/
2023-12-10T20:51:19.793-0500	dropping: books_db.geodata
2023-12-10T20:51:19.804-0500	48 document(s) imported successfully. 0 document(s) failed to import.


In [4]:
# confirm that the books_db database now exists on the server
mongo.list_database_names()

['admin',
 'books_db',
 'classDB',
 'config',
 'epa',
 'gardenDB',
 'local',
 'uk_food']

In [5]:
# assign the books_db database to a variable name
db = mongo['books_db']

In [6]:
# Remove derived collections left over from a previous run so the merge step starts clean.
collections_to_drop_list = ['merged', 'merged_ratings_users', 'merged_users_geo']

# Ask the server for the collection names once instead of once per loop iteration;
# the names being dropped are distinct, so the membership answers do not change mid-loop.
existing_collections = set(db.list_collection_names())
for collection_name in collections_to_drop_list:
    if collection_name in existing_collections:
        db[collection_name].drop()

In [7]:
# review the collections in books_db; any leftover merged collections have been dropped by this point
db.list_collection_names()

['books', 'ratings', 'geodata', 'users']

In [8]:
# bind each source collection to a short name used by the rest of the notebook
books, ratings, users, geodata = (
    db[collection_name]
    for collection_name in ('books', 'ratings', 'users', 'geodata')
)

In [9]:
# Show one sample document per source collection to inspect the imported field names.
sample_sources = (
    ("Example of a record within 'books' collection: ", books),
    ("\n Example of a record within 'ratings' collection:", ratings),
    ("\n Example of a record within 'users' collection:", users),
    ("\n Example of a record within 'geodata' collection:", geodata),
)
for heading, source in sample_sources:
    print(heading)
    pprint(source.find_one({}))

Example of a record within 'books' collection: 
{'': 0,
 'Book-Author': 'Mark P. O. Morford',
 'Book-Title': 'Classical Mythology',
 'ISBN': 195153448,
 'Publisher': 'Oxford University Press',
 'Unnamed: 0': 0,
 'Year-Of-Publication': 2002,
 '_id': ObjectId('65766b156cc34468507da8b6')}

 Example of a record within 'ratings' collection:
{'': 0,
 'Book-Rating': 9,
 'ISBN': 2253063339,
 'Unnamed: 0': 273,
 'User-ID': 276939,
 '_id': ObjectId('65766b17e6033412ab87efab')}

 Example of a record within 'users' collection:
{'': 0,
 'Location': 'canada',
 'Unnamed: 0': 7,
 'User-ID': 8,
 '_id': ObjectId('65766b16e6cfdb12f79e8a11')}

 Example of a record within 'geodata' collection:
{'': 0,
 'Lat': 47.7563917,
 'Long': 7.0194428,
 'Unnamed: 0': 8,
 '_id': ObjectId('65766b17d3c26ba5d10cbdc7'),
 'country': 'australia'}


In [10]:
# merge all the data
#
# The nested 'merged' collection is built in three server-side steps:
#   users   + geodata              -> merged_users_geo
#   ratings + merged_users_geo     -> merged_ratings_users
#   books   + merged_ratings_users -> merged
#
# Each pipeline ends with $out, so MongoDB writes the result collection itself and
# replaces any previous contents. This avoids one insert round trip per document and
# makes the cell safe to re-run without duplicating documents.

# Step 1: attach to every user the geodata document whose 'country' equals the user's 'Location'.
pipeline = [
    {
        '$lookup': {
            'from': 'geodata',
            'localField': 'Location',
            'foreignField': 'country',
            'as': 'geo_data'
        }
    },
    {
        '$project': {
            '_id': 0,
            # NOTE(review): the sample 'users' record shows no 'Age' field -- confirm it exists in the data
            'Age': 1,
            'Location': 1,
            'User-ID': 1,
            # $lookup produces an array; keep only its first element
            'Geo-Data': {'$arrayElemAt': ['$geo_data', 0]}
        }
    },
    {'$out': 'merged_users_geo'}
]

# The aggregation runs when aggregate() is called; with $out the returned cursor carries no documents.
merged_users_geo_cursor = users.aggregate(pipeline)

# Step 2: attach to every rating the (geo-enriched) user that shares its 'User-ID'.
pipeline = [
    {
        '$lookup': {
            'from': 'merged_users_geo',
            'localField': 'User-ID',
            'foreignField': 'User-ID',
            'as': 'user_data'
        }
    },
    {
        '$project': {
            '_id': 0,
            'Book-Rating': 1,
            'ISBN': 1,
            'User-ID': 1,
            # keep only the first matching user
            'User': {'$arrayElemAt': ['$user_data', 0]}
        }
    },
    {'$out': 'merged_ratings_users'}
]

merged_ratings_users_cursor = ratings.aggregate(pipeline)

# Step 3: attach to every book the full list of ratings that share its 'ISBN'.
pipeline = [
    {
        '$lookup': {
            'from': 'merged_ratings_users',
            'localField': 'ISBN',
            'foreignField': 'ISBN',
            'as': 'book_ratings_users'
        }
    },
    {
        '$project': {
            '_id': 0,
            'Book-Author': 1,
            'Book-Title': 1,
            'ISBN': 1,
            'Publisher': 1,
            'Year-Of-Publication': 1,
            'Ratings': "$book_ratings_users"
        }
    },
    # strip the ids of the intermediate documents from the nested structure
    {'$unset': ['Ratings._id', 'Ratings.User._id', 'Ratings.User.Geo-Data._id']},
    {'$out': 'merged'}
]

merged_cursor = books.aggregate(pipeline)

merged = db['merged']
    

In [11]:
# Fetch one document from the merged collection and show it to check the nested structure.
merged_sample = merged.find_one({})
print("Example of a record within 'merged' collection:")
pprint(merged_sample)

Example of a record within 'merged' collection:
{'Book-Author': 'Mark P. O. Morford',
 'Book-Title': 'Classical Mythology',
 'ISBN': 195153448,
 'Publisher': 'Oxford University Press',
 'Ratings': [],
 'Year-Of-Publication': 2002,
 '_id': ObjectId('65766b198c2f7d2d9a7495da')}


In [12]:
# list the collections again to see the merged collections alongside the source collections
db.list_collection_names()

['books',
 'merged_users_geo',
 'merged_ratings_users',
 'merged',
 'ratings',
 'geodata',
 'users']