## Assignment - 3

In this assignment, we will synthesize a JSON dataset.

Load the data into the MongoDB cluster

Demonstrate an aggregation query on the data

Save the results in JSON format


### Loading the JSON file

In [14]:
import json 

# books.json is read as one JSON document per line.
# The file is opened in binary mode (json.loads accepts bytes) and streamed line
# by line; blank or whitespace-only lines -- such as a trailing newline at the
# end of the file -- are skipped because json.loads would raise on them.
with open('books.json', 'rb') as fin:
    books = [json.loads(line) for line in fin if line.strip()]

### Connect to MongoDB

In [15]:
import pymongo # pymongo is a python driver for MongoDB
from pymongo import MongoClient  # This is the import statement

from urllib.parse import quote_plus

import credentials # load username and password from credentials.py

# MongoDB connection URIs require the username and password to be percent-encoded;
# without this, reserved characters such as '@', ':', '/' or '%' in either value
# corrupt the URI and the client fails to parse it or to authenticate.
# NOTE(review): assumes credentials.py stores the raw (not already escaped) values -- confirm.
_escaped_username = quote_plus(credentials.username)
_escaped_password = quote_plus(credentials.password)
connection_string = f"mongodb+srv://{_escaped_username}:{_escaped_password}@bd.s56sft1.mongodb.net/ism6562?retryWrites=true&w=majority"

In [16]:
# Build the client from the connection string assembled above
# (the cluster address in that string comes from the MongoDB Atlas UI).
client = MongoClient(connection_string)

# Handle to the 'my_library' database; MongoDB creates the database lazily,
# on first write, if it does not exist yet.
db = client.my_library

In [17]:
# Handle to the 'books' collection inside the selected database.
collection = db.books

In [18]:
# Load the parsed book documents into the MongoDB collection.
#
# The insert is guarded for two reasons:
#   * re-running the notebook would otherwise insert a second copy of every
#     book and inflate the aggregation results computed further down;
#   * insert_many raises when it is handed an empty list.
# `result` is the InsertManyResult when documents were inserted, otherwise None.
collection_is_empty = collection.count_documents({}, limit=1) == 0
if books and collection_is_empty:
    result = collection.insert_many(books)
else:
    result = None

### Query the collection

##### Counting the number of copies available grouped by the genre of the books and sorting the result in descending order

In [19]:
import json


In [20]:
# Total number of available copies per genre, largest total first.
pipeline = [
    # Stage 1: bucket the documents by 'genre' and add up 'copies_available' in each bucket.
    {"$group": {"_id": "$genre", "available_books_count": {"$sum": "$copies_available"}}},
    # Stage 2: order the per-genre totals from highest to lowest.
    {"$sort": {"available_books_count": -1}},
]

# Execute the pipeline against the 'books' collection.
result = db['books'].aggregate(pipeline)

# Materialise the aggregation output as a list of dictionaries (it is saved to disk later).
result_list1 = [document for document in result]

# Render the list as an indented JSON string and show it.
result_json = json.dumps(result_list1, indent=4)
print(result_json)

[
    {
        "_id": "Dystopian",
        "available_books_count": 19
    },
    {
        "_id": "Fantasy",
        "available_books_count": 18
    },
    {
        "_id": "Historical",
        "available_books_count": 18
    },
    {
        "_id": "Classic",
        "available_books_count": 18
    }
]


##### Counting the number of books (titles) grouped by genre and sorting the result in descending order


In [21]:
# Number of book documents (titles) per genre, most populous genre first.
pipeline = [
    # Stage 1: bucket the documents by 'genre'; adding 1 per document yields the bucket size.
    {"$group": {"_id": "$genre", "count": {"$sum": 1}}},
    # Stage 2: order the genres by that size, highest first.
    {"$sort": {"count": -1}},
]

# Execute the pipeline against the 'books' collection of the `db` database handle.
result = db['books'].aggregate(pipeline)

# Materialise the aggregation output as a list of dictionaries (it is saved to disk later).
result_list2 = [document for document in result]

# Render the list as an indented JSON string and show it.
result_json = json.dumps(result_list2, indent=4)
print(result_json)

[
    {
        "_id": "Fantasy",
        "count": 7
    },
    {
        "_id": "Historical",
        "count": 6
    },
    {
        "_id": "Classic",
        "count": 6
    },
    {
        "_id": "Dystopian",
        "count": 5
    }
]


### Save the results from the query to either a JSON or BSON file format.

In [22]:
import bson.json_util as bju

In [23]:

# Save the first aggregation result (available copies per genre) to result1.json.
#
# The whole list is serialised as a single JSON array so that the file is one
# valid JSON document; writing each record as its own indented object, one after
# another, produces a file that a standard JSON parser rejects.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open("result1.json", "w", encoding="utf-8") as fout:
    fout.write(bju.dumps(result_list1, indent=2))
    fout.write('\n')

In [24]:
# Save the second aggregation result (number of titles per genre) to result2.json.
#
# The whole list is serialised as a single JSON array so that the file is one
# valid JSON document; writing each record as its own indented object, one after
# another, produces a file that a standard JSON parser rejects.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open("result2.json", "w", encoding="utf-8") as fout:
    fout.write(bju.dumps(result_list2, indent=2))
    fout.write('\n')

In [26]:
# Close the MongoClient created earlier in the notebook, now that the query
# results have been computed and written to disk.
client.close()