In [19]:
from pymongo import MongoClient
from pprint import pprint


In [5]:
# 1. Connect to MongoD
client = MongoClient("mongodb://localhost:27017/")

In [7]:
# 2. Create or access the database
db=client["salesdb"]

In [9]:
# Create collection - salesdata_collection
salesdata = db["salesdata_collection"]

# insert one document in the collection
salesdata.insert_one({"sales_id":1,
"country":"India",
"Product_detail" : {"product_id": 100,
                    "Amount": 5000,"Vendor_code":13, 
                    "Vendor_description": "this vendor is expert in providing mobile phones"}})

InsertOneResult(ObjectId('680b13ba196ab22dc9801bdc'), acknowledged=True)

In [11]:
# To list all collections under db
db.list_collection_names()

['salesdata_collection']

In [25]:
# To print multiple documents from the collection
for document in salesdata.find():
    pprint(document)

pymongo.collection.Collection

In [43]:
# Filter and find the given conditional document from the collection
query={"sales_id":1}
result=salesdata.find(query)
for i in result:
    pprint(i)

{'Product_detail': {'Amount': 5000,
                    'Vendor_code': 13,
                    'Vendor_description': 'this vendor is expert in providing '
                                          'mobile phones',
                    'product_id': 100},
 '_id': ObjectId('680b13ba196ab22dc9801bdc'),
 'country': 'India',
 'sales_id': 1}


In [45]:
# manually inserting ObjectId also #  objectid is a 24-digit hexadecimal strings
from bson import ObjectId
result = salesdata.insert_one({"_id":ObjectId("5955f4bb212e8fb45ca200d0"),
"sales_id":2,
"country":"India",
"Product_detail" : {"product_id": 101,
                    "Amount": 6000,"Vendor_code":13, 
                    "Vendor_description": "this vendor is expert in providing mobile phones"}})


In [49]:
result.inserted_id
result.acknowledged

True

In [None]:
# What are the Advantages and disadvantages of including  _id:ObjectId ?
# Advantages - Automatic Uniqueness, Efficient Indexing, Time-based Component, No manual mgmt
# Disadvantages - Size, Lack of readability(Hexa), No precise timestamp (default every minute),
# No suitable for few distributed system

In [56]:
from random import randint

In [58]:
# Inserting multiple documents

productcollection = db['product_feedback']

productlist = []
product_name = ['Mobile','TV','Washing machine', 'Refrigerator', 'Microwave Oven','Induction cooker','AC']
company_name = ['LG', 'Samsung', 'Bosch','Siemens','Whirlpool','Electrolux','Haier','Videocon']
for x in range(1, 1001):
    sale = {
        'name' : product_name[randint(0, (len(product_name)-1))],
        'rating' : randint(1, 5),
        'brand' : company_name[randint(0, (len(company_name)-1))] 
    }
    productlist.append(sale)

print(productlist)

[{'name': 'Microwave Oven', 'rating': 5, 'brand': 'Haier'}, {'name': 'Refrigerator', 'rating': 2, 'brand': 'Bosch'}, {'name': 'Mobile', 'rating': 3, 'brand': 'Electrolux'}, {'name': 'Washing machine', 'rating': 2, 'brand': 'Siemens'}, {'name': 'AC', 'rating': 2, 'brand': 'Samsung'}, {'name': 'Mobile', 'rating': 5, 'brand': 'Whirlpool'}, {'name': 'Refrigerator', 'rating': 4, 'brand': 'Whirlpool'}, {'name': 'Induction cooker', 'rating': 3, 'brand': 'Samsung'}, {'name': 'Induction cooker', 'rating': 5, 'brand': 'Siemens'}, {'name': 'Washing machine', 'rating': 3, 'brand': 'Electrolux'}, {'name': 'AC', 'rating': 4, 'brand': 'Electrolux'}, {'name': 'TV', 'rating': 5, 'brand': 'Samsung'}, {'name': 'AC', 'rating': 1, 'brand': 'Siemens'}, {'name': 'Microwave Oven', 'rating': 2, 'brand': 'Bosch'}, {'name': 'Microwave Oven', 'rating': 4, 'brand': 'Samsung'}, {'name': 'Induction cooker', 'rating': 5, 'brand': 'Electrolux'}, {'name': 'Washing machine', 'rating': 3, 'brand': 'Videocon'}, {'name': '

In [60]:
result = productcollection.insert_many(productlist)

In [68]:
# 
for item in productcollection.find():
    pprint(item)

{'_id': ObjectId('680b2e6a196ab22dc9801bdd'),
 'brand': 'Haier',
 'name': 'Microwave Oven',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801bde'),
 'brand': 'Bosch',
 'name': 'Refrigerator',
 'rating': 2}
{'_id': ObjectId('680b2e6a196ab22dc9801bdf'),
 'brand': 'Electrolux',
 'name': 'Mobile',
 'rating': 3}
{'_id': ObjectId('680b2e6a196ab22dc9801be0'),
 'brand': 'Siemens',
 'name': 'Washing machine',
 'rating': 2}
{'_id': ObjectId('680b2e6a196ab22dc9801be1'),
 'brand': 'Samsung',
 'name': 'AC',
 'rating': 2}
{'_id': ObjectId('680b2e6a196ab22dc9801be2'),
 'brand': 'Whirlpool',
 'name': 'Mobile',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801be3'),
 'brand': 'Whirlpool',
 'name': 'Refrigerator',
 'rating': 4}
{'_id': ObjectId('680b2e6a196ab22dc9801be4'),
 'brand': 'Samsung',
 'name': 'Induction cooker',
 'rating': 3}
{'_id': ObjectId('680b2e6a196ab22dc9801be5'),
 'brand': 'Siemens',
 'name': 'Induction cooker',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801be6'),
 'bran

In [74]:
# To see how many documents in collection
count = productcollection.count_documents({})  # Empty query matches all documents
count

1000

In [86]:
# Find all documents where the product name is 'Mobile':
query={"name":"Mobile"}
res=productcollection.find(query)
for item in res:
    pprint(item)

In [90]:
# Find the number of documents for the given condition where name=Mobile
count=productcollection.count_documents(query)
count

136

In [100]:
# search product with specific rating
query={"rating":5}
res=productcollection.find(query)
for i in res:
    pprint(i)

count=productcollection.count_documents(query)
print(count)

{'_id': ObjectId('680b2e6a196ab22dc9801bdd'),
 'brand': 'Haier',
 'name': 'Microwave Oven',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801be2'),
 'brand': 'Whirlpool',
 'name': 'Mobile',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801be5'),
 'brand': 'Siemens',
 'name': 'Induction cooker',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801be8'),
 'brand': 'Samsung',
 'name': 'TV',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801bec'),
 'brand': 'Electrolux',
 'name': 'Induction cooker',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801bf0'),
 'brand': 'Electrolux',
 'name': 'Refrigerator',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801bf3'),
 'brand': 'Siemens',
 'name': 'Washing machine',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801bfa'),
 'brand': 'LG',
 'name': 'AC',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801c06'),
 'brand': 'Videocon',
 'name': 'TV',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801c13'),
 'brand': 'Bosch',
 '

In [118]:
# Search Products with Rating Above 4
query={"rating":{"$gt":4}}  # "$gt" stands for 'greater than'

res=productcollection.find(query)
for i in res:
    pprint(i)

{'_id': ObjectId('680b2e6a196ab22dc9801bdd'),
 'brand': 'Haier',
 'name': 'Microwave Oven',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801be2'),
 'brand': 'Whirlpool',
 'name': 'Mobile',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801be5'),
 'brand': 'Siemens',
 'name': 'Induction cooker',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801be8'),
 'brand': 'Samsung',
 'name': 'TV',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801bec'),
 'brand': 'Electrolux',
 'name': 'Induction cooker',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801bf0'),
 'brand': 'Electrolux',
 'name': 'Refrigerator',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801bf3'),
 'brand': 'Siemens',
 'name': 'Washing machine',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801bfa'),
 'brand': 'LG',
 'name': 'AC',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801c06'),
 'brand': 'Videocon',
 'name': 'TV',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801c13'),
 'brand': 'Bosch',
 '

In [124]:
# Search for products with name 'Refrigerator' and rating 3
query={"name":"Refrigerator","rating":3}
res=productcollection.find(query)
for i in res:
    pprint(i)

{'_id': ObjectId('680b2e6a196ab22dc9801c08'),
 'brand': 'Haier',
 'name': 'Refrigerator',
 'rating': 3}
{'_id': ObjectId('680b2e6a196ab22dc9801c11'),
 'brand': 'Bosch',
 'name': 'Refrigerator',
 'rating': 3}
{'_id': ObjectId('680b2e6a196ab22dc9801c32'),
 'brand': 'Videocon',
 'name': 'Refrigerator',
 'rating': 3}
{'_id': ObjectId('680b2e6a196ab22dc9801c43'),
 'brand': 'Bosch',
 'name': 'Refrigerator',
 'rating': 3}
{'_id': ObjectId('680b2e6a196ab22dc9801c4b'),
 'brand': 'Siemens',
 'name': 'Refrigerator',
 'rating': 3}
{'_id': ObjectId('680b2e6a196ab22dc9801c60'),
 'brand': 'Siemens',
 'name': 'Refrigerator',
 'rating': 3}
{'_id': ObjectId('680b2e6a196ab22dc9801c63'),
 'brand': 'Electrolux',
 'name': 'Refrigerator',
 'rating': 3}
{'_id': ObjectId('680b2e6a196ab22dc9801c72'),
 'brand': 'Whirlpool',
 'name': 'Refrigerator',
 'rating': 3}
{'_id': ObjectId('680b2e6a196ab22dc9801c73'),
 'brand': 'Haier',
 'name': 'Refrigerator',
 'rating': 3}
{'_id': ObjectId('680b2e6a196ab22dc9801ce4'),
 '

In [128]:
# Addioanl constrain with limiting 2 documents
res=productcollection.find(query).limit(2)
for i in res:
    pprint(i)

{'_id': ObjectId('680b2e6a196ab22dc9801c08'),
 'brand': 'Haier',
 'name': 'Refrigerator',
 'rating': 3}
{'_id': ObjectId('680b2e6a196ab22dc9801c11'),
 'brand': 'Bosch',
 'name': 'Refrigerator',
 'rating': 3}


In [138]:
# Search for product
# -- The product name is either "Mobile" or
# -- The product rating is greater than 4.
query = {
    "$or":[
        {"name":"Mobile"},
        {"rating":{"$gt":4}}
           ]
        }        
res=productcollection.find(query).limit(2)
for i in res:
    pprint(i)

{'_id': ObjectId('680b2e6a196ab22dc9801bdd'),
 'brand': 'Haier',
 'name': 'Microwave Oven',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801bdf'),
 'brand': 'Electrolux',
 'name': 'Mobile',
 'rating': 3}


In [None]:
# which brand has got maximum 5/5 rating
    # Filter out documents with a rating of 5.
    # Group the data by brand.
    # Count the number of 5/5 ratings for each brand.
    # Sort the results by the count in descending order.
    # Limit the output to show only the top brand.

In [178]:
query=[
     # Step 1: Filter documents with a rating of 5
    {"$match":{"rating":5}},  #Filter documents with a rating of 5
    
    # Step 2: Group by brand and count the number of 5/5 ratings
    {"$group":{
        "_id":"$brand",    # Group by brand
        "count":{"$sum":1} # count the number of 5 ratings for each brand
    }}, 
     # Step 3: Sort by count in descending order to get the brand with the most 5/5 ratings
    {"$sort": {"count": -1}}, 
    # {"$sort": {"count": -1,"brand": 1}}, For multiple column
     # Step 4: Limit to the top 1 result (most 5/5 ratings)
    {"$limit": 2}
]
result = productcollection.aggregate(query)

for i in result:
    pprint(i)

{'_id': 'Haier', 'count': 31}
{'_id': 'Whirlpool', 'count': 28}


In [186]:
# update those records where product name is AC to "Air conditioner
productcollection.update_many({"name":"AC"},{"$set":{"name":"Air Conditioner"}})


{'_id': ObjectId('680b2e6a196ab22dc9801be1'),
 'brand': 'Samsung',
 'name': 'Air Conditioner',
 'rating': 2}
{'_id': ObjectId('680b2e6a196ab22dc9801be7'),
 'brand': 'Electrolux',
 'name': 'Air Conditioner',
 'rating': 4}
{'_id': ObjectId('680b2e6a196ab22dc9801be9'),
 'brand': 'Siemens',
 'name': 'Air Conditioner',
 'rating': 1}
{'_id': ObjectId('680b2e6a196ab22dc9801bf4'),
 'brand': 'Whirlpool',
 'name': 'Air Conditioner',
 'rating': 4}
{'_id': ObjectId('680b2e6a196ab22dc9801bf6'),
 'brand': 'Samsung',
 'name': 'Air Conditioner',
 'rating': 2}
{'_id': ObjectId('680b2e6a196ab22dc9801bf7'),
 'brand': 'Bosch',
 'name': 'Air Conditioner',
 'rating': 2}
{'_id': ObjectId('680b2e6a196ab22dc9801bf9'),
 'brand': 'Bosch',
 'name': 'Air Conditioner',
 'rating': 1}
{'_id': ObjectId('680b2e6a196ab22dc9801bfa'),
 'brand': 'LG',
 'name': 'Air Conditioner',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801c13'),
 'brand': 'Bosch',
 'name': 'Air Conditioner',
 'rating': 5}
{'_id': ObjectId('680b2e6

In [188]:
# To list the updated record
for i in productcollection.find({"name":"Air Conditioner"}):
    pprint(i)

{'_id': ObjectId('680b2e6a196ab22dc9801be1'),
 'brand': 'Samsung',
 'name': 'Air Conditioner',
 'rating': 2}
{'_id': ObjectId('680b2e6a196ab22dc9801be7'),
 'brand': 'Electrolux',
 'name': 'Air Conditioner',
 'rating': 4}
{'_id': ObjectId('680b2e6a196ab22dc9801be9'),
 'brand': 'Siemens',
 'name': 'Air Conditioner',
 'rating': 1}
{'_id': ObjectId('680b2e6a196ab22dc9801bf4'),
 'brand': 'Whirlpool',
 'name': 'Air Conditioner',
 'rating': 4}
{'_id': ObjectId('680b2e6a196ab22dc9801bf6'),
 'brand': 'Samsung',
 'name': 'Air Conditioner',
 'rating': 2}
{'_id': ObjectId('680b2e6a196ab22dc9801bf7'),
 'brand': 'Bosch',
 'name': 'Air Conditioner',
 'rating': 2}
{'_id': ObjectId('680b2e6a196ab22dc9801bf9'),
 'brand': 'Bosch',
 'name': 'Air Conditioner',
 'rating': 1}
{'_id': ObjectId('680b2e6a196ab22dc9801bfa'),
 'brand': 'LG',
 'name': 'Air Conditioner',
 'rating': 5}
{'_id': ObjectId('680b2e6a196ab22dc9801c13'),
 'brand': 'Bosch',
 'name': 'Air Conditioner',
 'rating': 5}
{'_id': ObjectId('680b2e6

In [218]:
# Load the json file from folder Open the database pes and load the employees.json
db=client['pes']

In [216]:
emp=db["test"]
for i in emp.find():
    pprint(i)

{'_id': ObjectId('680b83c9c78d8da0b16b5123'),
 'department': 'HR',
 'designation': 'Recruiter',
 'emp_id': 101,
 'joining_date': '2020-03-15',
 'name': 'Alice Johnson',
 'salary': 55000}
{'_id': ObjectId('680b83c9c78d8da0b16b5124'),
 'department': 'Finance',
 'designation': 'Accountant',
 'emp_id': 102,
 'joining_date': '2019-07-01',
 'name': 'Bob Smith',
 'salary': 60000}
{'_id': ObjectId('680b83c9c78d8da0b16b5125'),
 'department': 'IT',
 'designation': 'Software Engineer',
 'emp_id': 103,
 'joining_date': '2021-01-10',
 'name': 'Charlie Lee',
 'salary': 75000}
{'_id': ObjectId('680b83c9c78d8da0b16b5126'),
 'department': 'Marketing',
 'designation': 'Content Writer',
 'emp_id': 104,
 'joining_date': '2022-05-22',
 'name': 'Diana Patel',
 'salary': 48000}
{'_id': ObjectId('680b83c9c78d8da0b16b5127'),
 'department': 'Sales',
 'designation': 'Sales Executive',
 'emp_id': 105,
 'joining_date': '2020-11-30',
 'name': 'Edward Jones',
 'salary': 52000}


### Case Study

In [230]:
# Import business_data.json for further exercises
db=client["salesdb"]
business=db["business_data"]
res=business.find({})
for i in res:
    pprint(i)

{'_id': ObjectId('680b8bcdc78d8da0b16b512b'),
 'cuisine': 'Vegetarian',
 'name': 'Fun Pvt Ltd',
 'rating': 4}
{'_id': ObjectId('680b8bcdc78d8da0b16b512c'),
 'cuisine': 'Fast Food',
 'name': 'Pizza Pvt Ltd',
 'rating': 5}
{'_id': ObjectId('680b8bcdc78d8da0b16b512d'),
 'cuisine': 'Indian',
 'name': 'Kitchen Pvt Ltd',
 'rating': 2}
{'_id': ObjectId('680b8bcdc78d8da0b16b512e'),
 'cuisine': 'Italian',
 'name': 'Lazy Pvt Ltd',
 'rating': 3}
{'_id': ObjectId('680b8bcdc78d8da0b16b512f'),
 'cuisine': 'Chinese',
 'name': 'City Pvt Ltd',
 'rating': 4}
{'_id': ObjectId('680b8bcdc78d8da0b16b5130'),
 'cuisine': 'Mexican',
 'name': 'HomeFood Pvt Ltd',
 'rating': 5}
{'_id': ObjectId('680b8bcdc78d8da0b16b5131'),
 'cuisine': 'Vegetarian',
 'name': 'Sandwich Pvt Ltd',
 'rating': 1}
{'_id': ObjectId('680b8bcdc78d8da0b16b5132'),
 'cuisine': 'Indian',
 'name': 'State Pvt Ltd',
 'rating': 4}
{'_id': ObjectId('680b8bcdc78d8da0b16b5133'),
 'cuisine': 'American',
 'name': 'Ghar ka khana Pvt Ltd',
 'rating': 2}


1. Find the orders which has the low review i.e. 1

2. Find the Top 5 Highest Rated Businesses

3.Count the Number of Businesses in Each Cuisine Category

4.Find Businesses Offering "Pizza" Cuisine with a Rating of 4 or 5