In [1]:
import os

import pandas as pd
from pymongo import MongoClient

In [2]:
# MongoDB connection string. It can be overridden with the MONGO_URI environment
# variable so the notebook runs against other hosts without code edits; the
# default is the address this notebook was originally written against.
# NOTE(review): 172.17.0.2 looks like a Docker bridge address - confirm.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://172.17.0.2:27017/")
client = MongoClient(MONGO_URI)

In [3]:
# Handles to the database and the collection every later cell works with.
# Subscript access is equivalent to attribute access on the client/database.
db = client["sales_db"]
sales_collection = db["sales"]

In [4]:
# Start from a clean slate: an empty filter matches every document, so this
# removes whatever a previous run left in the collection.
sales_collection.delete_many(filter={})

DeleteResult({'n': 749, 'ok': 1.0}, acknowledged=True)

In [5]:
# Load the raw sales export that sits next to the notebook.
sales_data = pd.read_csv("./sales.csv")

# Preview the first rows, then the (rows, columns) shape on its own line.
print(sales_data.head(), sales_data.shape, sep="\n")

         date product_id  sales_amount store_location
0  2024-01-01  PROD-1005       2192.85        Chicago
1  2024-01-01  PROD-1001       1846.72       New York
2  2024-01-01  PROD-1002       2367.94    Los Angeles
3  2024-01-01  PROD-1001       1523.68          Miami
4  2024-01-02  PROD-1004       2789.43        Phoenix
(734, 4)


In [6]:
# One plain dict per DataFrame row, keyed by column name - the shape
# insert_many expects for a batch of documents.
sales_json = sales_data.to_dict("records")

# Bulk-load the whole CSV into the collection in a single call.
sales_collection.insert_many(sales_json)

print(f"Inserted {len(sales_json)} documents")

Inserted 734 documents


In [7]:
# Extra hand-written sales rows, all for the New York store, given as
# (date, product_id, sales_amount). Several dates fall before 2024-01-01.
sample_sales_rows = [
    ("2021-08-28", "PROD-1005", 2645.89),
    ("2021-08-28", "PROD-1002", 1598.23),
    ("2021-08-29", "PROD-1004", 2789.56),
    ("2024-08-29", "PROD-1001", 1678.45),
    ("2024-08-30", "PROD-1003", 2534.89),
    ("2024-08-30", "PROD-1005", 1823.67),
    ("2021-08-31", "PROD-1002", 2612.34),
    ("2024-09-12", "PROD-1002", 1645.45),
    ("2024-09-13", "PROD-1004", 2456.78),
    ("2024-09-13", "PROD-1001", 1734.56),
    ("2024-09-14", "PROD-1003", 2689.34),
    ("2024-09-14", "PROD-1005", 1598.89),
    ("2020-09-15", "PROD-1002", 2534.67),
    ("2024-09-15", "PROD-1004", 1867.23),
    ("2024-09-16", "PROD-1001", 2712.89),
    ("2024-09-16", "PROD-1003", 1623.56),
    ("2020-09-17", "PROD-1005", 2423.45),
    ("2024-09-17", "PROD-1002", 1789.67),
    ("2024-09-18", "PROD-1004", 2589.78),
    ("2021-09-18", "PROD-1001", 1645.34),
    ("2021-09-19", "PROD-1003", 2734.56),
    ("2021-09-19", "PROD-1005", 1534.89),
    ("2024-09-20", "PROD-1002", 2412.67),
    ("2024-09-20", "PROD-1004", 1876.45),
]

# Expand each tuple into a document with the same four fields, in the same
# order, as the rows loaded from the CSV.
sample_sales_json = [
    {
        "date": sale_date,
        "product_id": product_id,
        "sales_amount": amount,
        "store_location": "New York",
    }
    for sale_date, product_id, amount in sample_sales_rows
]

sales_collection.insert_many(sample_sales_json)

print(f"Inserted new {len(sample_sales_json)} documents")

Inserted new 24 documents


In [8]:
# Count the sales documents recorded for product PROD-1005.
prod_1005_filter = {'product_id': 'PROD-1005'}

# Cursor over the matching documents, kept under its original name. It is no
# longer consumed by the count below, so it can still be iterated afterwards.
prod_1005_sales = sales_collection.find(prod_1005_filter)

# Let the server do the counting instead of pulling every matching document
# into a Python list just to take its length.
prod_1005_count = sales_collection.count_documents(prod_1005_filter)

print(f"Found {prod_1005_count} sales documents for PROD-1005")

Found 152 sales documents for PROD-1005


In [9]:
# Raise sales_amount by 25% on every PROD-1002 document.
# $mul multiplies the stored value in place, so re-running this cell without
# reloading the collection compounds the increase.
prod_1002_filter = {'product_id': 'PROD-1002'}
uplift_by_quarter = {'$mul': {'sales_amount': 1.25}}

sales_collection.update_many(prod_1002_filter, uplift_by_quarter)

UpdateResult({'n': 210, 'nModified': 210, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)

In [10]:
# Delete records with dates older than 2024-01-01.
# Dates are stored as 'YYYY-MM-DD' strings, so a plain string comparison with
# $lt orders them chronologically.
# The filter is defined once so the reported count and the delete can never
# drift apart.
old_sales_filter = {'date': {'$lt': '2024-01-01'}}

# Cursor kept under its original name; it is not consumed by the count below.
old_sales = sales_collection.find(old_sales_filter)

# Count on the server rather than materialising every matching document.
old_sales_count = sales_collection.count_documents(old_sales_filter)
print(f"Found {old_sales_count} documents older than 2024-01-01")

# Last expression of the cell, so the DeleteResult is displayed as output.
sales_collection.delete_many(old_sales_filter)

Found 9 documents older than 2024-01-01


DeleteResult({'n': 9, 'ok': 1.0}, acknowledged=True)