 ## Create the data

In [1]:
# import required modules
from mimesis.locales import Locale
from mimesis.keys import maybe
from mimesis.schema import Field, Schema
from mimesis import Datetime

dt = Datetime()
f = Field(locale=Locale.EN_GB, seed=42)

table_definition = {
    "person":{
        "amount":1000 *10
    },
    "product":{
        "amount":1000 *10
    },
    "order":{
        "amount":10000 *10
    },
    "artist":{
        "amount":500 *10
    },
    "review":{
        "amount":2000 *10
    }
}

# create data

## person

def person_generator() -> dict:
    first_name = f('first_name')
    last_name = f('last_name')
    return {
        "_id":f("uuid"),
        "first_name":first_name,
        "last_name":last_name,
        "name":first_name + " " + last_name,
        "company_name":f("company", key=maybe(None, probability=0.9)),
        "email":f("email"),
        "phone":f("phone_number"),
        "address":{
            "address_line_1":f("street_number")+" "+f("street_name"),
            "address_line_2":f('choice', items=['apt. 10','Suite. 23'], key=maybe(None, probability=0.9)),
            "city":f("city"),
            "country":f('choice', items=['England','Scotland', 'Wales', 'Northern Ireland']),
            "post_code":f("postal_code"),
            "coordinates":[f('latitude'), f('longitude')]
        }
    }

person_schema = Schema(
    schema=person_generator,
    iterations=table_definition['person']['amount']
)
person_data = person_schema.create()

person_id_count = table_definition['person']['amount']-1

print("person data created")

## artist

def artist_generator() -> dict:
    first_name = f("first_name")
    last_name = f("last_name")
    return {
        "_id":f("uuid"), 
        "first_name":first_name,
        "last_name":last_name,
        "name":first_name + " " + last_name,
        "company_name":f("company", key=maybe(None, probability=0.5)),
        "email":f("email"),
        "phone":f("phone_number"),
        "address":{
            "address_line_1":f("street_number")+" "+f("street_name"),
            "address_line_2":f('choice', items=['apt. 10','Suite. 23'], key=maybe(None, probability=0.9)),
            "city":f("city"),
            "country":f('choice', items=['England','Scotland', 'Wales', 'Northern Ireland']),
            "post_code":f("postal_code"),
            "coordinates":[f('latitude'), f('longitude')]
        }
    }

artist_schema = Schema(
    schema=artist_generator,
    iterations=table_definition['artist']['amount']
)

artist_data = artist_schema.create()

artist_id_count = table_definition['artist']['amount']-1

print("artist data created")

## product

def product_generator() -> dict:
    created_at = dt.datetime(start=2023, end=2023)
    quantity = f('integer_number', start=0, end=20)
    return {
        "_id":f("uuid"),
        "name":' '.join(f('words', quantity=2)),
        "description":' '.join(f('words', quantity=f('integer_number', start=8, end=25))),
        "category":f('choice', items=["oil paint", "watercolor", "acrylic paint", "charcoal", "pencil", "ink", "pastel", "collage", "digital art", "mixed media"]),
        "price":f('price', minimum=500, maximum=25000),
        "currency":f('currency_symbol'),
        "discount":f('float_number', start=0.2, end=0.8, precision=1, key=maybe(None, probability=0.8)),
        "quantity":quantity, 
        "image_url":f('stock_image_url'),
        "artist":artist_data[f('integer_number', start=0, end=artist_id_count)]['_id'],
        "creation_history": {
            "created_at":created_at,
            "quantity":quantity
        }
    }

product_schema = Schema(
    schema=product_generator,
    iterations=table_definition['product']['amount']
)

product_data = product_schema.create()

product_id_count = table_definition['product']['amount']-1

print("product data created")

## order

def order_generator() -> dict:
    person_number = f('integer_number', start=0, end=person_id_count)
    product_number = f('integer_number', start=0, end=product_id_count)
    shipping_address = person_data[person_number]['address']
    order_date = dt.datetime(start=2023, end=2023)
    return {
        "_id":f("uuid"),
        "person":person_data[person_number]['_id'],
        "product":product_data[product_number]['_id'],
        "product_name":product_data[product_number]['name'],
        "currency":product_data[product_number]['currency'],
        "discount":product_data[product_number]['discount'],
        "price":product_data[product_number]['price'],
        "quantity":f('integer_number', start=1, end=3),
        "order_date":order_date,
        "shipping_address":shipping_address,
        "payment_method":f('choice', items=['credit card','debit card', 'PayPal']),
        "order_status":f('choice', items=['pending','processing', 'shipped', 'delivered'], key=maybe(None, probability=0.1))
    }

order_schema = Schema(
    schema=order_generator,
    iterations=table_definition['order']['amount']
)

order_data = order_schema.create()

order_id_count = table_definition['order']['amount']-1

print("order data created")

## review

def review_generator() -> dict:
    return {
        "_id":f("uuid"),
        "person":person_data[f('integer_number', start=0, end=person_id_count)]['_id'],
        "product":product_data[f('integer_number', start=0, end=product_id_count)]['_id'],
        "artist":artist_data[f('integer_number', start=0, end=artist_id_count)]['_id'],
        "rating":f('choice', items=[1,2,3,4,5]),
        "review_text":' '.join(f('words', quantity=f('integer_number', start=8, end=50)))
    }

review_schema = Schema(
    schema=review_generator,
    iterations=table_definition['review']['amount']
)

review_data = review_schema.create()

review_id_count = table_definition['review']['amount']-1

print("review data created")


person data created
artist data created
product data created
order data created
review data created


 ## Load the data

In [2]:
from pymongo import MongoClient
from bson.binary import UuidRepresentation
from datetime import datetime,timezone
from uuid import uuid4

MONGODB_URI = "mongodb://localhost:27017/"

client = MongoClient(MONGODB_URI, uuidRepresentation='standard')
db = client["surreal_deal"]

# Create collections
person = db["person"]
product = db["product"]
order = db["order"]
artist = db["artist"]
review = db["review"]


In [3]:
%%time
person.insert_many(person_data)


CPU times: user 18.8 ms, sys: 2.5 ms, total: 21.3 ms
Wall time: 112 ms


InsertManyResult(['e7ee7205-658f-47fd-b779-b43ce672fe3b', '90bec870-acb8-41f3-b440-d2b08f7c5e70', 'e43d6336-9dca-4870-be8a-25c8a415776e', 'a3add6e8-5778-4376-b0d2-265858fc3443', 'dab27acb-ed79-47d1-b47b-7de4f0e15ad0', '7464920b-e661-4e37-b4ae-d955fe90376d', '2cfd8a27-1d94-47b5-bd71-f82958c66b0b', '45d62edf-7917-49ad-b1ae-90d11d229fb4', '5ba90c8f-501d-4050-a2b1-2fe04f47412f', '2e00b8c8-8e74-4f9b-87cd-b77a2317ce01', '2469ce18-e1c1-4295-9af7-5f1a6e4c12b1', 'dec87068-6179-4ac8-ae59-614c46e4b76d', '1b450333-cc7a-46cd-b334-af9c31706650', 'a78360ec-9c06-42a2-8556-f98e7ab76968', '51d405d2-54e9-46d2-90f7-e3f0da26838e', '1f5f7c0a-5003-43d8-961d-9b531715ea32', '3caf1fe0-d26d-48e8-8b6f-d2d406107621', 'c77eebb4-acb1-4da4-8d63-e8e44d941921', 'b1e3abb1-abde-488a-af56-fb92cdb106fa', 'ef54b5ac-ec3c-4938-b548-777a4edf61df', 'eae55156-940e-4450-b3a3-9f89d06340f6', '2efc15e5-dc88-4577-9fd6-9b7151af4a1a', '72627000-a084-46ec-a6c0-d19515631a08', '37894c6c-ec27-4296-9df5-e0c53cd0aa10', '2b4d73ec-7fd4-4c1f-92

In [4]:
%%time
product.insert_many(product_data)


CPU times: user 54.9 ms, sys: 4.76 ms, total: 59.7 ms
Wall time: 133 ms


InsertManyResult(['6af830c3-6620-4a86-82a8-3be6506728af', '6fa914f9-c352-4087-87c7-2c95dcf8a404', 'a2a56425-2ba1-4eb5-b15c-d6d63485605d', '3ab103c3-0a70-4b91-98ec-3e779033b47a', '930afba6-49a4-462c-82ab-bd406150e0b1', '89a70987-5f29-4a17-84cc-345e8334a947', '4b941cf3-1a88-4198-b5c3-d2f88f49eb7f', '6c1280d0-8923-4b3b-8f13-aa6f1914494b', '697a104c-db65-4d49-8cd1-18bad75d0a5a', 'a0292d23-5371-45e1-ae87-29dde79ad45a', '46fbfe25-7f32-4e22-a38b-63a0b539ca88', '07d1aa7d-d48a-49f2-af3c-448ddc90cef2', '9ea7b81f-7faf-470a-9280-d884ab80bdd8', 'c6ae48ca-4218-4fc8-9833-10f4b2f189f6', 'a6d50553-953d-4b69-b029-7e94ce55c0b0', '376894bf-a38e-4987-b90d-8808608be4e3', '6e268ab2-d979-4588-9532-ea232ae7001d', 'c2ed2b98-c2da-4146-bdd1-531326c93135', '42476aa4-0db1-44f1-94e1-cbd79e31495e', '97243d39-ceb1-41be-addd-5b84385feb09', '7652439f-990a-4a99-9c19-8e692f33355f', '001fadd1-585a-49bf-8827-3fba8254b698', '1412f463-8b22-45ee-ae71-26c7756219a8', '1141de0b-d70a-4d9f-9b7a-f36ee5b05b41', '9a4d3aa6-177f-4a37-87

In [5]:
%%time
order.insert_many(order_data)


CPU times: user 245 ms, sys: 24.6 ms, total: 270 ms
Wall time: 645 ms


InsertManyResult(['b4bf40f0-3018-43c4-b2e6-4f96766ae786', 'fa3c9055-d60f-4766-867b-f3e218099b48', 'd012b262-7a01-4ba3-be11-0495b22ffa1d', '25acb3e5-7653-49b8-befc-efadd862689c', '4e558ff4-27f1-40c5-8bd8-e1623960b9ac', '89f0c66a-2016-408c-b2da-17b8dae40126', 'cb3da3c3-3593-446c-ba93-46ee1efdf879', '26e39172-5826-4a5a-983d-e61ea50c1dca', 'f7c1979e-deb0-45e1-97de-f68c0aae5542', '9da2008f-6061-49df-a33f-2ed19de920fc', '4ab78702-4372-4d27-8598-a98a120cbe11', '915b67a5-376d-4262-9009-aea4c848ba38', '624800d7-e3f5-4eae-a01d-8eef9a64a678', 'da2ee11f-4a9f-4117-bc3a-45a3e2f100a3', 'cc7b32db-6251-4c70-8391-375e908ca7f7', '77896c33-cdca-49d1-9f2b-0ec5e1f423dc', '5537392e-8c18-43fe-813b-7d231e062860', '56974674-f221-455f-9468-19ae08018251', 'eaa8493e-0483-43d7-a957-a4ccdccfbc5f', '28d7ed6a-47c3-4446-88f9-b494127f09a5', 'c5707c8e-db03-4392-a3f6-2832c6dce379', 'c84dabc7-5aee-4d1d-857a-c54c27531d2a', '5fdcf5a3-e88d-4bff-a0c3-0dd569025207', '47398c8b-4b78-4fa4-96e1-3f6dfc504b5d', 'c90e5e82-bdab-4f20-8d

In [6]:
%%time
artist.insert_many(artist_data)


CPU times: user 10.5 ms, sys: 2.83 ms, total: 13.4 ms
Wall time: 68.9 ms


InsertManyResult(['21d71c4a-cd78-4de7-a184-cc96224b93e8', '2964059b-d01f-4601-8025-df6a2f5bb447', '39df5625-8840-4d73-9592-d0b3ede40686', '98dbd4a8-cef1-4c4e-a52f-a1b3ce1a606d', '16873e07-9d60-40f6-a1e4-4ea8098ecf4a', '802e2a08-151c-4b7b-926d-2ec1f36a43eb', 'b9cb7719-8d64-4450-838e-e1967c8c3d68', '15616c3c-c0a6-4cfb-b194-dc34ae2fcd2f', '2d373994-6e16-48a6-8210-37c6b3bd2115', '9f1760aa-466d-415b-81be-589a2cd5bd37', 'eecdbcf6-c26b-4998-8120-d2e7bb1dd830', '10b3cf12-1abb-4be1-ab0f-d8f9b04680d6', '2cc03a95-3847-4d7a-aeb0-35d9a5214d7d', '77fa1d33-afca-4276-b56c-24295f24c621', '8f06ee7a-fac0-45e4-a428-8213db53aaea', '3ec1c614-954f-4ac3-92e1-cb50f2304984', '81358f80-3b1f-415d-a1a0-e8b5e2e8ff65', '97d7ebdb-809a-4f7a-86af-d1d9d5416ec7', '11d3b7b5-1958-4ab7-8b38-dad7a919aa48', 'e6f49c30-8626-4017-bcc7-5ce3e8ed319f', '57101a99-2dcd-4157-b234-bd17b2f06a66', 'b62c4018-0bf9-4a3c-88b4-fd9bfc123b2c', '019dddfd-980b-4758-8d12-08beb913c991', '9623b102-8adc-4fb5-9739-155c31d51566', '26fbb088-4ed6-456c-8d

In [7]:
%%time
review.insert_many(review_data)


CPU times: user 23.6 ms, sys: 4.99 ms, total: 28.6 ms
Wall time: 118 ms


InsertManyResult(['7cecc5c8-2648-4a00-8ad6-c362fbf5ad58', '3d26d3f1-af2f-4365-9612-3f601f0b7a0a', '949a0779-6c52-4598-a888-3a5352db811a', '6bdc89ae-f7d8-4f92-b823-6c6d467a1faa', 'f6e6e053-84a3-41b4-812e-4abf48ca9431', 'c0b43ce6-de52-4d13-9257-5e36b6b5a1c5', '035ca524-2af6-4764-8253-179751f378ee', '5c38aa71-2c68-475f-bd2d-92ed5975d1ea', 'f1f103ee-3312-408e-80e5-aea41a288716', '77b7b56d-e793-4b70-b7f9-7f0d0ca39cc2', 'c403ed71-2eca-4019-82f3-8eb666c5f5f1', 'f46717d0-9c79-4bf2-87c4-b617ccdce8c6', '2748e2fc-0c5c-45bb-83dd-2aebab6c1609', '590523d2-ea56-42c6-8d46-6887dbfd110f', 'aca7d9e0-72f5-40f8-a7a4-5f39e7727858', '68daf2c5-ee56-4f53-a334-472c9044a38a', '1d1a5180-1d55-4cf8-99a9-5ab2232ca63d', '126eec2a-f065-4cc8-b662-78346e894350', '08165665-9cec-4c59-a81a-9da2743314c7', '86934fa9-0878-43ac-909f-ecc70ced7aaf', 'edad1ce7-3b11-4a84-b4b0-0581cf5724b5', '5bdc680a-d535-4352-9728-2f963d7b82ef', '2c0c2a38-4a2b-4547-9f1c-b143f3f245e7', '4786394d-f337-442d-a12b-6bbd0a5c12a6', '35a01eb2-3dfa-4bfc-87

 ## Run the queries

In [8]:
from random import randint
# getting just an array of ids to use for loops in some queries
person_ids = person.distinct("_id")
product_ids = product.distinct("_id")
order_ids = order.distinct("_id")
artist_ids = artist.distinct("_id")
review_ids = review.distinct("_id")

# data too big error: Executor error during distinct command :: caused by :: distinct too big, 16mb cap, full error: {'ok': 0.0, 'errmsg': 'Executor error during distinct command :: caused by :: distinct too big, 16mb cap', 'code': 17217, 'codeName': 'Location17217'}


 ### Q1: lookup vs record links

In [9]:
%%timeit
list(review.aggregate([
	{
		"$lookup": {
			"from": "artist",
			"localField": "artist",
			"foreignField": "_id",
			"pipeline": [
				{ "$project": { "_id": 0, "name": 1, "email": 1, "phone": 1 } }
			],
			"as": "artist",
		}

	},
	{
		"$lookup": {
			"from": "person",
			"localField": "person",
			"foreignField": "_id",
			"pipeline": [
				{ "$project": { "_id": 0, "name": 1, "email": 1, "phone": 1 } }
			],
			"as": "person",
		}

	},
	{
		"$lookup": {
			"from": "product",
			"localField": "product",
			"foreignField": "_id",
			"pipeline": [
				{ "$project": { "_id": 0, "name": 1, "category": 1, "price": 1 } }
			],
			"as": "product",
		}
	}
]))


1.64 s ± 35 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


 ### Q2: lookup vs graph

In [10]:
%%timeit
list(order.aggregate([
	{
		"$lookup": {
			"from": "person",
			"localField": "person",
			"foreignField": "_id",
			"pipeline": [
				{ "$project": { "_id": 0, "name": 1, "email": 1, "phone": 1 } }
			],
			"as": "person",
		}
	},
	{
		"$lookup": {
			"from": "product",
			"localField": "product",
			"foreignField": "_id",
			"pipeline": [
				{
					"$lookup": {
						"from": "artist",
						"localField": "artist",
						"foreignField": "_id",
						"pipeline": [
							{ "$project": { "_id": 0, "name": 1, "email": 1, "phone": 1 } }
						],
						"as": "artist",
					}
				},
				{ "$project": { "_id": 0, "category": 1, "description": 1, "image_url": 1, "artist": 1 } }
			],
			"as": "product",
		}
	},
	{ "$project": { "_id": 0, "price": 1, "order_date": 1, "product_name": 1, "artist": 1, "person": 1, "product": 1 } }
]))


9.01 s ± 89.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


 ### Q3: Name and email for all customers in England

In [11]:
%%time
person.create_index({"address.country": 1})

CPU times: user 849 µs, sys: 1.15 ms, total: 2 ms
Wall time: 44.1 ms


'address.country_1'

In [12]:
%%timeit
list(person.find(
	{ "address.country": "England" },
	{ "_id": 0, "name": 1, "email": 1 }
))


7.72 ms ± 552 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


 ### Q4: Count the number of confirmed orders in Q1 by artists in England

In [13]:
%%timeit
list(order.aggregate([
	{
		"$lookup": {
			"from": "product",
			"localField": "product",
			"foreignField": "_id",
			"pipeline": [
				{
					"$lookup": {
						"from": "artist",
						"localField": "artist",
						"foreignField": "_id",
						"pipeline": [
							{ "$project": { "_id": 0, "address.country": 1 } }
						],
						"as": "artist",
					}
				},
				{ "$project": { "_id": 0, "artist": 1 } }
			],
			"as": "product",
		}
	},
	{
		"$match": {
			"$or": [{ "order_status": "null" }, { "order_status": { "$ne": "pending" } }],
			"$expr": { "$lte": [{ "$month": "$order_date" }, 3] },
			"product.artist.address.country": "England",
		}
	},
	{ "$count": "count" }
]))


1.18 s ± 25.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


 ### Q5: Delete a specific review

In [14]:
%%time
review.delete_one({ "_id": review_ids[0] })


CPU times: user 597 µs, sys: 317 µs, total: 914 µs
Wall time: 682 µs


DeleteResult({'n': 1, 'ok': 1.0}, acknowledged=True)

 ### Q6: Delete reviews from a particular category

In [15]:
%%time
review.delete_many({ "product": { "$in": product.distinct("_id", { "category": "charcoal" }) } })


CPU times: user 965 µs, sys: 738 µs, total: 1.7 ms
Wall time: 19.4 ms


DeleteResult({'n': 2023, 'ok': 1.0}, acknowledged=True)

 ### Q7: Update a customer address

In [16]:
%%timeit
person.update_one(
	{ "_id": person_ids[randint(0, person_id_count)] },
	{
		"$set": {
			"address": {
				'address_line_1': '497 Ballycander',
				'address_line_2': "null",
				'city': 'Bromyard',
				'country': 'Wales',
				'post_code': 'ZX8N 4VJ',
				'coordinates': [68.772592, -35.491877]
			}
		}
	}
)


107 µs ± 653 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


 ### Q8: Update discounts for products

In [17]:
%%time
product.update_many(
	{ "price": { "$lt": 1000 } },
	{ "$set": { "discount": 0.2 } }
)


CPU times: user 502 µs, sys: 706 µs, total: 1.21 ms
Wall time: 3.82 ms


UpdateResult({'n': 237, 'nModified': 235, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)

 ### Q9: "Transaction"* order from a new customer

In [18]:
%%timeit
# Transaction - order from a new customer

new_person_id = uuid4()
random_product_id = product_ids[randint(0, product_id_count)]

# insert into the person table
person.insert_one({
		'_id': new_person_id,
		'first_name': 'Karyl',
		'last_name': 'Langley',
		'name': 'Karyl Langley',
		'company_name': "null",
		'email': 'dee1961@gmail.com',
		'phone': '+44 47 3516 5895',
		'address': {
			'address_line_1': '510 Henalta',
			'address_line_2': "null",
			'city': 'Lyme Regis',
			'country': 'Northern Ireland',
			'post_code': 'TO6Q 8CM',
			'coordinates': [-34.345071, 118.564172]
		}
	})

# relate into the order table
order.insert_one({
	"_id": uuid4(),
	"person": new_person_id,
	"product": random_product_id,
	'currency': '£',
	'discount': db.product.distinct("discount", { "_id" : random_product_id }), 
	"order_date": datetime.now(tz=timezone.utc),
	"order_status": "pending",
	"payment_method": "PayPal",
	"price": db.product.distinct("price", { "_id" : random_product_id }),
	"product_name": db.product.distinct("name", { "_id" : random_product_id }),
	"quantity": 1,
	"shipping_address": db.person.distinct("address", { "_id" : new_person_id })
})

# update the product table to reduce the quantity
product.update_one(
	{ "_id": random_product_id },
	{ "$inc": { "quantity": -1 } }
)


809 µs ± 22.8 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


 ### Q10: "Transaction"* - New Artist creates their first product

In [19]:
%%timeit
# Transaction - New Artist creates their first product

new_artist_id = uuid4()

# insert into the artist table
artist.insert_one({
    '_id': new_artist_id,
    'first_name': 'Anderson',
    'last_name': 'West',
    'name': 'Anderson West',
    'company_name': 'Atkins(ws) (ATK)',
    'email': 'six1933@gmail.com',
    'phone': '056 5881 1126',
    'address': {
        'address_line_1': '639 Connaugh',
        'address_line_2': "null",
        'city': 'Ripon',
        'country': 'Scotland',
        'post_code': 'CG3U 4TH',
        'coordinates': [4.273648, -112.907273]
	}
})

# insert into the product table
product.insert_one({
    '_id': uuid4(),
    'name': 'managed edt allocated pda',
    'description': 'counseling dildo greek pan works interest xhtml wrong dennis available cl specific next tower webcam peace magic',
    'category': 'watercolor',
    'price': 15735.96,
    'currency': '£',
    'discount': "null",
    'quantity': 1,
    'image_url': 'https://source.unsplash.com/1920x1080?',
    "artist": new_artist_id,
    "creation_history": {
        "quantity": 1,
        "created_at": datetime.now(tz=timezone.utc)
	}
})

215 µs ± 8.02 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [20]:
client.close()