# Création des requêtes initiales au nouveau format NoSQL pour s’assurer que la migration s’est bien passée

In [3]:
pip install pymongo





[notice] A new release of pip is available: 23.3.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [11]:
import pandas
import pymongo

URI = "localhost:27017"
client = pymongo.MongoClient(URI)
db = client.sae

## 1. Lister les clients n’ayant jamais effecuté une commande

In [12]:
qst1 = db.Customers.aggregate([
    {"$lookup": {
            "from": "Orders", 
            "localField": "customerNumber", 
            "foreignField": "customerNumber", 
            "as": "orders" 
        }
    },
    {"$match": {
            "orders": { "$size": 0 }
        }
    },
    {"$project": {
            "_id": 0,
            "customerName": 1,
            "customerNumber": 1,
            "country": 1
        }
    }
])

pandas.DataFrame(list(qst1))

Unnamed: 0,customerNumber,customerName,country
0,125,Havel & Zbyszek Co,Poland
1,168,American Souvenirs Inc,USA
2,169,Porto Imports Co.,Portugal
3,206,"Asian Shopping Network, Co",Singapore
4,223,Natürlich Autos,Germany
5,237,ANG Resellers,Spain
6,247,Messner Shopping Network,Germany
7,273,"Franken Gifts, Co",Germany
8,293,BG&E Collectables,Switzerland
9,303,Schuyler Imports,Netherlands


## 2. Pour chaque employé, le nombre de clients, le nombre de commandes et le montant total de celles-ci

In [13]:
qst2 = db.Customers.aggregate([
    {"$lookup": {
        "from": "Orders",
        "localField": "customerNumber",
        "foreignField": "customerNumber",
        "as": "orders"
    }},
    {"$unwind": {"path": "$orders", "preserveNullAndEmptyArrays": True}},
    {"$lookup": {
        "from": "Payments",
        "localField": "customerNumber",
        "foreignField": "customerNumber",
        "as": "payments"
    }},
    {"$unwind": {"path": "$payments", "preserveNullAndEmptyArrays": True}},
    {"$group": {
        "_id": "$salesRepEmployeeNumber",
        "nb_clients": {"$addToSet": "$customerNumber"},
        "nb_commandes": {"$sum": {"$cond": [{"$ifNull": ["$orders", False]}, 1, 0]}},
        "montant_total": {"$sum": "$payments.amount"}
    }},
    {"$project": {
        "_id": 0,
        "employeeNumber": "$_id",
        "nb_clients": {"$size": "$nb_clients"},
        "nb_commandes": 1,
        "montant_total": 1
    }}
])

pandas.DataFrame(list(qst2))

Unnamed: 0,nb_commandes,montant_total,employeeNumber,nb_clients
0,32,943442.48,1166.0,6
1,90,2580550.55,1401.0,10
2,60,2119701.72,1612.0,5
3,63,2352253.84,1611.0,5
4,211,12674066.13,1165.0,6
5,47,1815463.17,1621.0,5
6,58,1870933.55,1323.0,8
7,30,1014439.14,1702.0,6
8,0,0.0,,22
9,382,21850743.39,1370.0,7


## 3. Idem pour chaque bureau (nombre de clients, nombre de commandes et montant total), avec en plus le nombre de clients d’un pays différent, s’il y en a

In [14]:
qst3 = db.Offices.aggregate([
    {
        "$unwind": "$Employees"
    },
    {
        "$lookup": {
            "from": "Customers",
            "localField": "Employees.employeeNumber",
            "foreignField": "salesRepEmployeeNumber",
            "as": "customers"
        }
    },
    {
        "$addFields": {
            "nb_client_paysdiff": {
                "$size": {
                    "$filter": {
                        "input": "$customers",
                        "as": "customer",
                        "cond": {"$ne": ["$$customer.country", "$country"]}
                    }
                }
            }
        }
    },
    {
        "$lookup": {
            "from": "Orders",
            "localField": "customers.customerNumber",
            "foreignField": "customerNumber",
            "as": "orders"
        }
    },
    {
        "$group": {
            "_id": {
                "officeCode": "$officeCode",
                "city": "$city",
                "country": "$country"
            },
            "nombreClients": {"$sum": {"$size": "$customers"}},
            "nombreCommandes": {"$sum": {"$size": "$orders"}},
            "montantTotalCommandes": {
                "$sum": {
                    "$sum": {
                        "$map": {
                            "input": "$orders",
                            "as": "order",
                            "in": {"$sum": "$$order.OrderDetails.priceEach"}
                        }
                    }
                }
            },
            "nb_client_paysdiff": {"$sum": "$nb_client_paysdiff"}
        }
    },
    {
        "$project": {
            "_id": 0,
            "officeCode": "$_id.officeCode",
            "city": "$_id.city",
            "country": "$_id.country",
            "nombreClients": 1,
            "nombreCommandes": 1,
            "montantTotalCommandes": 1,
            "nb_client_paysdiff": 1
        }
    }
])

pandas.DataFrame(list(qst3))

Unnamed: 0,nombreClients,nombreCommandes,montantTotalCommandes,nb_client_paysdiff,officeCode,city,country
0,12,32,28110.93,0,2.0,Boston,USA
1,29,106,95865.19,17,4.0,Paris,France
2,5,16,13904.46,3,5.0,Tokyo,Japan
3,17,47,46645.68,12,7.0,London,UK
4,10,38,36783.97,5,6.0,Sydney,Australia
5,12,48,44346.64,0,1.0,San Francisco,USA
6,15,39,36652.66,3,3.0,NYC,USA


## 4. Pour chaque produit, donner le nombre de commandes, la quantité totale commandée, et le nombre de clients différents ;

In [15]:
qst4 = db.Products.aggregate([
    {"$lookup": {
        "from": "Orders",
        "localField": "productCode",
        "foreignField": "OrderDetails.productCode",
        "as": "orders"
    }},
    {"$unwind": "$orders"},
    {"$unwind": "$orders.OrderDetails"},
    {"$group": {
        "_id": "$productCode",
        "nb_commandes": {"$sum": 1},
        "qte_totale": {"$sum": "$orders.OrderDetails.quantityOrdered"},
        "nb_clients_diff": {"$addToSet": "$orders.customerNumber"}
    }},
    {"$project": {
        "productCode": "$_id",
        "nb_commandes": 1,
        "qte_totale": 1,
        "nb_clients_diff": {"$size": "$nb_clients_diff"}
    }}
])

pandas.DataFrame(list(qst4))


Unnamed: 0,_id,nb_commandes,qte_totale,productCode,nb_clients_diff
0,S12_2823,277,9746,S12_2823,25
1,S18_3685,272,9265,S18_3685,21
2,S18_3320,297,10243,S18_3320,24
3,S18_3029,371,12889,S18_3029,26
4,S18_4409,314,10869,S18_4409,23
...,...,...,...,...,...
104,S700_3505,339,11826,S700_3505,22
105,S18_2325,351,12124,S18_2325,24
106,S18_1889,333,11600,S18_1889,25
107,S24_3371,354,12316,S24_3371,22


## 5. Donner le nombre de commande pour chaque pays, ainsi que le montant total des commandes et le montant total payé : on veut conserver les clients n’ayant jamais commandé dans le résultat final ;

In [16]:
qst5 = db.Customers.aggregate([
    {"$lookup": {
        "from": "Orders",
        "localField": "customerNumber",
        "foreignField": "customerNumber",
        "as": "orders"
    }},
    {"$lookup": {
        "from": "Payments",
        "localField": "customerNumber",
        "foreignField": "customerNumber",
        "as": "payments"
    }},
    {"$group": {
        "_id": "$country",
        "nb_commandes": {"$sum": {"$size": "$orders"}},
        "montant_total": {"$sum": {"$sum": "$orders.OrderDetails.priceEach"}},
        "montant_total_paye": {"$sum": "$payments.amount"}
    }}
])

pandas.DataFrame(list(qst5))

Unnamed: 0,_id,nb_commandes,montant_total,montant_total_paye
0,Italy,10,0,0
1,Sweden,7,0,0
2,Finland,9,0,0
3,Ireland,2,0,0
4,Switzerland,2,0,0
5,Spain,36,0,0
6,Canada,7,0,0
7,Russia,0,0,0
8,France,37,0,0
9,New Zealand,15,0,0


## 6. On veut la table de contigence du nombre de commande entre la ligne de produits et le pays du client ;

In [17]:
qst6 = db.Orders.aggregate([
    {"$lookup": {
        "from": "Customers",
        "localField": "customerNumber",
        "foreignField": "customerNumber",
        "as": "customer"
    }},
    {"$unwind": "$customer"},
    {"$unwind": "$OrderDetails"},

    {"$lookup": {
        "from": "Products",
        "localField": "OrderDetails.productCode",
        "foreignField": "productCode",
        "as": "product"
    }},
    {"$unwind": "$product"},

    {"$group": {
        "_id": {"productLine": "$product.productLine", "country": "$customer.country"},
        "nb_commande": {"$sum": 1}
    }},
    {"$addFields": {
        "ProductLine": "$_id.productLine",
        "Pays": "$_id.country"
    }},
    {"$project": {
        "_id": 0,
        "ProductLine": 1,
        "Pays": 1,
        "nb_commande": 1
    }},
    {"$sort": {"ProductLine": 1, "Pays": 1}}
])
pandas.DataFrame(list(qst6))

Unnamed: 0,nb_commande,ProductLine,Pays
0,53,Classic Cars,Australia
1,25,Classic Cars,Austria
2,4,Classic Cars,Belgium
3,14,Classic Cars,Canada
4,34,Classic Cars,Denmark
...,...,...,...
121,14,Vintage Cars,Singapore
122,74,Vintage Cars,Spain
123,12,Vintage Cars,Sweden
124,39,Vintage Cars,UK


## 7. On veut la même table croisant la ligne de produits et le pays du client, mais avec le montant total payé dans chaque cellule ;

In [20]:
qst7 = db.Orders.aggregate([
    {"$lookup": {
        "from": "Customers",
        "localField": "customerNumber",
        "foreignField": "customerNumber",
        "as": "customer"
    }},
    {"$unwind": "$customer"},
    {"$unwind": "$OrderDetails"},

    {"$lookup": {
        "from": "Products",
        "localField": "OrderDetails.productCode",
        "foreignField": "productCode",
        "as": "product"
    }},
    {"$unwind": "$product"},

    {"$group": {
        "_id": {"productLine": "$product.productLine", "country": "$customer.country"},
        "montant_total_paye": {"$sum": {"$multiply": ["$OrderDetails.priceEach", "$OrderDetails.quantityOrdered"]}}
    }},
    {"$addFields": {
        "ProductLine": "$_id.productLine",
        "Pays": "$_id.country"
    }},
    {"$project": {
        "_id": 0,
        "ProductLine": 1,
        "Pays": 1,
        "montant_total_paye": 1
    }},
    {"$sort": {"ProductLine": 1, "Pays": 1}}
])

pandas.DataFrame(list(qst7))

Unnamed: 0,montant_total_paye,ProductLine,Pays
0,193085.54,Classic Cars,Australia
1,101459.47,Classic Cars,Austria
2,20136.96,Classic Cars,Belgium
3,61623.22,Classic Cars,Canada
4,157182.48,Classic Cars,Denmark
...,...,...,...
121,34960.46,Vintage Cars,Singapore
122,229514.51,Vintage Cars,Spain
123,33804.46,Vintage Cars,Sweden
124,123798.74,Vintage Cars,UK


## 8. Donner les 10 produits pour lesquels la marge moyenne est la plus importante (cf buyPrice et priceEach) ;

In [21]:
qst8 = db.Products.aggregate([
    {"$lookup": {
        "from": "Orders",
        "localField": "productCode",
        "foreignField": "OrderDetails.productCode",
        "as": "orders"
    }},
    {"$unwind": "$orders"},
    {"$unwind": "$orders.OrderDetails"},
    {"$group": {
        "_id": "$productCode",
        "_id": "$productName",
        "marge_moyenne": {"$avg": {"$subtract": ["$orders.OrderDetails.priceEach", "$buyPrice"]}}
    }},
    {"$sort": {"marge_moyenne": -1}},
    {"$limit": 10}
])

pandas.DataFrame(list(qst8))

Unnamed: 0,_id,marge_moyenne
0,1982 Lamborghini Diablo,90.979036
1,1958 Chevy Corvette Limited Edition,89.816465
2,1950's Chicago Surface Lines Streetcar,85.009831
3,1939 Cadillac Limousine,84.781774
4,1926 Ford Fire Engine,81.805282
5,1954 Greyhound Scenicruiser,79.573734
6,1936 Harley Davidson El Knucklehead,78.051333
7,1970 Dodge Coronet,77.015508
8,1962 City of Detroit Streetcar,75.471629
9,1970 Plymouth Hemi Cuda,74.741449


## 9. Lister les produits (avec le nom et le code du client) qui ont été vendus à perte : Si un produit a été dans cette situation plusieurs fois, il doit apparaître plusieurs fois, Une vente à perte arrive quand le prix de vente est inférieur au prix d’achat ;

In [22]:
qst9 = db.Orders.aggregate([
    {"$unwind": "$OrderDetails"},
    {"$lookup": {
        "from": "Products",
        "localField": "OrderDetails.productCode",
        "foreignField": "productCode",
        "as": "product"
    }},
    {"$unwind": "$product"},
    {"$lookup": {
        "from": "Customers",
        "localField": "customerNumber",
        "foreignField": "customerNumber",
        "as": "customer"
    }},
    {"$unwind": "$customer"},
    {"$match": {
        "$expr": {"$lt": ["$OrderDetails.priceEach", "$product.buyPrice"]}
    }},

    {"$project": {
        "_id": 0,  
        "code_produit": "$product.productCode",
        "nom_produit": "$product.productName",
        "nom_client": "$customer.customerName",
        "nb_client": "$customer.customerNumber",
        "priceEach": "$OrderDetails.priceEach",
        "buyPrice": "$product.buyPrice"
    }}
])

pandas.DataFrame(list(qst9))

Unnamed: 0,code_produit,nom_produit,nom_client,nb_client,priceEach,buyPrice
0,S10_4962,1962 LanciaA Delta 16V,Online Diecast Creations Co.,363,61.99,103.42
1,S18_2957,1934 Ford V8 Coupe,Online Diecast Creations Co.,363,29.87,34.35
2,S18_3136,18th Century Vintage Horse Carriage,Online Diecast Creations Co.,363,47.04,60.74
3,S12_3148,1969 Corvair Monza,Vitachrome Inc.,181,54.33,89.14
4,S18_2319,1964 Mercedec Tour Bus,Vitachrome Inc.,181,37.48,74.86
...,...,...,...,...,...,...
74,S10_4962,1962 LanciaA Delta 16V,"Anna's Decorations, Ltd",276,46.90,103.42
75,S12_1666,1958 Setra Bus,"Anna's Decorations, Ltd",276,63.20,77.90
76,S18_2949,1913 Ford Model T Speedster,"Anna's Decorations, Ltd",276,45.25,60.78
77,S18_2238,1998 Chrysler Plymouth Prowler,"Down Under Souveniers, Inc",323,69.81,101.51
