In [119]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)


In [120]:
# Générer des données pour les utilisateurs
user_ids = list(range(1, 101))
names = ["Alice", "Bob", "Charlie", "Diana", "Eve", "Frank", "Grace", "Heidi"]
ages = np.random.randint(18, 70, size=100)
countries = ["USA", "Canada", "France", "Germany", "Australia"]
user_data = {
    "user_id": user_ids,
    "name": [random.choice(names) for _ in range(100)],
    "age": ages,
    "country": [random.choice(countries) for _ in range(100)]
}
df_users = pd.DataFrame(user_data)

# Générer des données pour les produits
product_ids = list(range(1, 21))
categories = ["Electronics", "Books", "Clothing", "Food", "Toys"]
prices = np.random.uniform(5.0, 100.0, size=20).round(2)
product_data = {
    "product_id": product_ids,
    "product_name": [f"Product_{i}" for i in range(1, 21)],
    "category": [random.choice(categories) for _ in range(20)],
    "price": prices
}
df_products = pd.DataFrame(product_data)

# Générer des données pour les achats
purchase_data = {
    "user_id": [random.choice(user_ids) for _ in range(200)],
    "product_id": [random.choice(product_ids) for _ in range(200)],
    "date": [datetime.now() - timedelta(days=random.randint(1, 365)) for _ in range(200)],
    "quantity": np.random.randint(1, 5, size=200)
}
df_purchases = pd.DataFrame(purchase_data)


In [121]:
df_users.rename(columns={"user_id": "user_id", "name": "user_name", "age": "user_age", "country": "user_country"}, inplace=True)
df_products.rename(columns={"product_id": "product_id", "product_name": "product_name", "category": "product_category", "price": "product_price"}, inplace=True)
df_purchases.rename(columns={"user_id": "user_id", "product_id": "product_id", "date": "purchase_date", "quantity": "purchase_quantity"}, inplace=True)

In [122]:
df_users

Unnamed: 0,user_id,user_name,user_age,user_country
0,1,Bob,48,Canada
1,2,Charlie,46,France
2,3,Alice,32,Germany
3,4,Alice,18,Canada
4,5,Charlie,65,Australia
5,6,Alice,30,Canada
6,7,Bob,62,Canada
7,8,Alice,60,France
8,9,Charlie,63,Germany
9,10,Alice,19,USA


In [123]:
df_products.head(20)

Unnamed: 0,product_id,product_name,product_category,product_price
0,1,Product_1,Clothing,14.97
1,2,Product_2,Food,41.1
2,3,Product_3,Toys,53.79
3,4,Product_4,Toys,74.44
4,5,Product_5,Books,22.93
5,6,Product_6,Books,6.01
6,7,Product_7,Electronics,83.73
7,8,Product_8,Books,24.74
8,9,Product_9,Food,75.19
9,10,Product_10,Toys,76.24


In [124]:
df_purchases.head()

Unnamed: 0,user_id,product_id,purchase_date,purchase_quantity
0,71,10,2023-11-09 18:27:47.829163,4
1,79,17,2023-11-11 18:27:47.829163,2
2,51,18,2024-06-04 18:27:47.829163,3
3,15,5,2024-04-29 18:27:47.829163,1
4,49,20,2024-03-02 18:27:47.829163,4


In [125]:
real_product_names = {
    "Product_1": "Smartphone",
    "Product_2": "Laptop",
    "Product_3": "Headphones",
    "Product_4": "T-shirt",
    "Product_5": "Novel Book",
    "Product_6": "Action Figure",
    "Product_7": "Sneakers",
    "Product_8": "Blender",
    "Product_9": "Tablet",
    "Product_10": "Coffee Machine",
    "Product_11": "Guitar",
    "Product_12": "Jacket",
    "Product_13": "Backpack",
    "Product_14": "Fitness Tracker",
    "Product_15": "Board Game",
    "Product_16": "Cookbook",
    "Product_17": "Smartwatch",
    "Product_18": "Desk Lamp",
    "Product_19": "Dollhouse",
    "Product_20": "Wireless Charger"
}

df_products['product_name']=df_products['product_name'].map(real_product_names)
df_products.head()

Unnamed: 0,product_id,product_name,product_category,product_price
0,1,Smartphone,Clothing,14.97
1,2,Laptop,Food,41.1
2,3,Headphones,Toys,53.79
3,4,T-shirt,Toys,74.44
4,5,Novel Book,Books,22.93


In [126]:
df_purchases_total = pd.merge(df_purchases, df_products, on="product_id", how='left')

In [127]:
df_purchases_total.head()

Unnamed: 0,user_id,product_id,purchase_date,purchase_quantity,product_name,product_category,product_price
0,71,10,2023-11-09 18:27:47.829163,4,Coffee Machine,Toys,76.24
1,79,17,2023-11-11 18:27:47.829163,2,Smartwatch,Books,97.2
2,51,18,2024-06-04 18:27:47.829163,3,Desk Lamp,Food,91.2
3,15,5,2024-04-29 18:27:47.829163,1,Novel Book,Books,22.93
4,49,20,2024-03-02 18:27:47.829163,4,Wireless Charger,Clothing,48.89


In [128]:
df_purchases_total.shape, df_purchases.shape

((200, 7), (200, 4))

In [129]:
df_purchases_total['cost'] = df_purchases_total['purchase_quantity'] * df_purchases_total['product_price']

In [130]:
user_spending = df_purchases_total.groupby('user_id').agg(total_quantity=('purchase_quantity', 'sum'), total_spent=('cost', 'sum'), avarage_quantity=('purchase_quantity', 'sum'), product_name_median = ('product_name', lambda x: x.mode()[0]), product_category_median = ('product_category', lambda x: x.mode()[0])).reset_index()

In [131]:
user_spending

Unnamed: 0,user_id,total_quantity,total_spent,avarage_quantity,product_name_median,product_category_median
0,1,4,199.26,4,Board Game,Toys
1,2,22,1010.6,22,Blender,Books
2,3,11,497.22,11,Blender,Books
3,4,9,469.17,9,Tablet,Food
4,5,2,182.4,2,Desk Lamp,Food
5,6,7,253.19,7,Fitness Tracker,Books
6,8,5,65.14,5,Action Figure,Books
7,9,8,514.24,8,Coffee Machine,Food
8,11,10,276.2,10,Dollhouse,Electronics
9,14,2,92.2,2,Fitness Tracker,Food


In [132]:
categrories_spending = df_purchases_total.groupby('product_category').agg(total_quantity=('purchase_quantity', 'sum'), total_spent=('cost', 'sum'), avarage_quantity=('purchase_quantity', 'sum'), product_name_median = ('product_name', lambda x: x.mode()[0]), product_category_median = ('product_category', lambda x: x.mode()[0])).reset_index()

In [133]:
categrories_spending

Unnamed: 0,product_category,total_quantity,total_spent,avarage_quantity,product_name_median,product_category_median
0,Books,124,6194.21,124,Smartwatch,Books
1,Clothing,56,2262.96,56,Wireless Charger,Clothing
2,Electronics,113,3626.31,113,Dollhouse,Electronics
3,Food,153,10547.3,153,Desk Lamp,Food
4,Toys,69,4105.76,69,Headphones,Toys


In [134]:
categrories_spending.sort_values('total_spent', ascending=False)

Unnamed: 0,product_category,total_quantity,total_spent,avarage_quantity,product_name_median,product_category_median
3,Food,153,10547.3,153,Desk Lamp,Food
0,Books,124,6194.21,124,Smartwatch,Books
4,Toys,69,4105.76,69,Headphones,Toys
2,Electronics,113,3626.31,113,Dollhouse,Electronics
1,Clothing,56,2262.96,56,Wireless Charger,Clothing


In [135]:
df_purchases_total=df_purchases_total.merge(df_users, on='user_id', how='left')
df_purchases_total.head()

Unnamed: 0,user_id,product_id,purchase_date,purchase_quantity,product_name,product_category,product_price,cost,user_name,user_age,user_country
0,71,10,2023-11-09 18:27:47.829163,4,Coffee Machine,Toys,76.24,304.96,Alice,68,France
1,79,17,2023-11-11 18:27:47.829163,2,Smartwatch,Books,97.2,194.4,Alice,61,Australia
2,51,18,2024-06-04 18:27:47.829163,3,Desk Lamp,Food,91.2,273.6,Heidi,57,Germany
3,15,5,2024-04-29 18:27:47.829163,1,Novel Book,Books,22.93,22.93,Charlie,40,Canada
4,49,20,2024-03-02 18:27:47.829163,4,Wireless Charger,Clothing,48.89,195.56,Grace,52,Canada


In [136]:
df_purchases_total.groupby('user_country').agg(total_quantity=('purchase_quantity', 'sum'), total_spent=('cost', 'sum'), avarage_quantity=('purchase_quantity', 'sum'), product_name_median = ('product_name', lambda x: x.mode()[0]), product_category_median = ('product_category', lambda x: x.mode()[0])).reset_index()

Unnamed: 0,user_country,total_quantity,total_spent,avarage_quantity,product_name_median,product_category_median
0,Australia,83,4045.19,83,Blender,Food
1,Canada,146,7666.18,146,Tablet,Food
2,France,108,5219.57,108,Cookbook,Books
3,Germany,97,5692.84,97,Wireless Charger,Food
4,USA,81,4112.76,81,Fitness Tracker,Books


In [137]:
df_products["discounted_price"] = df_products.apply(
    lambda x: x["product_price"] * 0.9 if x["product_category"] in ["Books", "Toys"] else x["product_price"], axis=1
)