# Analyse de performance d’un réseau de boutiques retail
(15 boutiques, 12 mois, ventes, trafic, satisfaction)
## Étape 1 : créer les 4 fichiers CSV de données

On veut 4 fichiers :

stores.csv – info sur les boutiques

sales.csv – ventes détaillées par jour

footfall.csv – visiteurs en boutique

feedback.csv – satisfaction clients

🗂 1. Schéma de chaque fichier
1️⃣ stores.csv

Colonnes :

store_id (1, 2, 3… 15)

store_name (Store Port-Louis, Store Curepipe…)

city

region (North / South / East / West / Central)

staff_count (nb d’employés)

2️⃣ sales.csv

Colonnes :

date (format YYYY-MM-DD, sur 12 mois, par ex. 2024-01-01 → 2024-12-31)

store_id

product

category (Mobile, Internet, Accessories, TV…)

quantity

unit_price

On ajoutera ensuite dans l’ETL : revenue = quantity * unit_price.

3️⃣ footfall.csv

Colonnes :

date

store_id

visitors (nombre de personnes entrées dans la boutique ce jour-là)

4️⃣ feedback.csv

Colonnes :

date

store_id

satisfaction_score (entre 1 et 5, avec décimales possibles, ex. 4.2)

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# ---------- PARAMETERS ----------
# The script draws from both the stdlib `random` module and numpy's global
# RNG, so both are seeded to make the generated CSV files reproducible.
random.seed(42)
np.random.seed(42)

# First and last day of the simulated period, and the number of stores.
n_stores = 15
start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 12, 31)

# (city, region) pairs, one per store.
cities = [
    ("Port-Louis", "North"),
    ("Curepipe", "Central"),
    ("Quatre-Bornes", "Central"),
    ("Ebene", "Central"),
    ("Rose-Hill", "Central"),
    ("Vacoas", "Central"),
    ("Grand-Baie", "North"),
    ("Triolet", "North"),
    ("Mahebourg", "South"),
    ("Flic-en-Flac", "West"),
    ("Tamarin", "West"),
    ("Goodlands", "North"),
    ("Phoenix", "Central"),
    ("Bel-Air", "East"),
    ("Flacq", "East"),
]

# Product catalogue grouped by category; flattened below into
# (product, category) pairs in the order the categories are listed here.
products_by_category = {
    "Mobile": ["Mobile Plan Basic", "Mobile Plan Premium"],
    "Internet": ["Home Internet Fiber 20M", "Home Internet Fiber 50M"],
    "TV": ["TV Package Family", "TV Package Sports"],
    "Accessories": ["Router", "Headphones", "Phone Case"],
}
products = [
    (product_name, category_name)
    for category_name, product_names in products_by_category.items()
    for product_name in product_names
]

# ---------- 1. STORES ----------
# One record per store id 1..n_stores; store k takes the (city, region) pair
# at position k - 1 of `cities`.
# np.random.randint excludes its upper bound, so staff_count falls in 5..24.
store_rows = [
    {
        "store_id": sid,
        "store_name": f"Store {town}",
        "city": town,
        "region": area,
        "staff_count": np.random.randint(5, 25),
    }
    for sid, (town, area) in (
        (k, cities[k - 1]) for k in range(1, n_stores + 1)
    )
]

# Persist the store reference table without the DataFrame index column.
stores_df = pd.DataFrame(store_rows)
stores_df.to_csv("stores.csv", index=False)
print("stores.csv cr√©√©")

# ---------- 2. DATE GENERATION ----------
# Every day from start_date through end_date, both bounds included.
n_days = (end_date - start_date).days + 1
dates = [start_date + timedelta(days=offset) for offset in range(n_days)]

# ---------- 3. SALES ----------
# Unit-price bounds per category, passed to np.random.randint (upper bound
# excluded). Any category not listed here uses the Accessories bounds.
price_bounds = {
    "Mobile": (800, 2000),
    "Internet": (1200, 2500),
    "TV": (600, 1500),
}
accessories_bounds = (200, 1200)

sales_rows = []
for day in dates:
    day_label = day.strftime("%Y-%m-%d")
    for sid in range(1, n_stores + 1):
        # Number of transactions for this store on this day: Poisson, mean 8.
        for _ in range(np.random.poisson(lam=8)):
            item, item_category = random.choice(products)
            units = np.random.randint(1, 5)  # 1..4 (upper bound excluded)
            low, high = price_bounds.get(item_category, accessories_bounds)
            price = np.random.randint(low, high)

            sales_rows.append({
                "date": day_label,
                "store_id": sid,
                "product": item,
                "category": item_category,
                "quantity": units,
                "unit_price": price,
            })

# One row per transaction, written without the DataFrame index column.
sales_df = pd.DataFrame(sales_rows)
sales_df.to_csv("sales.csv", index=False)
print("sales.csv cr√©√©")

# ---------- 4. FOOTFALL ----------
# Mean daily visitors by region; regions not listed (East, South) use the
# default below.
base_by_region = {"Central": 120, "North": 100, "West": 90}
default_base = 80

# Resolve each store's region once, up front. A store's region never changes
# between days, so filtering stores_df inside the loop repeated the same
# DataFrame scan for every (date, store) pair.
region_by_store = dict(
    zip(stores_df["store_id"].tolist(), stores_df["region"].tolist())
)

footfall_rows = []
for date in dates:
    date_label = date.strftime("%Y-%m-%d")
    for store_id in range(1, n_stores + 1):
        region = region_by_store[store_id]
        base = base_by_region.get(region, default_base)

        # Normal draw around the regional mean (sd 30), truncated to an int
        # and floored at 10 visitors.
        visitors = int(np.random.normal(loc=base, scale=30))
        visitors = max(visitors, 10)
        footfall_rows.append({
            "date": date_label,
            "store_id": store_id,
            "visitors": visitors
        })

# One row per (date, store), written without the DataFrame index column.
footfall_df = pd.DataFrame(footfall_rows)
footfall_df.to_csv("footfall.csv", index=False)
print("footfall.csv cr√©√©")

# ---------- 5. FEEDBACK ----------
# One satisfaction score per day and per store: a normal draw (mean 4.0,
# sd 0.5), clamped to the range [1.0, 5.0] and rounded to one decimal.
feedback_rows = [
    {
        "date": day.strftime("%Y-%m-%d"),
        "store_id": sid,
        "satisfaction_score": round(
            max(1.0, min(np.random.normal(loc=4.0, scale=0.5), 5.0)), 1
        ),
    }
    for day in dates
    for sid in range(1, n_stores + 1)
]

# Written without the DataFrame index column.
feedback_df = pd.DataFrame(feedback_rows)
feedback_df.to_csv("feedback.csv", index=False)
print("feedback.csv cr√©√©")


stores.csv cr√©√©
sales.csv cr√©√©
footfall.csv cr√©√©
feedback.csv cr√©√©
