In [21]:
import pandas as pd
import numpy as np
import random

# --- Parameters ---
# Number of stores simulated for the training dataset.
num_stores = 10
# Number of weeks simulated per training store.
weeks_train = 52
# Number of weeks simulated for the single test store.
weeks_test = 38
# Integer status code -> readable name. The codes are the values returned by
# determine_status (0, 1, 2).
status_map = {0:"Alive", 1:"Risk", 2:"Dead"}

# --- Helper functions ---
def simulate_revenue(base=5000):
    """Draw one week's simulated revenue for a store.

    The base amount is scaled by a random efficiency factor; optional
    festival, promotion and holiday bonuses and a seasonal swing are then
    added on top.

    Args:
        base: Baseline weekly revenue before any random effect is applied.

    Returns:
        The revenue passed through ``round()`` and floored at 0.
    """
    # Keep the draws in this order so a seeded ``random`` yields the same
    # sequence of values.
    multiplier = random.uniform(0.6, 1.6)        # strong vs. weak store
    festival_bonus = random.choice([0, 500])     # festival lifts revenue
    promo_bonus = random.choice([0, 300])        # promotion
    holiday_bonus = random.choice([0, 200])      # public holiday
    seasonal_swing = random.uniform(-300, 300)   # seasonality

    total = base * multiplier
    total += festival_bonus
    total += promo_bonus
    total += holiday_bonus
    total += seasonal_swing

    rounded = round(total)
    return rounded if rounded > 0 else 0

def simulate_costs():
    """Draw one week's simulated cost items for a store.

    Returns:
        A tuple ``(rent, maintenance, input_cost, salary)``; each entry is
        drawn uniformly from its own range and passed through ``round()``.
    """
    # (low, high) bounds, listed in the order of the returned tuple.
    cost_ranges = (
        (800, 1200),    # rent
        (50, 150),      # maintenance
        (1800, 2400),   # input cost (raw materials)
        (1200, 1800),   # salary
    )
    return tuple(round(random.uniform(low, high)) for low, high in cost_ranges)

def determine_status(profit, consecutive_neg, store_capital):
    """Classify a store's state for the current week.

    Args:
        profit: This week's profit (revenue minus total cost).
        consecutive_neg: Length of the current run of loss-making weeks.
        store_capital: Capital remaining after this week's profit is applied.

    Returns:
        2 (Dead) when capital is exhausted or the loss streak is at least 4,
        1 (Risk) when this week is a loss and the streak is at least 2,
        0 (Alive) otherwise.
    """
    out_of_capital = store_capital <= 0
    streak_too_long = consecutive_neg >= 4
    if out_of_capital or streak_too_long:
        return 2  # Dead

    on_losing_streak = profit < 0 and consecutive_neg >= 2
    return 1 if on_losing_streak else 0  # Risk / Alive

# --- Training dataset: 10 stores ---
# Every store is simulated week by week; each week produces one feature row
# labelled with the status code returned by determine_status.
data_train = []
for store in range(num_stores):
    # each store starts from its own randomly drawn capital
    store_capital = random.randint(8000,15000)
    consecutive_neg = 0  # length of the current run of loss-making weeks
    for week in range(1, weeks_train+1):
        revenue = simulate_revenue()
        rent, maintenance, input_cost, salary = simulate_costs()
        total_cost = sum((rent, maintenance, input_cost, salary))
        profit = revenue - total_cost
        store_capital += profit

        # a loss extends the streak, any non-negative week resets it
        if profit < 0:
            consecutive_neg += 1
        else:
            consecutive_neg = 0
        status = determine_status(profit, consecutive_neg, store_capital)

        # NOTE(review): Festival is flagged from the revenue level, and
        # Holiday/Promotion are drawn independently here; none of the three
        # reads the festival/promotion/holiday draws made inside
        # simulate_revenue. Confirm this is intended.
        festival_flag = 1 if revenue > 6000 else 0
        season_index = round(np.sin(week/52*2*np.pi)*100)
        holiday_flag = random.choice([0,1])
        promotion_flag = random.choice([0,1])

        # NOTE(review): the simulation continues after a Dead (2) status, so
        # a store can be labelled Alive again in a later week. Confirm.
        row = {
            "Store": store+1,
            "Week": week,
            "Store Capital": store_capital,
            "Revenue": revenue,
            "Rent": rent,
            "Maintenance": maintenance,
            "Input Cost": input_cost,
            "Salary": salary,
            "Festival": festival_flag,
            "Season": season_index,
            "Holiday": holiday_flag,
            "Promotion": promotion_flag,
            "Profit": profit,
            "Store Status": status,
        }
        data_train.append(row)

df_train = pd.DataFrame(data_train)

# --- Test dataset: 1 simulation store ---
# One store is simulated for weeks_test weeks with the same per-week procedure
# as the training stores. The rows carry no "Store" / "Store Status" columns,
# so the frame holds only feature columns.
data_test = []
store_capital = random.randint(8000,15000)  # randomly drawn starting capital
# Loss streak is tracked as in the training loop, but no status label is
# emitted for the test store.
consecutive_neg = 0
for week in range(1, weeks_test+1):
    revenue = simulate_revenue()
    rent, maintenance, input_cost, salary = simulate_costs()
    total_cost = rent + maintenance + input_cost + salary
    profit = revenue - total_cost
    store_capital += profit

    consecutive_neg = consecutive_neg+1 if profit < 0 else 0

    data_test.append({
        "Week": week,
        "Store Capital": store_capital,
        "Revenue": revenue,
        "Rent": rent,
        "Maintenance": maintenance,
        "Input Cost": input_cost,
        "Salary": salary,
        "Festival": 1 if revenue > 6000 else 0,
        # Use the same 52-week period as the training data. Dividing by
        # weeks_test (38) would give the same week number a different Season
        # value at prediction time than the model saw during training.
        "Season": round(np.sin(week/52*2*np.pi)*100),
        "Holiday": random.choice([0,1]),
        "Promotion": random.choice([0,1]),
        "Profit": profit
    })

df_test = pd.DataFrame(data_test)

# --- View samples ---
# Show the first ten rows of each frame, header line first.
print("Train dataset (10 stores, first 10 rows):", df_train.head(10), sep="\n")
print("\nTest dataset (1 store, first 10 rows):", df_test.head(10), sep="\n")

# Check label distribution
status_counts = df_train["Store Status"].value_counts()
print("\nLabel distribution in training set:", status_counts, sep="\n")

# Split the training frame: the target is the status code; the features are
# all remaining columns except the store identifier.
df_labels = df_train["Store Status"]
df_new = df_train.drop(columns=["Store", "Store Status"])

import xgboost as xgb

# Hyper-parameters for the three-class (0/1/2) status classifier.
# NOTE(review): the training labels are randomly simulated, so a class may be
# absent from df_labels on some runs; confirm how the installed XGBoost
# version behaves in that case given the explicit num_class=3.
xgb_params = {
    "objective": 'multi:softprob',
    "num_class": 3,
    "n_estimators": 250,
    "max_depth": 7,
    "learning_rate": 0.1,
    "subsample": 0.8,
    "colsample_bytree": 1,
    "reg_alpha": 0.1,
}
model = xgb.XGBClassifier(**xgb_params)
model.fit(df_new, df_labels)

# Per-class probabilities (rounded to 3 decimals for display) and the
# predicted class for every week of the test store.
test_proba = np.round(model.predict_proba(df_test), 3)
test_pred = model.predict(df_test)

print("Predicted class:", test_pred)
print("Prediction probabilities:", test_proba)



Train dataset (10 stores, first 10 rows):
   Store  Week  Store Capital  Revenue  Rent  Maintenance  Input Cost  Salary  \
0      1     1          12699     3484   857          103        2111    1360   
1      1     2          13225     5503  1142           68        2134    1633   
2      1     3          17119     8088   975          135        1808    1276   
3      1     4          19672     7413  1161          112        1960    1627   
4      1     5          18167     3102   845          109        2090    1563   
5      1     6          17008     3538   988           95        2017    1597   
6      1     7          15991     3348   850          118        2177    1220   
7      1     8          17992     6678   991           89        2092    1505   
8      1     9          19748     6596   930           52        2191    1667   
9      1    10          22676     7921  1106          120        2187    1580   

   Festival  Season  Holiday  Promotion  Profit  Store Status  
0 