In [None]:
import numpy as np
import pandas as pd
from annual_events_date.christmas_event import christmas_event
from annual_events_date.easter_event import easter_week
from annual_events_date.new_year import new_year
from annual_events_date.valentine_event import valentine_event
from annual_events_date.is_rainy import is_rainy_season
from annual_events_date.halloween_event import halloween
from annual_events_date.independence_day_event import independence_day
from annual_events_date.is_black_friday import is_black_friday_promo
from sklearn.metrics import mean_absolute_error




In [2]:
# Seed NumPy's global RNG so every run regenerates the same synthetic data
# (42, because every data engineer is fond of that number).
np.random.seed(42)

# Date predicates imported from `annual_events_date`; each one is applied to
# the sampled timestamps when the `event` column is filled in.
event_func = [
    christmas_event, easter_week, new_year, valentine_event,
    halloween, independence_day, is_black_friday_promo,
]

rows = 100

# Bounds of the window the synthetic timestamps are drawn from.
start = pd.Timestamp("2023-01-12")
end = pd.Timestamp("2100-05-25")

# Sample uniformly between the bounds' integer `.value` representations and
# convert the draws back into datetimes.
sampled_values = np.random.uniform(start.value, end.value, size=rows)
random_dates = pd.to_datetime(sampled_values)

# Skeleton frame: one sampled timestamp per row; every other column starts as
# a constant placeholder.
df = pd.DataFrame({"datetime": random_dates}).assign(
    event=False,
    promo=0,
    kedai_ramai=False,
    is_rain=False,
    # pegawai_lengkap=True,  # column not generated yet
    revenue=1_000_000,
)

# --- event: OR together the result of every predicate in `event_func`
event_mask = np.zeros(rows, dtype=bool)
for event_predicate in event_func:
    flagged = df["datetime"].apply(event_predicate)
    event_mask |= flagged
df["event"] = event_mask

# --- promo: event rows get a random code of 2 or 3, the rest keep 0
event_rows = df["event"]
df.loc[event_rows, "promo"] = np.random.randint(2, 4, size=event_rows.sum())
promo_mask_event = df["promo"] >= 2
promo_mask_normal = df["promo"] < 2

# --- kedai_ramai (busy shop): 80% chance on promo rows, 55% on the others
promo_draws = np.random.rand(promo_mask_event.sum())
df.loc[promo_mask_event, "kedai_ramai"] = promo_draws < 0.8
normal_draws = np.random.rand(promo_mask_normal.sum())
df.loc[promo_mask_normal, "kedai_ramai"] = normal_draws < 0.55


# --- is_rain: 90% chance when the rainy-season predicate is truthy, else 10%
def _rain_draw(in_rainy_season):
    """Draw a single rain flag using the season-dependent probability."""
    rain_probability = 0.9 if in_rainy_season else 0.1
    return np.random.rand() < rain_probability


rainy_season_mask = df["datetime"].apply(is_rainy_season)
df["is_rain"] = rainy_season_mask.apply(_rain_draw)

# TODO: randomly generate the `pegawai_lengkap` (fully staffed) column;
# to be added in the future.

# Revenue = base amount x per-row multiplier (`rev_score`) x random noise.
base_revenue = 10.0

# (row selector, change to rev_score) pairs, applied top to bottom.
score_rules = [
    (df["promo"] == 2, 0.15),                      # promo code 2
    (df["promo"] == 3, 0.25),                      # promo code 3
    (df["kedai_ramai"], 0.30),                     # busy shop
    (df["is_rain"], -0.10),                        # rain
    (df["event"] & df["kedai_ramai"], 0.20),       # event + busy = peak day
    (df["is_rain"] & ~df["kedai_ramai"], -0.20),   # rain + quiet = near-dead day
]

# Every row starts from a neutral multiplier of 1.0.
df["rev_score"] = 1.0
for rule_mask, rule_delta in score_rules:
    df.loc[rule_mask, "rev_score"] += rule_delta

# Bound the multiplier to [0.5, 2.5] before turning it into revenue.
df["rev_score"] = df["rev_score"].clip(0.5, 2.5)

# Multiplicative noise drawn from a normal distribution (mean 1.0, std 0.05).
noise = np.random.normal(1.0, 0.05, size=len(df))
df["revenue"] = base_revenue * df["rev_score"]
df["revenue"] *= noise

# Display rows 80-99 of the generated frame.
df.iloc[80:100]

Unnamed: 0,datetime,event,promo,kedai_ramai,is_rain,revenue,rev_score
80,2089-10-20 17:07:26.829118464,False,0,True,True,12.062141,1.2
81,2071-04-02 12:50:38.667808768,True,3,True,False,17.734182,1.75
82,2048-08-18 04:27:06.185777152,False,0,True,False,12.127497,1.3
83,2027-12-12 23:14:21.468379392,False,0,False,False,8.317952,1.0
84,2047-02-02 10:15:32.951761920,False,0,False,True,7.39324,0.7
85,2048-03-09 16:55:23.276045824,False,0,False,True,7.750122,0.7
86,2079-06-23 11:33:45.904205824,False,0,False,False,9.828706,1.0
87,2072-05-09 11:04:30.842479616,False,0,True,True,12.149589,1.2
88,2091-09-01 23:17:28.347008000,False,0,True,False,13.210937,1.3
89,2059-07-25 09:03:04.922023936,False,0,False,False,10.837647,1.0


In [3]:
import joblib

# NOTE(review): joblib.load is pickle-based, so loading a file can execute
# arbitrary code -- only load model artifacts from a trusted source.
pipeline = joblib.load("revenue_model_2_fix.pkl")

# The model receives every column except the target, the intermediate score
# and the raw timestamp.
non_feature_cols = ["revenue", "rev_score", "datetime"]
features = df.drop(columns=non_feature_cols)
predict = pipeline.predict(features)

# Mean absolute error between the simulated revenue and the predictions.
mae = mean_absolute_error(y_true=df["revenue"], y_pred=predict)

In [6]:
# Show the raw predictions, then the mean absolute error, one per line.
print(predict, mae, sep="\n")

[12.00386142 13.02908421 13.02908421 13.02908421 12.00386142  6.97010951
 13.02908421 12.00386142 13.02908421  6.97010951 12.00386142  6.97010951
 13.02908421 13.02908421 12.00386142 12.00386142 13.02908421 13.02908421
 10.0626253  13.02908421 13.02908421 12.00386142 13.02908421 10.0626253
 13.02908421 12.00386142 13.02908421 12.00386142  6.97010951 10.0626253
 12.00386142  6.97010951 12.00386142 10.0626253  12.00386142 13.02908421
 13.02908421 10.0626253  15.65069544  6.97010951 13.02908421 13.02908421
 10.0626253  10.0626253  13.02908421 12.00386142 12.00386142 17.33654175
 10.0626253  13.02908421  6.97010951 15.65069544 13.02908421 16.0486053
  6.97010951 10.0626253  12.00386142 12.00386142 10.0626253   6.97010951
 12.00386142 12.00386142  6.97010951 10.0626253  12.00386142 12.00386142
  6.97010951 12.00386142 12.00386142  6.97010951  6.97010951 10.0626253
 13.02908421  9.69221036 13.02908421  6.97010951 13.02908421 12.00386142
  6.97010951 15.65069544 12.00386142 17.33654175 13.029

In [7]:
# Add up the simulated revenue across every row of the frame.
revenue_column = df["revenue"]
tot_rev = revenue_column.sum()
print("total revenue :", tot_rev)

total revenue : 1120.0277246193396
