In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import ast
from online.envs.bidding_env import BiddingEnv
from definitions import ROOT_DIR


In [None]:
from bidding_train_env.strategy import PlayerBiddingStrategy

In [None]:
# Instantiate the bidding strategy with its default constructor arguments.
strategy = PlayerBiddingStrategy()

In [None]:
# Smoke-test `compute_bid_coef` on small random inputs.
# A locally seeded generator makes the printed values reproducible across
# kernel restarts and does not touch NumPy's global random state.
rng = np.random.default_rng(42)
action = rng.standard_normal(6) * 0.1
pvalues = rng.uniform(low=0, high=0.01, size=3)
pvalues_sigmas = rng.uniform(low=0, high=0.01, size=3)

print(pvalues)
coefs = strategy.train_env.compute_bid_coef(action, pvalues, pvalues_sigmas)
print(coefs)

In [None]:
# Display the `obs_keys` attribute of the strategy's training environment.
strategy.train_env.obs_keys

In [None]:
# Load the training-data CSV (located under ROOT_DIR) into a DataFrame.
train_data_path = (
    ROOT_DIR / "data/traffic_top_regression/training_data_16/training_data_all-rlData.csv"
)
training_data = pd.read_csv(train_data_path)

In [None]:
def safe_literal_eval(val):
    """Parse a string holding a Python literal, falling back to the input.

    Args:
        val: The value to parse. Only ``str`` inputs are parsed; anything else
            (NaN, ``None``, numbers, already-parsed lists/arrays) is returned
            unchanged.

    Returns:
        The object produced by ``ast.literal_eval`` when ``val`` is a string
        containing a valid literal; otherwise ``val`` itself.
    """
    if not isinstance(val, str):
        # Nothing to parse. This also covers NaN / None, and avoids evaluating
        # the truth value of an element-wise NaN check on list/array inputs.
        return val
    try:
        return ast.literal_eval(val)
    except (ValueError, SyntaxError) as exc:
        # Report the actual error and the offending value, then fall back to
        # the raw string so callers can keep going.
        print(f"safe_literal_eval: could not parse {val!r}: {exc}")
        return val

In [None]:
# Parse the stringified "state" / "next_state" columns into NumPy arrays.
# Values that are already arrays are left untouched, so re-running this cell
# on the same kernel does not try to re-parse arrays.
for state_col in ("state", "next_state"):
    training_data[state_col] = training_data[state_col].apply(
        lambda x: x if isinstance(x, np.ndarray) else np.array(safe_literal_eval(x))
    )
training_data.head()

In [None]:
# List the distinct values of the advertiserNumber column in the training data.
training_data.advertiserNumber.unique()

In [None]:
# Period-7 parquet inputs passed to BiddingEnv below.
pvalues_path = ROOT_DIR / "data" / "online_rl_data" / "period-7_pvalues.parquet"
bids_path = ROOT_DIR / "data" / "online_rl_data" / "period-7_bids.parquet"

# Ranges and seed passed to BiddingEnv below.
# NOTE(review): presumably (min, max) bounds used when the env samples a budget / target CPA — confirm against BiddingEnv.
budget_range = (500, 3000)
target_cpa_range = (10, 12)
seed = 42

# Names passed to BiddingEnv as `obs_keys`.
# NOTE(review): presumably the order here fixes the layout of the observation vector — confirm against BiddingEnv before reordering.
keys_list = [
    "time_left",
    "budget_left",
    "budget",
    "cpa",
    "category",
    "historical_bid_mean",
    "last_bid_mean",
    "last_three_bid_mean",
    "least_winning_cost_mean",
    "last_least_winning_cost_mean",
    "last_three_least_winning_cost_mean",
    "least_winning_cost_10_pct",
    "last_least_winning_cost_10_pct",
    "last_three_least_winning_cost_10_pct",
    "least_winning_cost_01_pct",
    "last_least_winning_cost_01_pct",
    "last_three_least_winning_cost_01_pct",
    "pvalues_mean",
    "conversion_mean",
    "bid_success_mean",
    "last_pvalues_mean",
    "last_three_pvalues_mean",
    "last_conversion_mean",
    "last_three_conversion_mean",
    "last_bid_success",
    "last_three_bid_success_mean",
    "historical_successful_bid_position_mean",
    "last_successful_bid_position_mean",
    "last_three_successful_bid_position_mean",
    "historical_cost_mean",
    "last_cost_mean",
    "last_three_cost_mean",
    "historical_cost_slot_1_mean",
    "last_cost_slot_1_mean",
    "last_three_cost_slot_1_mean",
    "historical_cost_slot_2_mean",
    "last_cost_slot_2_mean",
    "last_three_cost_slot_2_mean",
    "historical_cost_slot_3_mean",
    "last_cost_slot_3_mean",
    "last_three_cost_slot_3_mean",
    "historical_bid_over_lwc_mean",
    "last_bid_over_lwc_mean",
    "last_three_bid_over_lwc_mean",
    "historical_pv_over_lwc_mean",
    "last_pv_over_lwc_mean",
    "last_three_pv_over_lwc_mean",
    "historical_pv_over_lwc_90_pct",
    "last_pv_over_lwc_90_pct",
    "last_three_pv_over_lwc_90_pct",
    "historical_pv_over_lwc_99_pct",
    "last_pv_over_lwc_99_pct",
    "last_three_pv_over_lwc_99_pct",
    "current_pvalues_mean",
    "current_pvalues_90_pct",
    "current_pvalues_99_pct",
    "current_pv_num",
    "last_pv_num",
    "last_three_pv_num",
    "pv_num_total"
]

# Build the environment from the period-7 files, the two ranges, the observation key list and the seed.
env = BiddingEnv(pvalues_path, bids_path, budget_range, target_cpa_range, obs_keys=keys_list, seed=seed)

In [None]:
# Number of entries in keys_list.
len(keys_list)

In [None]:
# Select one (advertiser, delivery period) pair and read its logged budget and
# CPA constraint, each averaged over the matching rows of the training data.
advertiser = 8
delivery_period = 7
episode_mask = (training_data["advertiserNumber"] == advertiser) & (
    training_data["deliveryPeriodIndex"] == delivery_period
)
episode_rows = training_data[episode_mask]
budget = episode_rows["budget"].mean()
target_cpa = episode_rows["CPAConstraint"].mean()
print(budget, target_cpa)

In [None]:
# Reset the env for the selected budget / target CPA / advertiser / period; the second value returned by reset() is discarded.
state, _ = env.reset(budget, target_cpa, advertiser, delivery_period)
# Time-step counter used by the following cells to look up logged rows.
ts = 0


In [None]:
# Replay the logged action of the current time step in the env, then advance
# the time-step counter. Each run of this cell performs one env step.
step_mask = (
    (training_data["advertiserNumber"] == advertiser)
    & (training_data["deliveryPeriodIndex"] == delivery_period)
    & (training_data["timeStepIndex"] == ts)
)
action = training_data[step_mask]["action"].item()
action *= 1.001  # Otherwise it ties with itself when it made the third bid
# NOTE(review): Gymnasium's step() returns (obs, reward, terminated, truncated, info);
# the two flags are unpacked in the opposite order here — confirm against BiddingEnv.step.
state, reward, truncated, terminated, info = env.step(action / target_cpa)
ts += 1
print(ts)

In [None]:
# Ask the env for its pvalue statistics (the second returned value is discarded)
# and build the state dictionary from the first one.
# Note: this rebinds `pvalues`, which an earlier cell set to a random array.
pvalues, _ = env.get_pvalues_mean_and_std()
state_dict = env.get_state_dict(pvalues)
state_dict

In [None]:
# Display the current `state` variable (last assigned by the env.reset / env.step cells above).
state

In [None]:
# Fetch the logged state for the same advertiser, period and time step.
# Note: this overwrites the `state` variable returned by the env.
logged_mask = (
    (training_data["advertiserNumber"] == advertiser)
    & (training_data["deliveryPeriodIndex"] == delivery_period)
    & (training_data["timeStepIndex"] == ts)
)
state = training_data.loc[logged_mask, "state"].item()
state

In [None]:
# Take the env's pvalues row for this advertiser at time step `ts` and scale
# its pValue by the last action.
df = env.pvalues_df
ts_mask = (df["advertiserNumber"] == advertiser) & (df["timeStepIndex"] == ts)
ts_df = df[ts_mask]
pv = ts_df["pValue"].item()  # .item() requires exactly one matching row
bids = action * pv

In [None]:
# Load the raw traffic CSV for period 7.
raw_data_path = ROOT_DIR / "data" / "raw_traffic" / "period-7.csv"
raw_df = pd.read_csv(raw_data_path)

In [None]:
# Recompute bids for the previous time step (ts - 1) from the raw traffic:
# one bid per raw row, obtained as the last action times that row's pValue.
prev_ts_mask = (raw_df["advertiserNumber"] == advertiser) & (
    raw_df["timeStepIndex"] == ts - 1
)
prev_ts_df = raw_df[prev_ts_mask]
raw_pv = prev_ts_df["pValue"].to_numpy()
raw_bids = action * raw_pv

In [None]:
# Mean of the logged `xi` column over the previous time step's rows.
# NOTE(review): presumably `xi` is the win indicator, making this a win rate — confirm.
prev_ts_df.xi.mean()

In [None]:
# Find rows where the recomputed bid exceeds leastWinningCost although the
# logged `xi` flag is falsy.
raw_min_cost = prev_ts_df["leastWinningCost"]
raw_xi = raw_bids > raw_min_cost
logged_xi = prev_ts_df["xi"].apply(bool)
strange_df = prev_ts_df[raw_xi & ~logged_xi]

In [None]:
# Shape of the recomputed bids array.
raw_bids.shape

In [None]:
# Rows of the previous time step where the recomputed bid is greater than the logged `bid` column.
prev_ts_df[raw_bids > prev_ts_df.bid]

In [None]:
# For the rows in strange_df, check whether the logged bid is greater than leastWinningCost.
strange_df.bid > strange_df.leastWinningCost

In [None]:
# All raw rows of the previous time step with pvIndex 1287, sorted by bid in descending order.
# NOTE(review): 1287 is hard-coded — presumably picked from an earlier look at strange_df; confirm.
raw_df[(raw_df["timeStepIndex"] == ts - 1) & (raw_df.pvIndex == 1287)].sort_values("bid", ascending=False)

In [None]:
# Preview the first rows of the raw traffic data.
raw_df.head()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Plot the identity line against a curve built from x, sqrt(x) and x**2 on [0, 0.01].
x = np.linspace(0, 0.01, 100)
y = np.sqrt(x)  # square-root term; its weight in `curve` is currently 0.0
curve = 1. * x + 0.0 * y - 10 * x ** 2
plt.plot(x, x)
plt.plot(x, curve)