#Import

In [None]:
# Mount Google Drive into the Colab runtime; the dataset is read from a path
# under this mount point.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import classification_report,f1_score, confusion_matrix, accuracy_score

In [None]:
# Load the base sales table from the mounted Drive and preview its first rows.
dataset_path = '/content/drive/MyDrive/swuds/t5_thesis/98_progress/dataset/nd_sales_LogisticRegression_base.csv'
df = pd.read_csv(dataset_path)
df.head(5)

#Preprocess

In [None]:
# Parse the 'date' column into timestamps and make it the index, so that the
# later calendar features can be read from df.index.
# NOTE(review): rows are not sorted here; the shift-based lag features assume
# the CSV is already in chronological order -- confirm.
parsed_dates = pd.to_datetime(df['date'])
df = df.assign(date=parsed_dates).set_index('date')

In [None]:
# Build lag_1 .. lag_11: lag_k holds the 'sales_sum' value from k rows earlier.
# The first k rows of each lag_k column are NaN because no earlier row exists.
for lag in range(1, 12):
    df[f'lag_{lag}'] = df['sales_sum'].shift(lag)

In [None]:
# Drop every row that has a missing value in any column (this includes the
# leading rows whose lag columns could not be filled), then preview the result.
df = df.dropna()
df.head(5)

In [None]:
# One 0/1 indicator column per weekday, keyed by pandas' dayofweek numbering
# (Monday = 0 ... Sunday = 6). No Saturday (5) column is created.
weekday_codes = {
    'is_Sun': 6,
    'is_Mon': 0,
    'is_Tue': 1,
    'is_Wed': 2,
    'is_Thu': 3,
    'is_Fri': 4,
}
for flag_column, weekday_code in weekday_codes.items():
    df[flag_column] = (df.index.dayofweek == weekday_code).astype(int)
df.head(3)

In [None]:
# One 0/1 indicator column per calendar month (is_Jan .. is_Dec), derived from
# the month number of the datetime index.
month_abbreviations = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]
for month_number, abbreviation in enumerate(month_abbreviations, start=1):
    df[f'is_{abbreviation}'] = (df.index.month == month_number).astype(int)
df.head(3)

In [None]:
# Restrict the study to the most recent 187 rows of the prepared table
# (all rows if the table is shorter) and display them.
window_rows = 187
df_train_window = df.iloc[-window_rows:]
df_train_window

In [None]:
# Sales level at the 66th percentile of the window; rows at or above it are
# later labelled as hot sellers.
# NOTE(review): the percentile is taken over the whole window, which also
# contains the rows later held out as test data -- confirm this is intended.
window_sales = df_train_window['sales_sum']
threshold = np.percentile(window_sales, 66)
threshold

In [None]:
# Label each row: 1 when sales_sum reaches the threshold, otherwise 0.
# df_train_window is a slice of df (taken with tail()), so assigning a column
# onto it in place is chained assignment and raises SettingWithCopyWarning.
# assign() returns a new DataFrame instead, leaving df untouched.
df_train_window = df_train_window.assign(
    is_hotseller=np.where(df_train_window['sales_sum'] >= threshold, 1, 0)
)
df_train_window.head()

In [None]:
# Count how many rows carry each is_hotseller label (1 and 0) and report both.
is_hotseller_counts = df_train_window['is_hotseller'].value_counts()

# .get(label, 0) reports zero when a label does not occur at all.
hot_count = is_hotseller_counts.get(1, 0)
not_hot_count = is_hotseller_counts.get(0, 0)

print("จำนวน is_hotseller ที่เป็น 1:", hot_count)
print("จำนวน is_hotseller ที่เป็น 0:", not_hot_count)

In [None]:
# Show the last 8 rows of the labelled window.
df_train_window.iloc[-8:]

In [None]:
# Keep only the target and the eleven lag columns, in that order. All other
# columns (including the weekday and month indicators) are dropped here.
# reindex() would create an all-NaN column for any name that is missing.
model_columns = ['is_hotseller'] + [f'lag_{k}' for k in range(1, 12)]
df_train_window = df_train_window.reindex(columns=model_columns)

In [None]:
# Split by position: the first 120 rows train the model, the last 7 rows are
# held out for evaluation.
# NOTE(review): if the window holds 187 rows (see the tail(187) above), rows
# 120-179 end up in neither split -- confirm the gap is intended.
df_for_split = df_train_window.copy()
train_data = df_for_split.iloc[:120]
test_data = df_for_split.iloc[-7:]
print(f"Training data shape: {train_data.shape}")
print(f"Test data shape: {test_data.shape}")

In [None]:
# Model inputs are the eleven lag columns, ordered from lag_1 to lag_11;
# the label to predict is the 0/1 is_hotseller column.
features_recursive = [f'lag_{k}' for k in range(1, 12)]
target = 'is_hotseller'

X_train = train_data.loc[:, features_recursive]
y_train = train_data.loc[:, target]

#Train

จำเป็นต้องกำหนด class_weight='balanced' เนื่องจาก target ไม่สมดุล (imbalanced): คลาสที่ต้องการแยกออกมาคือวันขายดีกว่าปกติ ซึ่งเป็นส่วนน้อยของข้อมูล

In [None]:
# Fit a logistic-regression classifier on the lag features.
# class_weight='balanced' is set so that the less frequent class is weighted
# more heavily during fitting.
model_recursive = LogisticRegression(class_weight='balanced')
model_recursive = model_recursive.fit(X=X_train, y=y_train)
model_recursive

In [None]:
# Prepare the 7-step recursive forecast.
#
# Seed vector: the lag_1..lag_11 values of the FIRST forecast day. These are
# the sales of the 11 days before that day, so they are already known when
# the forecast starts. The previous seed was the lag row of the last training
# day, which is the input for predicting that training day itself (one step
# too early), and is further out of date whenever the training rows do not
# end right before the test rows.
# A float copy is taken so the loop can overwrite entries without touching
# test_data.
last_known_lags = test_data[features_recursive].iloc[0].to_numpy(dtype=float, copy=True)

# Calendar dates of the 7 forecast steps, starting at the first test date.
future_dates = pd.date_range(start=test_data.index[0], periods=7, freq='D')
future_df = pd.DataFrame(index=future_dates)

In [None]:
# Sanity check: report the shape of the (column-less) forecast frame, then
# display it to inspect the forecast dates.
future_shape = future_df.shape
print(future_shape)
future_df

In [None]:
# 7-step recursive forecast: predict one day, push the result into the lag
# vector, and repeat.
predictions_recursive = []
current_lags = last_known_lags.copy()

print("Starting 7-step recursive forecast...")
for i in range(7):
    # Pass the lags with their column names, matching how the model was
    # fitted; a bare array triggers scikit-learn's feature-name warning.
    input_vector = pd.DataFrame([current_lags], columns=features_recursive)

    prediction = model_recursive.predict(input_vector)[0]
    predictions_recursive.append(prediction)

    # Age the lags by one day. Index 0 is lag_1 (most recent), so every value
    # moves one slot to the right and the newest value goes in at the front.
    # The previous roll(-1) / [-1] update moved older values into the more
    # recent slots and wrote the new value into lag_11.
    # NOTE(review): `prediction` is a 0/1 class label, while the lag features
    # are sales amounts. A true recursive forecast needs an estimated
    # sales value here (e.g. from a sales regressor) -- confirm the approach.
    current_lags = np.roll(current_lags, 1)
    current_lags[0] = prediction
    print(f"Step {i+1}/7 predicted: {prediction:.2f}")

# Printed once, after the last step (it used to sit inside the loop body).
print("\nRecursive forecasting completed.")

#Evaluate

In [None]:
# Evaluate the recursive predictions with a confusion matrix and summary
# classification metrics.

# True labels of the held-out test rows.
y_true = test_data[target]

# Pin the label set. With only 7 test rows, one class may be missing from the
# true labels or the predictions; without explicit labels the matrix would
# shrink to 1x1 and the report would omit the missing class.
class_labels = [0, 1]

# Confusion matrix (rows = true label, columns = predicted label).
cm = confusion_matrix(y_true, predictions_recursive, labels=class_labels)
print("\nConfusion Matrix:")
print(cm)

# Per-class precision / recall / F1. zero_division=0 reports 0 without a
# warning when a class has no predicted or no true samples.
print("\nClassification Report:")
print(classification_report(y_true, predictions_recursive, labels=class_labels, zero_division=0))

# Overall accuracy.
accuracy = accuracy_score(y_true, predictions_recursive)
print(f"\nAccuracy: {accuracy:.4f}")

# F1-score of the positive class (label 1).
f1 = f1_score(y_true, predictions_recursive, zero_division=0)
print(f"F1-Score: {f1:.4f}")

In [None]:
# Compute the confusion matrix for the recursive forecast.
# This cell referred to `y_test` and `predictions`, which are never defined
# in this notebook and raised NameError; it now uses the names that hold the
# true test labels and the recursive predictions.
cm = confusion_matrix(y_true, predictions_recursive)

# Display the confusion matrix.
print("\nConfusion Matrix:")
print(cm)

In [None]:
# Per-class precision / recall / F1 for the recursive forecast.
# This cell referred to `y_test` and `predictions`, which are never defined
# in this notebook and raised NameError; it now uses the names that hold the
# true test labels and the recursive predictions.
print("\nClassification Report:")
print(classification_report(y_true, predictions_recursive))