# 📈 Dynamic Pricing Analysis

Analisis ini menggunakan dataset terintegrasi yang dibangun pada notebook **Copy_of_dataset_making_dp**. 
Dataset tersebut menggabungkan informasi harga dasar, kompetisi, volume permintaan, data wisatawan, event bulanan, serta indikator cuaca untuk setiap kategori produk wisata.

## 1. 🛠️ Environment Setup dan Import Library

Menyiapkan library yang dibutuhkan untuk eksplorasi data, rekayasa fitur, serta pemodelan permintaan.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor

# Fix the NumPy seed so every stochastic step in the notebook is repeatable.
np.random.seed(42)

# Shared look for all charts: seaborn grid style plus a default figure size.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

## 2. 📥 Muat Dataset Integrasi

Dataset disimpan pada file `data/dynamic_pricing_dataset.csv` dan memiliki kolom:
`date`, `base_price_idr`, `category`, `competitive_price`, `competitor_count`, `category_quantity`, `total_visitors`, `monthly_event_days`, `temperature_celsius`, `prcp_mm`.

In [None]:
# Locate the integrated dataset and fail early with a clear message if the
# integration step has not produced it yet.
data_path = Path('data/dynamic_pricing_dataset.csv')
if not data_path.exists():
    raise FileNotFoundError('Dataset dynamic_pricing_dataset.csv tidak ditemukan. Pastikan file dihasilkan dari proses integrasi data.')

# Parse `date` as datetime, store `category` as a pandas Categorical, and
# order rows chronologically with a fresh 0..n-1 index.
df = pd.read_csv(data_path, parse_dates=['date'])
df['category'] = df['category'].astype('category')
df = df.sort_values('date').reset_index(drop=True)

# Quick look at the data: first rows, summary statistics, dtypes/null counts.
display(df.head())
display(df.describe(include='all').transpose())
# DataFrame.info() writes its report to stdout and returns None, so wrapping
# it in print() only appended a stray "None" line to the output.
df.info()

## 3. 📊 Eksplorasi Permintaan & Harga

Melihat dinamika volume penjualan (`category_quantity`) dan perbandingan harga dasar dengan harga kompetitor.

In [None]:
# Calendar parts and the absolute price gap versus the competitor price.
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['price_gap'] = df['base_price_idr'] - df['competitive_price']

# Total quantity per (date, category).
# `category` is a pandas Categorical, so observed=True is needed to keep only
# the pairs that actually occur in the data; otherwise every date x category
# combination is emitted and missing ones show up as a zero sum in the chart.
monthly_qty = (
    df.groupby(['date', 'category'], observed=True)
      ['category_quantity']
      .sum()
      .reset_index()
)

# Demand trend: one line per category over time.
plt.figure(figsize=(14, 6))
sns.lineplot(data=monthly_qty, x='date', y='category_quantity', hue='category', marker='o')
plt.title('Tren Permintaan Bulanan per Kategori')
plt.xlabel('Tanggal')
plt.ylabel('Unit Terjual')
plt.legend(title='Kategori')
plt.show()

# Distribution of the price gap (base price minus competitor price) by category.
plt.figure(figsize=(12, 5))
sns.boxplot(data=df, x='category', y='price_gap', palette='viridis')
plt.title('Sebaran Gap Harga vs Kompetitor')
plt.xlabel('Kategori Produk')
plt.ylabel('Base Price - Competitor Price (IDR)')
plt.show()

## 4. 🔧 Rekayasa Fitur

Membangun fitur tambahan untuk menangkap faktor musiman, tekanan kompetisi, serta dampak cuaca/event.

In [None]:
def _ratio_to_trailing_mean(frame, column, window=12):
    """Return `column` divided by its trailing mean computed within each category.

    The frame holds several category rows per date, so a plain
    `frame[column].rolling(window)` would span only a fraction of `window`
    periods and would depend on the row order inside each date. Rolling per
    category makes the window cover `window` consecutive rows of one category.
    NOTE(review): assumes one row per category per period and that `frame`
    is already sorted by date - confirm against the integration notebook.
    """
    trailing_mean = (
        frame.groupby('category', observed=True)[column]
             .transform(lambda s: s.rolling(window=window, min_periods=1).mean())
    )
    return frame[column] / trailing_mean


# Cyclical encoding of the month so that December and January are neighbours.
df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)

# Relative price position versus the competitor price.
df['price_ratio'] = df['base_price_idr'] / df['competitive_price']

# Event days scaled by the largest value seen in the data.
df['event_intensity'] = df['monthly_event_days'] / df['monthly_event_days'].max()

# Current value relative to its trailing 12-row mean (per category).
df['visitor_index'] = _ratio_to_trailing_mean(df, 'total_visitors')
df['weather_index'] = (
    _ratio_to_trailing_mean(df, 'temperature_celsius')
    * _ratio_to_trailing_mean(df, 'prcp_mm')
)

feature_cols = [
    'base_price_idr', 'competitive_price', 'price_gap', 'price_ratio',
    'competitor_count', 'total_visitors', 'monthly_event_days',
    'temperature_celsius', 'prcp_mm', 'month_sin', 'month_cos',
    'event_intensity', 'visitor_index', 'weather_index'
]
target_col = 'category_quantity'

# Division by zero (e.g. a zero competitor price) yields +/-inf, which
# dropna() does not remove and the regressor cannot accept; convert those to
# NaN first so the affected rows are dropped together with other gaps.
model_df = (
    df[['date', 'category', target_col] + feature_cols]
      .replace([np.inf, -np.inf], np.nan)
      .dropna()
      .reset_index(drop=True)
)
model_df.head()

## 5. 🤖 Pemodelan Permintaan

Menggunakan **RandomForestRegressor** untuk memprediksi `category_quantity` berdasarkan fitur yang tersedia.

In [None]:
# Design matrix: the categorical product category followed by all numeric features.
X = model_df[['category'] + feature_cols]
y = model_df[target_col]

categorical_features = ['category']
numeric_features = list(feature_cols)

# One-hot encode the category (dropping the first level); every other column
# is passed through to the regressor unchanged.
preprocess = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(drop='first'), categorical_features)
    ],
    remainder='passthrough'
)

model = Pipeline(steps=[
    ('prep', preprocess),
    ('regressor', RandomForestRegressor(n_estimators=300, random_state=42))
])

# NOTE(review): the rows are time-ordered and some features use trailing
# windows, so a shuffled split lets the model see periods later than the test
# rows. Consider a chronological hold-out if the score is used for forecasting.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=True
)

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
# The `squared=False` argument of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6; taking the square root of the MSE gives
# the same RMSE on every version.
rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))
r2 = r2_score(y_test, y_pred)

print(f'MAE : {mae:,.2f}')
print(f'RMSE: {rmse:,.2f}')
print(f'R^2 : {r2:.3f}')

# Map the forest's importances back to the transformed column names.
feature_names = model.named_steps['prep'].get_feature_names_out(categorical_features + numeric_features)
importances = model.named_steps['regressor'].feature_importances_
feat_imp = (
    pd.DataFrame({'feature': feature_names, 'importance': importances})
      .sort_values('importance', ascending=False)
)

plt.figure(figsize=(10, 6))
sns.barplot(data=feat_imp.head(15), x='importance', y='feature', palette='crest')
plt.title('Top 15 Feature Importance - Random Forest')
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.show()

## 6. 💡 Simulasi Penyesuaian Harga

Mengukur dampak potensial terhadap permintaan dan pendapatan ketika harga dinaikkan/diturunkan sebesar ±5% dan ±10%.

In [None]:
# Use the most recent period as the baseline for every what-if scenario.
latest_period = model_df['date'].max()
latest_data = model_df[model_df['date'] == latest_period].copy()

scenarios = []
for adj in [-0.10, -0.05, 0.0, 0.05, 0.10]:
    scenario = latest_data.copy()
    # Shift the base price, then refresh the features derived from it so the
    # model sees a consistent row.
    scenario['base_price_idr'] = scenario['base_price_idr'] * (1 + adj)
    scenario['price_gap'] = scenario['base_price_idr'] - scenario['competitive_price']
    scenario['price_ratio'] = scenario['base_price_idr'] / scenario['competitive_price']
    X_scenario = scenario[['category'] + feature_cols]
    scenario['predicted_quantity'] = model.predict(X_scenario)
    scenario['projected_revenue'] = scenario['predicted_quantity'] * scenario['base_price_idr']
    scenario['adjustment_pct'] = adj * 100
    scenarios.append(scenario[['category', 'adjustment_pct', 'base_price_idr', 'predicted_quantity', 'projected_revenue']])

scenario_df = pd.concat(scenarios, ignore_index=True)

# `category` is a Categorical: observed=True restricts the pivot and the
# groupby below to categories that are present in the latest period, instead
# of also producing empty entries for categories with no rows.
pivot_rev = scenario_df.pivot_table(
    index='category', columns='adjustment_pct', values='projected_revenue',
    observed=True
)

display(scenario_df.head(12))
display(pivot_rev)

# Revenue curve per category, in billions of IDR, against the price change.
plt.figure(figsize=(10, 6))
for category, group in scenario_df.groupby('category', observed=True):
    plt.plot(group['adjustment_pct'], group['projected_revenue'] / 1e9, marker='o', label=category)
plt.title('Simulasi Pendapatan vs Penyesuaian Harga (Latest Period)')
plt.xlabel('Penyesuaian Harga (%)')
plt.ylabel('Pendapatan (Miliar IDR)')
plt.legend()
# Dashed line marks the unchanged-price baseline.
plt.axvline(0, color='black', linestyle='--', alpha=0.5)
plt.show()

## 7. 📋 Ringkasan Insight

Merangkum temuan kunci dari analisis dan simulasi untuk rekomendasi strategis.

In [None]:
# Collect the headline facts: data coverage, size, categories and model scores.
start_period = df['date'].min().date()
end_period = df['date'].max().date()
insight = {
    'periode_data': f'{start_period} s.d. {end_period}',
    'jumlah_observasi': int(len(df)),
    'kategori': df['category'].cat.categories.tolist(),
    'metrik_model': {
        'MAE': round(mae, 2),
        'RMSE': round(rmse, 2),
        'R2': round(r2, 3)
    }
}

print('Ringkasan Insight:')
for key, value in insight.items():
    print(f'- {key}: {value}')

# The leading newline must be written as an escape sequence: a single-quoted
# string cannot contain a raw line break, which made this cell a SyntaxError.
print('\nRekomendasi:')
print('* Fokuskan promosi pada bulan dengan event tinggi (Juni-Agustus, Desember) karena berdampak positif pada permintaan.')
print('* Monitor gap harga terhadap kompetitor; kategori dengan gap positif besar menunjukkan peluang optimasi pricing.')
print('* Gunakan simulasi penyesuaian harga untuk menetapkan target revenue realistis per kategori.')
print('* Jadwalkan pembaruan model tiap kuartal untuk menangkap perubahan tren wisatawan dan cuaca.')