In [1]:
import pandas as pd
from prophet import Prophet
import plotly.graph_objs as go
import plotly.io as pio
from plotly.subplots import make_subplots
from statsmodels.tsa.seasonal import STL
from main_model import MainModel

In [2]:
# Load the merchant metrics sample. The file carries a leftover index column,
# which pandas surfaces as 'Unnamed: 0' in the recorded preview.
df = pd.read_csv('sample.csv')
# The head() limit is far larger than the row count in the recorded preview,
# so this asks for the whole frame; the rendering itself is truncated by pandas.
df.head(50000)

Unnamed: 0,txn_date,software_product,active_merchant,new_merchant
0,2025-06-30,Retail,207798,122
1,2025-06-30,Booking,13022,19
2,2025-06-30,FNB,67978,57
3,2025-06-29,Booking,13014,14
4,2025-06-29,Retail,208129,203
...,...,...,...,...
1628,2024-01-02,FNB,45508,93
1629,2024-01-02,Retail,185177,177
1630,2024-01-01,Booking,7883,15
1631,2024-01-01,Retail,184969,76


In [4]:
from main_model import MainModel

# Parse the date column once so min()/max() below operate on real timestamps.
df['txn_date'] = pd.to_datetime(df['txn_date'])
# Full observed date range, passed to the model as strings.
newest_date = str(df['txn_date'].max())
oldest_date = str(df['txn_date'].min())
# Give the model its own copy of the frame.
td_df = df.copy()

# MainModel is project-local (main_model.py); its behaviour is not visible
# from this notebook.
model = MainModel(
    df = td_df, 
    date_col='txn_date', 
    metric_col='active_merchant')

# STL anomaly detection for the Retail product over the whole observed range.
# Per the recorded output, a date suffix is added to the saved file name.
anomalies_data = model.detect_stl_anomalies(
    type='Retail', 
    start=oldest_date, 
    end=newest_date,
    file_path='static/anomalies_retail.csv',  
    save=True)

# Today's date at midnight, used as the end bound of the forecast call.
today = pd.to_datetime('today').normalize()
# NOTE(review): `end` is a Timestamp here but a string in the anomaly call
# above - confirm MainModel accepts both.
forecast_data = model.forecast_with_prophet(
    type='Retail',
    start = oldest_date,
    end = today,
    file_path='static/forecast_retail.csv',
    save=True)

01:04:49 - cmdstanpy - INFO - Chain [1] start processing


Anomaly detection results saved to static/anomalies_retail_20250623.csv


01:04:50 - cmdstanpy - INFO - Chain [1] done processing


Forecast results exported to: static/forecast_retail_20250623.csv
Model performance summary exported to: static/forecast_retail_20250623_summary.csv


In [None]:
# Distinct product lines present in the data; later cells filter on 'Retail'.
software_product = df['software_product'].unique()
print(f"Software products: {software_product}")

In [None]:
# Current date with the time component zeroed by normalize(); later cells use
# it as the upper bound of their date ranges.
today = pd.to_datetime('today').normalize()
print(f"Today's date: {today}")

In [None]:
from statsmodels.tsa.seasonal import STL
import numpy as np
import pandas as pd

def detect_stl_anomalies(data, date_col='txn_date', metric_col='active_merchant',
                         threshold=2.5, period=7, start_date=None, end_date=None):
    """Flag anomalous observations of a dated metric using STL residuals.

    The input is restricted to the requested date window, rows sharing a date
    are summed, gaps in the metric are forward- then back-filled, and the
    series is decomposed with STL. A point is an anomaly when its residual
    lies more than ``threshold`` residual standard deviations away from the
    residual mean.

    Series shorter than 20 points skip STL and use a z-score on the raw values
    instead. If STL raises, a centred 7-point rolling mean stands in for the
    trend and the seasonal component is set to zero.

    Args:
        data (pd.DataFrame): Source rows; not modified (a copy is taken).
        date_col (str): Name of the date column.
        metric_col (str): Name of the metric column.
        threshold (float): Number of standard deviations that marks an anomaly.
        period (int): Seasonal period handed to STL.
        start_date: Optional inclusive lower bound; falsy values disable it.
        end_date: Optional inclusive upper bound; falsy values disable it.

    Returns:
        pd.DataFrame: One row per date with columns ``ds``, ``y``, ``trend``,
        ``seasonal``, ``residual``, ``anomaly``, ``residual_threshold_upper``
        and ``residual_threshold_lower``. Both the STL path and the
        short-series path return this same set of columns.
    """
    df = data.copy()
    df[date_col] = pd.to_datetime(df[date_col])

    # Filter by date range when bounds are given.
    if start_date:
        df = df[df[date_col] >= pd.to_datetime(start_date)]
    if end_date:
        df = df[df[date_col] <= pd.to_datetime(end_date)]

    # Collapse rows that share a date by summing the metric.
    if df.duplicated(subset=[date_col]).any():
        df = df.groupby(date_col)[metric_col].sum().reset_index()

    df = df.sort_values(date_col)
    ts = df.set_index(date_col)[metric_col].ffill().bfill()

    if len(ts) < 20:
        # Too few points for a seasonal decomposition: z-score fallback.
        center, spread = ts.mean(), ts.std()
        deviation = ts - center
        anomalies = np.abs(deviation / spread) > threshold
        return pd.DataFrame({
            'ds': ts.index,
            'y': ts.values,
            'trend': ts.values,
            'seasonal': 0,
            'residual': deviation.values,
            'anomaly': anomalies.values,
            # The residual here is (y - mean), so its mean is 0 and the
            # z-score rule is equivalent to these residual bounds. Emitting
            # them keeps the schema identical to the STL path.
            'residual_threshold_upper': threshold * spread,
            'residual_threshold_lower': -threshold * spread,
        }).reset_index(drop=True)

    try:
        stl = STL(ts, period=period, seasonal=21, robust=True)
        result = stl.fit()
        trend, seasonal, residual = result.trend, result.seasonal, result.resid
    except Exception as e:
        # Deliberate best-effort: fall back to a rolling-mean trend rather
        # than failing the whole analysis. The window is fixed at 7 and does
        # not follow `period`.
        print(f"[Warning] STL error: {e}")
        trend = ts.rolling(7, center=True).mean().fillna(ts.mean())
        seasonal = pd.Series(0, index=ts.index)
        residual = (ts - trend).fillna(0)

    res_mean, res_std = residual.mean(), residual.std()
    if res_std < 1e-10:
        # Degenerate residuals: derive a scale from the raw series instead.
        res_std = ts.std() * 0.1

    upper = res_mean + threshold * res_std
    lower = res_mean - threshold * res_std
    anomalies = (residual > upper) | (residual < lower)

    return pd.DataFrame({
        'ds': ts.index,
        'y': ts.values,
        'trend': trend.values,
        'seasonal': seasonal.values,
        'residual': residual.values,
        'anomaly': anomalies.values,
        'residual_threshold_upper': upper,
        'residual_threshold_lower': lower
    }).reset_index(drop=True)

In [None]:
from prophet import Prophet

def forecast_with_prophet(data, date_col='ds', metric_col='y',
                          predict_periods=60, train_start=None, train_end=None):
    """Fit a Prophet model on a dated metric and forecast beyond its last date.

    Args:
        data (pd.DataFrame): Source rows; not modified (a copy is taken).
        date_col (str): Name of the date column; renamed to ``ds``.
        metric_col (str): Name of the metric column; renamed to ``y``.
        predict_periods (int): Number of daily steps to forecast past the
            last training date.
        train_start: Optional inclusive lower bound of the training window;
            falsy values disable it.
        train_end: Optional inclusive upper bound of the training window;
            falsy values disable it.

    Returns:
        tuple: ``(forecast_df, model)``. ``forecast_df`` holds ``ds``,
        ``yhat``, ``yhat_lower`` and ``yhat_upper`` for every predicted date,
        left-joined with the training rows on ``ds`` (so ``y`` is missing on
        future dates). ``model`` is the fitted Prophet instance.
    """
    df = data.copy()
    df[date_col] = pd.to_datetime(df[date_col])
    df = df.rename(columns={date_col: 'ds', metric_col: 'y'})

    # Order chronologically BEFORE filling gaps. Filling in raw row order
    # would make a newest-first frame "forward-fill" from future dates.
    df = df.sort_values('ds', kind='stable')
    df['y'] = df['y'].ffill().bfill()

    # Restrict to the training window when bounds are given.
    if train_start:
        df = df[df['ds'] >= pd.to_datetime(train_start)]
    if train_end:
        df = df[df['ds'] <= pd.to_datetime(train_end)]

    # NOTE(review): daily_seasonality is enabled although the forecast grid
    # is daily (freq='D') - confirm this component is wanted.
    model = Prophet(
        daily_seasonality=True,
        weekly_seasonality=True,
        yearly_seasonality=True
    )
    model.fit(df)

    future = model.make_future_dataframe(periods=predict_periods, freq='D')
    forecast = model.predict(future)

    # Attach the training columns so actuals and predictions share one frame.
    forecast_df = forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].merge(df, on='ds', how='left')
    return forecast_df, model

In [None]:
import plotly.graph_objects as go

def plot_forecast_line(forecast_df, title="Forecast Line Chart",
                      export_png=True, png_filename=None):
    """Build a Plotly line chart of actuals, forecast and its uncertainty band.

    Args:
        forecast_df (pd.DataFrame): Needs ``ds``, ``yhat``, ``yhat_lower`` and
            ``yhat_upper``; an optional ``y`` column is drawn as the actuals.
        title (str): Chart title.
        export_png (bool): When truthy, also write the chart to a PNG file.
        png_filename (str): Target file; defaults to a timestamped name.

    Returns:
        The assembled figure. A failed PNG export is reported with a printed
        warning and does not raise.
    """
    dates = forecast_df['ds']
    band_line = dict(color='lightgreen', dash='dot')

    # Order matters: the lower bound is filled with 'tonexty', i.e. up to the
    # trace added immediately before it (the upper bound).
    trace_specs = []
    if 'y' in forecast_df.columns:
        trace_specs.append(dict(
            x=dates, y=forecast_df['y'], name='Actual',
            line=dict(color='blue'),
        ))
    trace_specs.append(dict(
        x=dates, y=forecast_df['yhat'], name='Forecast',
        line=dict(color='green'),
    ))
    trace_specs.append(dict(
        x=dates, y=forecast_df['yhat_upper'], name='Upper Bound',
        line=dict(band_line), showlegend=False,
    ))
    trace_specs.append(dict(
        x=dates, y=forecast_df['yhat_lower'], name='Lower Bound',
        line=dict(band_line), fill='tonexty',
        fillcolor='rgba(144,238,144,0.2)', showlegend=True,
    ))

    fig = go.Figure()
    for spec in trace_specs:
        fig.add_trace(go.Scatter(**spec))

    fig.update_layout(
        title=title,
        xaxis_title='Date',
        yaxis_title='Value',
        template='plotly_white'
    )

    # Nothing more to do unless a PNG was requested.
    if not export_png:
        return fig

    if png_filename is None:
        png_filename = f"forecast_chart_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.png"
    try:
        fig.write_image(png_filename)
        print(f"Forecast chart exported to: {png_filename}")
    except Exception as e:
        print(f"[Warning] PNG export failed: {e}. Install kaleido with 'pip install kaleido'")
    return fig

In [None]:
from datetime import timedelta
import plotly.graph_objects as go

def plot_recent_anomalies_bar(anomaly_df, days_back=30, title="Recent Anomalies",
                             export_png=True, png_filename=None,
                             yaxis_title='New Merchant Count'):
    """
    Plot the most recent window of anomaly-detection results as a bar chart.

    Normal and anomalous points are drawn as two separately coloured bar
    series, and an annotation reports the anomaly count and rate in the window.

    Args:
        anomaly_df (pd.DataFrame): Anomaly detection results with columns
            ``ds`` (dates, or anything ``pd.to_datetime`` can parse), ``y``
            and a boolean ``anomaly``. Not modified.
        days_back (int): Number of days to look back from the latest date.
        title (str): Chart title.
        export_png (bool): Whether to export the chart as PNG.
        png_filename (str): Custom filename for the PNG export; defaults to a
            timestamped name.
        yaxis_title (str): Y-axis label. The default keeps the previously
            hard-coded label; pass the real metric name when plotting
            something other than new merchants.

    Returns:
        plotly.graph_objects.Figure: Plotly figure object
    """
    # Parse dates on a copy so string-typed `ds` values (e.g. results re-read
    # from CSV) support the date arithmetic below; the caller's frame is kept.
    frame = anomaly_df.assign(ds=pd.to_datetime(anomaly_df['ds']))

    latest_date = frame['ds'].max()
    from_date = latest_date - timedelta(days=days_back)

    # Filter recent data
    recent_df = frame[
        (frame['ds'] >= from_date) &
        (frame['ds'] <= latest_date)
    ].copy()

    # Separate normal and anomaly points
    normal_points = recent_df[~recent_df['anomaly']]
    anomaly_points = recent_df[recent_df['anomaly']]

    fig = go.Figure()

    # Add normal data points
    if len(normal_points) > 0:
        fig.add_trace(go.Bar(
            x=normal_points['ds'],
            y=normal_points['y'],
            name='Normal Data',
            marker_color='lightblue',
            opacity=0.7
        ))

    # Add anomaly points with different color
    if len(anomaly_points) > 0:
        fig.add_trace(go.Bar(
            x=anomaly_points['ds'],
            y=anomaly_points['y'],
            name='Anomalies',
            marker_color='red',
            opacity=0.7
        ))

    # Update layout
    fig.update_layout(
        title=title,
        xaxis_title='Date',
        yaxis_title=yaxis_title,
        barmode='group',  # Changed from 'overlay' to 'group'
        template='plotly_white',
        showlegend=True,
        height=500
    )

    # Add annotation with the anomaly count; guard against an empty window
    anomaly_count = len(anomaly_points)
    total_count = len(recent_df)
    anomaly_rate = (anomaly_count / total_count * 100) if total_count > 0 else 0

    fig.add_annotation(
        text=f"Anomalies: {anomaly_count}/{total_count} ({anomaly_rate:.1f}%)",
        xref="paper", yref="paper",
        x=0.02, y=0.98,
        showarrow=False,
        font=dict(size=12, color="black"),
        bgcolor="rgba(255,255,255,0.8)",
        bordercolor="black",
        borderwidth=1
    )

    # Export to PNG if requested; a failed export only prints a warning
    if export_png:
        if png_filename is None:
            png_filename = f"anomaly_chart_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.png"
        try:
            fig.write_image(png_filename)
            print(f"Anomaly chart exported to: {png_filename}")
        except Exception as e:
            print(f"[Warning] PNG export failed: {e}. Install kaleido with 'pip install kaleido'")

    return fig

In [None]:
# Retail rows only, then anomaly detection on the new-merchant metric for a
# fixed window, charted over the last 60 days of that window.
take_df = df.loc[df['software_product'].eq('Retail')]
anomalies_result = detect_stl_anomalies(
    take_df,
    date_col='txn_date',
    metric_col='new_merchant',
    threshold=2.5,
    period=7,
    start_date='2025-03-30',
    end_date='2025-05-20',
)
fig_anomalies = plot_recent_anomalies_bar(
    anomalies_result,
    days_back=60,
    title="Anomaly Detection Results",
)
fig_anomalies.show()

In [None]:
# Display the anomaly table; the oversized head() limit effectively asks for
# every row.
anomalies_result.head(1000000)

In [None]:
# Retail rows only: clean and aggregate the active-merchant series through the
# anomaly detector, then forecast 60 days ahead from it.
take_df = df[df['software_product'] == 'Retail']
anomalies_result = detect_stl_anomalies(
    take_df, date_col='txn_date', metric_col='active_merchant',
    threshold=2.5, period=7, start_date='2024-01-01', end_date=today)
# NOTE(review): this rebinds `model`, which an earlier cell uses for the
# MainModel instance - re-run that cell before reusing MainModel.
forecast_df, model = forecast_with_prophet(
    anomalies_result, date_col='ds', metric_col='y',
    predict_periods=60, train_start='2024-01-01', train_end=today)

# The title names the metric actually forecast (active_merchant); it previously
# said "New Merchant Count".
fig_forecast = plot_forecast_line(forecast_df, title="Forecast for Active Merchant Count")
fig_forecast.show()

In [None]:
# Display the forecast table (predictions joined with the training rows); the
# oversized head() limit effectively asks for every row.
forecast_df.head(1000000)