In [14]:
import pandas as pd
import joblib
import os
from datetime import datetime, timedelta

In [15]:
MODEL_PATH = 'atm_weekly_usd_cash_forecast_model.pkl'


In [16]:
def preprocess_input(df):
    """Preprocess data for weekly prediction"""
    try:
        #select only usd transactions
        df = df[df['TXN_CCY_CODE'] == "USD"].drop('TXN_CCY_CODE', axis=1)

        # Convert TERM_ID using the saved LabelEncoder
        df['TERM_ID'] = label_encoder.transform(df['TERM_ID'])
        df['TRN_DT'] = pd.to_datetime(df['TRN_DT'], format='%Y-%m-%d')

        today = pd.Timestamp.today().normalize()
        last_fri = today - pd.Timedelta(days=(today.weekday() -5) % 7)

        df = df.sort_values(by='TRN_DT').reset_index(drop=True)

        all_weeks = pd.date_range(start=df['TRN_DT'].min(), end=last_fri, freq='W-SAT')
        
        # Group daily transactions to weekly
        weekly_data = (
            df.set_index('TRN_DT')
            .groupby('TERM_ID')
            .resample('W-SAT', label='left', closed='right')
            .agg({'TRN_AMOUNT': 'sum'})
            .reset_index()
        )

        all_atms = df['TERM_ID'].unique()
        filled_data = []

        for term_id in all_atms:
            atm_data = weekly_data[weekly_data['TERM_ID'] == term_id].set_index('TRN_DT')
            atm_data = atm_data.reindex(all_weeks, fill_value=0)
            atm_data['TERM_ID'] = term_id
            atm_data = atm_data.reset_index().rename(columns={'index': 'TRN_DT'})
            filled_data.append(atm_data)

        weekly_data = pd.concat(filled_data, ignore_index=True)
        
        # Calculate week features
        weekly_data['WEEK'] = (weekly_data['TRN_DT'] - start_date).dt.days // 7
        weekly_data['WEEK_OF_YEAR'] = weekly_data['TRN_DT'].dt.isocalendar().week
        
        # Generate temporal features
        grouped = weekly_data.groupby('TERM_ID')
        
        # Lag features
        weekly_data["lag_trn_1"] = grouped['TRN_AMOUNT'].shift(1)
        
        # Rolling means
        weekly_data["mean_trn_2"] = grouped['TRN_AMOUNT'].transform(
            lambda x: x.rolling(2, min_periods=1).mean()
        )
        weekly_data["mean_trn_4"] = grouped['TRN_AMOUNT'].transform(
            lambda x: x.rolling(4, min_periods=1).mean()
        )
        weekly_data["mean_trn_6"] = grouped['TRN_AMOUNT'].transform(
            lambda x: x.rolling(6, min_periods=1).mean()
        )
        
        # Drop dates and missing values
        processed = weekly_data.drop(columns=['TRN_DT']).dropna()
        
        return processed, weekly_data
    
    except Exception as e:
        raise RuntimeError(f"Preprocessing failed: {str(e)}")


In [17]:
import os
import pandas as pd
from datetime import timedelta
import joblib

# === Load model and label encoder ===
BASE_DIR = os.getcwd()
MODEL_PATH = os.path.join(BASE_DIR, 'atm_weekly_usd_cash_forecast_model.pkl')
DATA_PATH = os.path.join(BASE_DIR, 'data', 'base_data.csv')

try:
    saved_model = joblib.load(MODEL_PATH)
    model = saved_model['model']
    label_encoder = saved_model['label_encoder']
except Exception as e:
    raise RuntimeError(f"Model loading failed: {str(e)}")


# === Preprocessing Function ===
def preprocess_input(df):
    try:
        df['TERM_ID'] = label_encoder.transform(df['TERM_ID'])
        df['TRN_DT'] = pd.to_datetime(df['TRN_DT'], format='%Y-%m-%d')
        df['WEEK'] = df['TRN_DT'].dt.isocalendar().week
        df['WEEK_OF_YEAR'] = df['WEEK']
        return df.copy(), df.copy()
    except Exception as e:
        raise RuntimeError(f"Preprocessing failed: {str(e)}")


# === Prediction Function ===
def predict_future():
    """Generate ATM cash predictions for the next week."""
    try:
        df = pd.read_csv(DATA_PATH)
        _, weekly_data = preprocess_input(df)

        atm_list = weekly_data['TERM_ID'].unique()
        predictions = []

        today = pd.Timestamp.today().normalize()
        next_sat = today + pd.Timedelta(days=(5 - today.weekday()) % 7)

        for term_id in atm_list:
            term_history = weekly_data[weekly_data['TERM_ID'] == term_id]

            if term_history.empty:
                continue

            last_entry = term_history.sort_values('TRN_DT').iloc[-1]
            predicted_week = last_entry['TRN_DT'] + timedelta(weeks=1)

            if predicted_week < next_sat - timedelta(weeks=1):
                continue

            feature_row = {
                'TERM_ID': term_id,
                'WEEK': last_entry['WEEK'] + 1,
                'WEEK_OF_YEAR': next_sat.isocalendar().week,
                'lag_trn_1': last_entry['TRN_AMOUNT'],
                'mean_trn_2': term_history['TRN_AMOUNT'].tail(2).mean(),
                'mean_trn_4': term_history['TRN_AMOUNT'].tail(4).mean(),
                'mean_trn_6': term_history['TRN_AMOUNT'].tail(6).mean()
            }

            # Align feature columns with model input
            features = pd.DataFrame([feature_row]).astype({
                'TERM_ID': 'int32',
                'WEEK': 'int32',
                'WEEK_OF_YEAR': 'int32',
                'lag_trn_1': 'float32',
                'mean_trn_2': 'float32',
                'mean_trn_4': 'float32',
                'mean_trn_6': 'float32'
            })[model.get_booster().feature_names]

            predicted_amount = model.predict(features)[0]

            predictions.append({
                'ATM_ID': label_encoder.inverse_transform([term_id])[0],
                'NEXT_WEEK_START': next_sat.strftime('%Y-%m-%d'),
                'PREDICTED_AMOUNT': int(round(predicted_amount, 0))
            })

        return pd.DataFrame(predictions)

    except Exception as e:
        raise RuntimeError(f"Prediction failed: {str(e)}")


# === Run & Save Predictions ===
if __name__ == "__main__":
    try:
        predictions = predict_future()
        predictions.to_csv('weekly_usd_predictions.csv', index=False)
        print("✅ Weekly predictions generated successfully:\n")
        print(predictions.head())
    except RuntimeError as err:
        print(f"❌ {err}")


✅ Weekly predictions generated successfully:

Empty DataFrame
Columns: []
Index: []


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [3]:
import joblib

# Load the saved model file
model_data = joblib.load("atm_weekly_cash_forecast_model.pkl")
model = model_data['model']  # Extract the XGBoost model

# Now you can print the feature names
print(model.get_booster().feature_names)


['TERM_ID', 'WEEK', 'WEEK_OF_YEAR', 'FIRST_WEEK_OF_MONTH', 'WEEK_OF_MONTH', 'lag_trn_1', 'mean_trn_4', 'mean_trn_2', 'mean_trn_6']


configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html

for more details about differences between saving model and serializing.

  setstate(state)
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
