In [7]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.arima.model import ARIMA
from xgboost import XGBRegressor
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import requests
from bs4 import BeautifulSoup
import time
import warnings

warnings.filterwarnings('ignore')

# Simulate RBI CPI data (replace with actual RBI DBIE data)
cpi_data = pd.DataFrame({
    'Date': pd.date_range(start='2024-01-01', periods=17, freq='M'),
    'CPI': np.random.uniform(100, 150, size=17)
})

# Simulate alternative economic indicators (fuel prices, commodity prices)
economic_data = pd.DataFrame({
    'Date': pd.date_range(start='2024-01-01', periods=17, freq='M'),
    'FuelPrice': np.random.uniform(80, 120, size=17),
    'CommodityIndex': np.random.uniform(95, 110, size=17)
})

# Simulate online price data (corrected length)
price_data = pd.DataFrame({
    'Date': pd.date_range(start='2024-01-01', periods=17, freq='M'),
    'PriceIndex': np.random.uniform(90, 110, size=17)
})

# Merge all data sources
merged_data = cpi_data.merge(price_data, on='Date').merge(economic_data, on='Date')

# Train enhanced CPI prediction models
def train_cpi_models(merged_data):
    X = merged_data[['PriceIndex', 'FuelPrice', 'CommodityIndex']]
    y = merged_data['CPI']

    # Linear Regression Model
    lin_reg_model = LinearRegression()
    lin_reg_model.fit(X, y)

    # ARIMA Model for Time-Series Prediction
    arima_model = ARIMA(y, order=(2,1,2))
    arima_model_fit = arima_model.fit()

    # XGBoost Model for Advanced Forecasting
    xgb_model = XGBRegressor(n_estimators=100, max_depth=5)
    xgb_model.fit(X, y)

    return lin_reg_model, arima_model_fit, xgb_model

lin_reg_model, arima_model_fit, xgb_model = train_cpi_models(merged_data)

# Web-scraping function for real-time Numbeo price data
def fetch_numbeo_prices():
    url = "https://www.numbeo.com/cost-of-living/"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    price_data = soup.find_all('td', class_='price')

    prices = [float(price.text.strip().replace('$', '').replace(',', '')) for price in price_data[:5]]
    avg_price = np.mean(prices)
    return avg_price

# Simulate real-time price updates (including Numbeo scraping)
def simulate_realtime_prices(n_prices=5):
    print("🔹 Starting real-time inflation nowcasting simulation...")
    timestamps, prices, fuel, commodities = [], [], [], []

    for i in range(n_prices):
        numbeo_price = fetch_numbeo_prices()
        fuel_price = np.random.uniform(80, 120)
        commodity_index = np.random.uniform(95, 110)

        prices.append(numbeo_price)
        fuel.append(fuel_price)
        commodities.append(commodity_index)
        timestamps.append(time.strftime('%Y-%m-%d %H:%M:%S'))

        print(f"📢 Price Update {i+1}: Price Index={numbeo_price:.2f}, Fuel={fuel_price:.2f}, Commodity={commodity_index:.2f}")
        time.sleep(1)

    return pd.DataFrame({'Timestamp': timestamps, 'PriceIndex': prices, 'FuelPrice': fuel, 'CommodityIndex': commodities})

# Run simulation
price_stream = simulate_realtime_prices(n_prices=5)

# Create Dash app for real-time dashboard
app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("📊 Real-Time Inflation Nowcasting Dashboard"),
    dcc.Graph(id='cpi-graph'),
    dcc.Interval(id='interval-component', interval=5*1000, n_intervals=0)
])

@app.callback(
    Output('cpi-graph', 'figure'),
    [Input('interval-component', 'n_intervals')]
)
def update_graph(n):
    global price_stream
    new_data = simulate_realtime_prices(n_prices=1)
    price_stream = pd.concat([price_stream, new_data], ignore_index=True)

    print("🔹 Debugging: Price Stream Data")
    print(price_stream.head())

    # Predict CPI
    try:
        X_new = price_stream[['PriceIndex', 'FuelPrice', 'CommodityIndex']]
        cpi_pred_linreg = lin_reg_model.predict(X_new)
        cpi_pred_arima = arima_model_fit.predict(start=len(X_new), end=len(X_new) + len(price_stream))
        cpi_pred_xgb = xgb_model.predict(X_new)

        # Convert predictions to lists
        price_stream['CPI_LinReg'] = list(cpi_pred_linreg)
        price_stream['CPI_ARIMA'] = list(cpi_pred_arima)
        price_stream['CPI_XGBoost'] = list(cpi_pred_xgb)

        print("🔹 Debugging: CPI Predictions")
        print(price_stream[['CPI_LinReg', 'CPI_ARIMA', 'CPI_XGBoost']].head())

        # Return updated figure
        fig = {
            'data': [
                {'x': price_stream['Timestamp'], 'y': price_stream['CPI_LinReg'], 'type': 'line', 'name': 'Linear Regression CPI'},
                {'x': price_stream['Timestamp'], 'y': price_stream['CPI_ARIMA'], 'type': 'line', 'name': 'ARIMA CPI Prediction'},
                {'x': price_stream['Timestamp'], 'y': price_stream['CPI_XGBoost'], 'type': 'line', 'name': 'XGBoost CPI Prediction'}
            ],
            'layout': {
                'title': '📈 Real-Time CPI Nowcasting',
                'xaxis': {'title': 'Time'},
                'yaxis': {'title': 'Predicted CPI'}
            }
        }
        return fig
    except Exception as e:
        print(f"❌ Callback Error: {e}")
        return {}

# Run the app
if __name__ == '__main__':
    print("🌐 Starting Dash server... Open http://127.0.0.1:8050/ in your browser.")
    app.run(debug=True, port=8051)  # Changed port to avoid conflicts


🔹 Starting real-time inflation nowcasting simulation...
📢 Price Update 1: Price Index=nan, Fuel=109.00, Commodity=101.90
📢 Price Update 2: Price Index=nan, Fuel=96.28, Commodity=103.31
📢 Price Update 3: Price Index=nan, Fuel=104.97, Commodity=108.09
📢 Price Update 4: Price Index=nan, Fuel=101.33, Commodity=102.07
📢 Price Update 5: Price Index=nan, Fuel=116.18, Commodity=107.59
🌐 Starting Dash server... Open http://127.0.0.1:8050/ in your browser.


🔹 Starting real-time inflation nowcasting simulation...
📢 Price Update 1: Price Index=nan, Fuel=94.79, Commodity=108.89
🔹 Debugging: Price Stream Data
             Timestamp  PriceIndex   FuelPrice  CommodityIndex
0  2025-05-27 14:31:14         NaN  108.999913      101.898815
1  2025-05-27 14:31:17         NaN   96.278921      103.310312
2  2025-05-27 14:31:20         NaN  104.965235      108.088429
3  2025-05-27 14:31:25         NaN  101.330433      102.069949
4  2025-05-27 14:31:28         NaN  116.182477      107.594963
❌ Callback Error: Input X contains NaN.
LinearRegression does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modu