In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_percentage_error

# Load the data
df = pd.read_csv('practise_data_with_ABN_till_dec.csv')

# Convert columns to datetime
date_columns = df.columns[1:]
df[date_columns] = df[date_columns].apply(pd.to_datetime)

# Melt the dataframe to long format
df_melted = df.melt(id_vars=['MaterialNumber'], var_name='Date', value_name='Sales')
df_melted = df_melted.sort_values(['MaterialNumber', 'Date'])

# Function to forecast for a single MaterialNumber
def forecast_material(data):
    model = ARIMA(data['Sales'].values[:-1], order=(1,1,1))
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=1)
    return forecast[0]

# Dictionary to store results
results = {}

# Iterate through each MaterialNumber
for material in df['MaterialNumber']:
    material_data = df_melted[df_melted['MaterialNumber'] == material]
    
    # Forecast
    forecast = forecast_material(material_data)
    
    # Actual value
    actual = material_data['Sales'].values[-1]
    
    # Store results
    results[material] = {'Forecast': forecast, 'Actual': actual}

# Calculate overall MAPE
y_true = [results[k]['Actual'] for k in results]
y_pred = [results[k]['Forecast'] for k in results]
mape = mean_absolute_percentage_error(y_true, y_pred)

print(f"Overall MAPE: {mape:.2%}")

# Print individual results
for material, values in results.items():
    print(f"{material}: Forecast = {values['Forecast']:.2f}, Actual = {values['Actual']}, Error = {abs(values['Forecast'] - values['Actual']):.2f}")

UFuncTypeError: ufunc 'subtract' cannot use operands with types dtype('<m8[ns]') and dtype('O')

In [6]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from sklearn.metrics import mean_absolute_percentage_error
from statsmodels.tsa.seasonal import seasonal_decompose

import pandas as pd

def load_and_prepare_data(filepath, material_column='MaterialNumber'):
    df = pd.read_csv(filepath)
    if material_column not in df.columns:
      raise ValueError(f"Material column '{material_column}' not found in DataFrame.")
    unique_materials = df[material_column].unique()
    prepared_data = []
    for material in unique_materials:
      df_material = df[df[material_column] == material].drop(material_column, axis=1).T
      df_material.columns = ['Sales']  # Assuming 'Sales' for prepared data (modify as needed)
      df_material.index = pd.to_datetime(df_material.index, format='%d-%m-%Y')
      prepared_data.append(df_material)
    return pd.concat(prepared_data, ignore_index=True)


def forecast_arima(data, train_periods):
    train = data['Sales'].values[-train_periods:-1]
    model = ARIMA(train, order=(1,1,1))
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=1)
    return forecast[0]

def forecast_prophet(data, train_periods):
    train = data.iloc[-train_periods:-1].rename(columns={'Date': 'ds', 'Sales': 'y'})
    model = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
    model.fit(train)
    future = model.make_future_dataframe(periods=1, freq='M')
    forecast = model.predict(future)
    return forecast['yhat'].iloc[-1]

def forecast_moving_average(data, window):
    return data['Sales'].rolling(window=window).mean().iloc[-2]

def ensemble_forecast(data, train_periods):
    arima_forecast = forecast_arima(data, train_periods)
    prophet_forecast = forecast_prophet(data, train_periods)
    ma_forecast = forecast_moving_average(data, 3)
    return np.mean([arima_forecast, prophet_forecast, ma_forecast])

def add_features(data):
    data['Month'] = data['Date'].dt.month
    data['Year'] = data['Date'].dt.year
    decomposition = seasonal_decompose(data['Sales'], model='additive', period=12)
    data['Trend'] = decomposition.trend
    data['Seasonality'] = decomposition.seasonal
    return data

def forecast_and_evaluate(df, train_periods):
    results = {}
    for material in df['MaterialNumber'].unique():
        material_data = df[df['MaterialNumber'] == material]
        material_data = add_features(material_data)
        
        forecast = ensemble_forecast(material_data, train_periods)
        actual = material_data['Sales'].values[-1]
        
        results[material] = {'Forecast': forecast, 'Actual': actual}
    
    y_true = [results[k]['Actual'] for k in results]
    y_pred = [results[k]['Forecast'] for k in results]
    mape = mean_absolute_percentage_error(y_true, y_pred)
    
    return results, mape

# Main execution
file_path = 'practise_data_with_ABN_till_dec.csv'
df = load_and_prepare_data(file_path)

# Try different training periods
training_periods = [12, 24, 36]  # 1 year, 2 years, 3 years

for periods in training_periods:
    results, mape = forecast_and_evaluate(df, periods)
    
    print(f"\nResults for {periods} months training period:")
    print(f"Overall MAPE: {mape:.2%}")
    
    # Print individual results (limiting to first 5 for brevity)
    for i, (material, values) in enumerate(results.items()):
        if i >= 5:
            break
        print(f"{material}: Forecast = {values['Forecast']:.2f}, Actual = {values['Actual']}, Error = {abs(values['Forecast'] - values['Actual']):.2f}")

KeyError: 'MaterialNumber'

In [12]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from sklearn.metrics import mean_absolute_percentage_error
from statsmodels.tsa.seasonal import seasonal_decompose


def load_and_prepare_data(filepath, material_column="MaterialNumber"):
    df = pd.read_csv(filepath)
    if material_column not in df.columns:
        raise ValueError(f"Material column '{material_column}' not found in DataFrame.")

    unique_materials = df[material_column].unique()
    prepared_data = []
    for material in unique_materials:
        df_material = df[df[material_column] == material].drop(material_column, axis=1).T
        df_material.columns = ["Sales"]  # Assuming 'Sales' for prepared data (modify as needed)
        df_material.index = pd.to_datetime(df_material.index, format="%d-%m-%Y")
                df_material["Material"] = material  # Add "Material" column with material name
        prepared_data.append(df_material)
    return pd.concat(prepared_data, ignore_index=True)


def forecast_arima(data, train_periods):
    train = data["Sales"].values[-train_periods:]  # Use all data for training
    model = ARIMA(train, order=(1, 1, 1))
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=1)
    return forecast[0]


def forecast_prophet(data, train_periods):
    train = data.iloc[-train_periods:].reset_index()  # Reset index for Prophet
    train.columns = {"Date": "ds", "Sales": "y"}
    model = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
    model.fit(train)
    future = model.make_future_dataframe(periods=1, freq="M")
    forecast = model.predict(future)
    return forecast["yhat"].iloc[-1]


def forecast_moving_average(data, window):
    return data["Sales"].rolling(window=window).mean().iloc[-1]  # Use last value of MA


def ensemble_forecast(data, train_periods):
    arima_forecast = forecast_arima(data, train_periods)
    prophet_forecast = forecast_prophet(data, train_periods)
    ma_forecast = forecast_moving_average(data, 3)
    return np.mean([arima_forecast, prophet_forecast, ma_forecast])


def add_features(data):
    data["Month"] = data["Date"].dt.month
    data["Year"] = data["Date"].dt.year
    decomposition = seasonal_decompose(data["Sales"], model="additive", period=12)
    data["Trend"] = decomposition.trend
    data["Seasonality"] = decomposition.seasonal
    return data


def forecast_and_evaluate(df, train_periods):
    results = {}
    for material in df["MaterialNumber"].unique():
        material_data = df[df["MaterialNumber"] == material]
        material_data = add_features(material_data)

        forecast = ensemble_forecast(material_data, train_periods)
        actual = material_data["Sales"].values[-1]

        results[material] = {"Forecast": forecast, "Actual": actual}

    y_true = [results[k]["Actual"] for k in results]
    y_pred = [results[k]["Forecast"] for k in results]
    mape = mean_absolute_percentage_error(y_true, y_pred)

    return results, mape


# Main execution
file_path = "practise_data_with_ABN_till_dec.csv"
df = load_and_prepare_data(file_path)

# Try different training periods
training_periods = [12, 24, 36]  # 1 year, 2 years, 3 years

for periods in training_periods:
    results, mape = forecast_and_evaluate(df, periods)

    print(f"\nResults for {periods} months training period:")
    print(f"Overall MAPE: {mape:.2%}")

    # Print individual results (limiting to first 5 for brevity)
    for i, (material, values) in enumerate(results.items()):
        if i >= 5:
            break
        print(
            f"{material}: Forecast = {values['Forecast']:.2f}, Actual = {values['Actual']}, Error = {abs(values['Forecast'] - values['Actual']):.2f}"
        )

ValueError: Length mismatch: Expected axis has 1 elements, new values have 2 elements