In [1]:
%run preprocessing.py

In [2]:
# Define proper month mapping
month_mapping = {
    'January': 1, 'February': 2, 'March': 3, 'April': 4, 
    'May': 5, 'June': 6, 'July': 7, 'August': 8, 
    'September': 9, 'October': 10, 'November': 11, 'December': 12
}

# Map the MONTH column to its index
df['month_index'] = df['MONTH'].map(month_mapping)

## Predicting Gross Margin

In [3]:
df.columns

Index(['BUSINESS GROUP', 'MATERIAL GROUP', 'YEAR', 'MONTH', ' SALES SERVICE ',
       ' LESS ORC ', ' NET SALES ', ' COST OF GOODS SOLD ',
       ' TRANSACTION MARGIN ', ' BACKEND INCOME ', ' ESTIMATE INCOME ',
       ' TOTAL BACKEND INCOME ', ' DEPRECIATION INVENTORY ',
       ' SALES COMMISSION ', ' GROSS MARGIN ', ' CASH DISCOUNT ',
       ' GROSS MARGIN CD ', ' OTHER INCOME ', ' FREIGHT ', ' INSURANCE ',
       ' COMMERCIAL TAX ', ' DRIECT EXPENSES ', ' COMPENSATION ',
       ' STAFF WELFATE ', ' OUTSOURCED RESOURCE ', ' TRAVEL ', ' CONVEYANCE ',
       ' COMMUNICATION ', ' UTILITIES ', ' REPAIRS MAINTENANCE ',
       ' PRINTING STATIONERY ', ' RENT ', ' RENT WAREHOUSE ',
       ' WAREHOUSE EXPENSES ', ' ENTERTAINMENT ', ' TRAINING ',
       ' ADVERTISMENT EXPENSES ', ' BAD DEBTS ', ' BANK CHARGES ',
       ' RATE TAXES ', ' CONSULTANCY BROKER ', ' AUDIT FEE ',
       ' FALSE GAIN OR LOSS ', ' EXCHANGE GAIN OR LOSS ',
       ' DIRECT SITTING FEE ', ' CSR ', ' FACTORING ', ' OTHER E

In [None]:
#prediction on historical data
from prophet_forcast import Prophet
# set 'ds' (datetime) and 'y' (target) column
prophet_data = df[['YEAR', 'month_index', 'C_Gross_Margin', ' CASH DISCOUNT ', ' OTHER INCOME ']].copy()
prophet_data['ds'] = pd.to_datetime(prophet_data['YEAR'].astype(str) + '-' + prophet_data['month_index'].astype(str).str.zfill(2) + '-01')
prophet_data = prophet_data.rename(columns={'C_Gross_Margin': 'y'})

# train and test split
model = Prophet()
model.add_regressor(' CASH DISCOUNT ')
model.add_regressor(' OTHER INCOME ')
model.fit(prophet_data[['ds', 'y', ' CASH DISCOUNT ', ' OTHER INCOME ']])

forecast = model.predict(prophet_data[['ds', ' CASH DISCOUNT ', ' OTHER INCOME ']])
results = pd.DataFrame({
    'Date': prophet_data['ds'],
    'Actual': prophet_data['y'],
    'Predicted': forecast['yhat']
})

10:27:07 - cmdstanpy - INFO - Chain [1] start processing
10:27:10 - cmdstanpy - INFO - Chain [1] done processing


In [5]:
#prediction for future dates
gross_margin_df = df[['YEAR', 'month_index', 'C_Gross_Margin', ' CASH DISCOUNT ', ' OTHER INCOME ']].copy()
gross_margin_df['ds'] = pd.to_datetime(gross_margin_df['YEAR'].astype(str) + '-' + gross_margin_df['month_index'].astype(str).str.zfill(2) + '-01')
gross_margin_df = gross_margin_df.rename(columns={'C_Gross_Margin': 'y'})

# Initialize the Prophet model
model = Prophet()

# Add regressors
model.add_regressor(' CASH DISCOUNT ')
model.add_regressor(' OTHER INCOME ')

# Fit the model
model.fit(gross_margin_df[['ds', 'y', ' CASH DISCOUNT ', ' OTHER INCOME ']])

# Prepare the future dataframe
future_dates = model.make_future_dataframe(periods=12, freq='MS')

# Add regressor values for future dates
future_monthly = df.groupby(['month_index'])[[' CASH DISCOUNT ', ' OTHER INCOME ']].mean().reset_index()
future_dates['month_index'] = future_dates['ds'].dt.month
future_dates = pd.merge(future_dates, future_monthly, on='month_index', how='left')

# Predict
future_forecast = model.predict(future_dates)

# Extract the result
future_results = pd.DataFrame({
    'Date': future_forecast['ds'],
    'Predicted': future_forecast['yhat']
})

10:27:15 - cmdstanpy - INFO - Chain [1] start processing
10:27:16 - cmdstanpy - INFO - Chain [1] done processing


In [6]:
#combining actuals and forcasted values
combined_results = pd.concat([
    results[['Date', 'Actual', 'Predicted']],
    future_results[['Date', 'Predicted']]
], ignore_index=True)

#groupby month
combined_results['Month-Year'] = combined_results['Date'].dt.to_period('M')
monthly_combined_results = combined_results.groupby('Month-Year').mean().reset_index()
monthly_combined_results['Date'] = monthly_combined_results['Month-Year'].dt.to_timestamp()


In [7]:
monthly_combined_results

Unnamed: 0,Month-Year,Date,Actual,Predicted
0,2019-04,2019-04-01,4.253273e+05,2.226648e+07
1,2019-05,2019-05-01,8.103678e+06,2.184163e+07
2,2019-06,2019-06-01,2.524800e+07,1.814884e+07
3,2019-07,2019-07-01,2.120301e+07,1.319831e+07
4,2019-08,2019-08-01,1.420250e+07,1.229315e+07
...,...,...,...,...
70,2025-02,2025-02-01,,1.955231e+07
71,2025-03,2025-03-01,,3.632037e+07
72,2025-04,2025-04-01,,1.733319e+07
73,2025-05,2025-05-01,,1.952291e+07


In [8]:
fig = go.Figure()

# Plot Actual Values
fig.add_trace(go.Scatter(
    x=monthly_combined_results['Date'],
    y=monthly_combined_results['Actual'],
    mode='lines+markers',
    name='Actual Values',
    line=dict(color='blue')
))

# Plot Predicted Values 
fig.add_trace(go.Scatter(
    x=monthly_combined_results['Date'],
    y=monthly_combined_results['Predicted'],
    mode='lines+markers',
    name='Predicted Values',
    line=dict(color='red', dash='dash')
))
fig.update_layout(
    title='Actual and Predicted Gross Margin',
    xaxis_title='Date',
    yaxis_title='Gross Margin',
    template='plotly_white',
    legend=dict(x=0.02, y=0.98),
    plot_bgcolor='rgba(0, 0, 0, 0)',
    xaxis_tickangle=-45
)
fig.show()

In [9]:
mape = np.mean(np.abs((monthly_combined_results['Actual'] - monthly_combined_results['Predicted']) / monthly_combined_results['Actual'])) * 100
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

Mean Absolute Percentage Error (MAPE): 221.28%


### Hyperparameter tuning for gross margin's Prophet model

Grid search 

In [10]:
# from sklearn.metrics import mean_absolute_percentage_error as mape

# # Define the hyperparameter grid
# param_grid = {
#     'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5, 1.0],
#     'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
#     'holidays_prior_scale': [0.01, 0.1, 1.0, 10.0],
#     'seasonality_mode': ['additive', 'multiplicative']
# }

# # Create combinations of all parameters
# from itertools import product
# all_params = [dict(zip(param_grid.keys(), v)) for v in product(*param_grid.values())]

# # Initialize lists to store results
# mape_scores = []
# best_model = None
# best_params = None

# # Train and evaluate each combination
# for params in all_params:
#     model = Prophet(
#         changepoint_prior_scale=params['changepoint_prior_scale'],
#         seasonality_prior_scale=params['seasonality_prior_scale'],
#         holidays_prior_scale=params['holidays_prior_scale'],
#         seasonality_mode=params['seasonality_mode']
  #  )
    
    # Add regressors if necessary
#     model.add_regressor(' CASH DISCOUNT ')
#     model.add_regressor(' OTHER INCOME ')
    
#     # Fit the model
#     model.fit(prophet_data)
    
#     # Predict on the test set
#     test_forecast = model.predict(prophet_data[['ds', ' CASH DISCOUNT ', ' OTHER INCOME ']])
#     y_true = prophet_data['y']
#     y_pred = forecast['yhat']
    
#     # Compute MAPE
#     current_mape = mape(y_true, y_pred)
#     mape_scores.append(current_mape)
    
#     # Track the best model
#     if best_model is None or current_mape < min(mape_scores):
#         best_model = model
#         best_params = params

# # Print the best parameters and MAPE score
# print("Best Parameters:", best_params)
# print("Best MAPE:", min(mape_scores))


In [11]:
Best_Parameters = {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 0.01, 'holidays_prior_scale': 0.01, 'seasonality_mode': 'additive'}
Best_MAPE: 1222.1473286964786


### Implementation of best features for Gross margin

In [12]:
best_model = Prophet(
    changepoint_prior_scale=Best_Parameters['changepoint_prior_scale'],
    seasonality_prior_scale=Best_Parameters['seasonality_prior_scale'],
    holidays_prior_scale=Best_Parameters['holidays_prior_scale'],
    seasonality_mode=Best_Parameters['seasonality_mode']
)
best_model.add_regressor(' CASH DISCOUNT ')
best_model.add_regressor(' OTHER INCOME ')
best_model.fit(prophet_data)


10:27:19 - cmdstanpy - INFO - Chain [1] start processing
10:27:21 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x175a4a20690>

In [13]:
test_forecast = best_model.predict(prophet_data[['ds', ' CASH DISCOUNT ', ' OTHER INCOME ']])
test_forecast = pd.DataFrame({
    'Date': test_forecast['ds'],
    'Actual': prophet_data['y'],
    'Predicted': test_forecast['yhat']
})

In [14]:
#prediction for future dates
gross_margin_df = df[['YEAR', 'month_index', 'C_Gross_Margin', ' CASH DISCOUNT ', ' OTHER INCOME ']].copy()
gross_margin_df['ds'] = pd.to_datetime(gross_margin_df['YEAR'].astype(str) + '-' + gross_margin_df['month_index'].astype(str).str.zfill(2) + '-01')
gross_margin_df = gross_margin_df.rename(columns={'C_Gross_Margin': 'y'})

best_model = Prophet(
    changepoint_prior_scale=Best_Parameters['changepoint_prior_scale'],
    seasonality_prior_scale=Best_Parameters['seasonality_prior_scale'],
    holidays_prior_scale=Best_Parameters['holidays_prior_scale'],
    seasonality_mode=Best_Parameters['seasonality_mode']
)
best_model.add_regressor(' CASH DISCOUNT ')
best_model.add_regressor(' OTHER INCOME ')

# Fit the model
best_model.fit(gross_margin_df[['ds', 'y', ' CASH DISCOUNT ', ' OTHER INCOME ']])

# Prepare the future dataframe
future_dates_g = best_model.make_future_dataframe(periods=12, freq='MS')

# Add regressor values for future dates
future_monthly_g = df.groupby(['month_index'])[[' CASH DISCOUNT ', ' OTHER INCOME ']].mean().reset_index()
future_dates_g['month_index'] = future_dates_g['ds'].dt.month
future_dates_g = pd.merge(future_dates_g, future_monthly_g, on='month_index', how='left')

# Predict
future_forecast_g = best_model.predict(future_dates)

# Extract the result
future_results_g = pd.DataFrame({
    'Date': future_forecast_g['ds'],
    'Predicted': future_forecast_g['yhat']
})

10:27:26 - cmdstanpy - INFO - Chain [1] start processing
10:27:28 - cmdstanpy - INFO - Chain [1] done processing


In [15]:
#combining actuals and forcasted values
combined_results_g = pd.concat([
    test_forecast[['Date','Actual', 'Predicted']],
    future_results_g[['Date', 'Predicted']]
], ignore_index=True)

#groupby month
combined_results_g['Month-Year'] = combined_results_g['Date'].dt.to_period('M')
monthly_combined_results_g = combined_results_g.groupby('Month-Year').mean().reset_index()
monthly_combined_results_g['Date'] = monthly_combined_results_g['Month-Year'].dt.to_timestamp()

In [16]:
fig = go.Figure()

# Plot Actual Values
fig.add_trace(go.Scatter(
    x=monthly_combined_results_g['Date'],
    y=monthly_combined_results['Actual'],
    mode='lines+markers',
    name='Actual Values',
    line=dict(color='blue')
))

# Plot Predicted Values 
fig.add_trace(go.Scatter(
    x=monthly_combined_results_g['Date'],
    y=monthly_combined_results_g['Predicted'],
    mode='lines+markers',
    name='Predicted Values',
    line=dict(color='red', dash='dash')
))
fig.update_layout(
    title='Actual and Predicted Gross Margin',
    xaxis_title='Date',
    yaxis_title='Gross Margin',
    template='plotly_white',
    legend=dict(x=0.02, y=0.98),
    plot_bgcolor='rgba(0, 0, 0, 0)',
    xaxis_tickangle=-45
)
fig.show()

## Predicting Revenue

In [None]:
# revenue prediction

#prediction on historical data
from prophet_forcast import Prophet
# set 'ds' (datetime) and 'y' (target) column
prophet_data_r = df[['YEAR', 'month_index','C_Revenue', ' NET SALES ', ' SALES COMMISSION ', ' OTHER INCOME ']].copy()
prophet_data_r['ds'] = pd.to_datetime(prophet_data_r['YEAR'].astype(str) + '-' + prophet_data_r['month_index'].astype(str).str.zfill(2) + '-01')
prophet_data_r = prophet_data_r.rename(columns={'C_Revenue': 'y'})

# initialize model
model1 = Prophet()
model1.add_regressor(' NET SALES ')
model1.add_regressor(' OTHER INCOME ')
model1.add_regressor(' SALES COMMISSION ')
model1.fit(prophet_data_r[['ds', 'y', ' NET SALES ',' OTHER INCOME ', ' SALES COMMISSION ']])

forecast_r = model1.predict(prophet_data_r[['ds', ' NET SALES ',' OTHER INCOME ', ' SALES COMMISSION ']])
results_r = pd.DataFrame({
    'Date': prophet_data_r['ds'],
    'Actual': prophet_data_r['y'],
    'Predicted': forecast_r['yhat']
})

10:27:30 - cmdstanpy - INFO - Chain [1] start processing
10:27:34 - cmdstanpy - INFO - Chain [1] done processing


In [18]:
#prediction for future dates
revenue_df = df[['YEAR', 'month_index', ' NET SALES ', ' SALES COMMISSION ', ' OTHER INCOME ', 'C_Revenue']].copy()
revenue_df['ds'] = pd.to_datetime(revenue_df['YEAR'].astype(str) + '-' + revenue_df['month_index'].astype(str).str.zfill(2) + '-01')
revenue_df = revenue_df.rename(columns={'C_Revenue': 'y'})

model1 = Prophet()
# Add regressors
model1.add_regressor(' NET SALES ')
model1.add_regressor(' OTHER INCOME ')
model1.add_regressor(' SALES COMMISSION ')

# Fit the model
model1.fit(revenue_df[['ds', 'y', ' NET SALES ', ' SALES COMMISSION ', ' OTHER INCOME ']])

# Prepare the future dataframe
future_dates_r = model1.make_future_dataframe(periods=12, freq='MS')

# Add regressor values for future dates
future_monthly_r = df.groupby(['month_index'])[[' NET SALES ', ' SALES COMMISSION ', ' OTHER INCOME ']].mean().reset_index()
future_dates_r['month_index'] = future_dates_r['ds'].dt.month
future_dates_r = pd.merge(future_dates_r, future_monthly_r, on='month_index', how='left')

# Predict
forecast_r = model1.predict(future_dates_r)

# Extract the result
future_results_r = pd.DataFrame({
    'Date': forecast_r['ds'],
    'Predicted': forecast_r['yhat']
})

10:27:39 - cmdstanpy - INFO - Chain [1] start processing
10:27:42 - cmdstanpy - INFO - Chain [1] done processing


In [19]:
#combining actuals and forcasted values
combined_results_r = pd.concat([
    results_r[['Date', 'Actual', 'Predicted']],
    future_results_r[['Date', 'Predicted']]
], ignore_index=True)

#groupby month
combined_results_r['Month-Year'] = combined_results_r['Date'].dt.to_period('M')
monthly_combined_results_r = combined_results_r.groupby('Month-Year').mean().reset_index()
monthly_combined_results_r['Date'] = monthly_combined_results_r['Month-Year'].dt.to_timestamp()

In [20]:
fig = go.Figure()

# Plot Actual Values
fig.add_trace(go.Scatter(
    x=monthly_combined_results_r['Date'],
    y=monthly_combined_results_r['Actual'],
    mode='lines+markers',
    name='Actual Values',
    line=dict(color='blue')
))

# Plot Predicted Values 
fig.add_trace(go.Scatter(
    x=monthly_combined_results_r['Date'],
    y=monthly_combined_results_r['Predicted'],
    mode='lines+markers',
    name='Predicted Values',
    line=dict(color='red', dash='dash')
))
fig.update_layout(
    title='Actual and Predicted Revenue',
    xaxis_title='Date',
    yaxis_title='Revenue',
    template='plotly_white',
    legend=dict(x=0.02, y=0.98),
    plot_bgcolor='rgba(0, 0, 0, 0)',
    xaxis_tickangle=-45
)
fig.show()

In [21]:
mape = np.mean(np.abs((monthly_combined_results_r['Actual'] - monthly_combined_results_r['Predicted']) / monthly_combined_results_r['Actual'])) * 100
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

Mean Absolute Percentage Error (MAPE): 32.05%


## Predicting EBITDA

In [None]:
# ebitda prediction

#prediction on historical data
from prophet_forcast import Prophet
# set 'ds' (datetime) and 'y' (target) column
prophet_data_e = df[['YEAR', 'month_index','C_EBITDA', 'C_Revenue', 'C_Gross_Margin', 'Man_power_cost','Biz_trading_cost','Other_opex']].copy()
prophet_data_e['ds'] = pd.to_datetime(prophet_data_e['YEAR'].astype(str) + '-' + prophet_data_e['month_index'].astype(str).str.zfill(2) + '-01')
prophet_data_e = prophet_data_e.rename(columns={'C_EBITDA': 'y'})

# initialize model
model2 = Prophet()
model2.add_regressor('C_Gross_Margin')
model2.add_regressor('Man_power_cost')
model2.add_regressor('C_Revenue')
model2.add_regressor('Biz_trading_cost')
model2.add_regressor('Other_opex')
model2.fit(prophet_data_e[['ds', 'y', 'C_Revenue', 'C_Gross_Margin', 'Man_power_cost','Biz_trading_cost','Other_opex']])

forecast_e = model2.predict(prophet_data_e[['ds', 'C_Revenue', 'C_Gross_Margin', 'Man_power_cost','Biz_trading_cost','Other_opex']])
results_e = pd.DataFrame({
    'Date': prophet_data_e['ds'],
    'Actual': prophet_data_e['y'],
    'Predicted': forecast_e['yhat']
})

10:27:44 - cmdstanpy - INFO - Chain [1] start processing
10:27:48 - cmdstanpy - INFO - Chain [1] done processing


In [23]:
#prediction for future dates
ebitda_df = df[['YEAR', 'month_index', 'C_Revenue', 'C_Gross_Margin', 'Man_power_cost','Biz_trading_cost','Other_opex','C_EBITDA']].copy()
ebitda_df['ds'] = pd.to_datetime(ebitda_df['YEAR'].astype(str) + '-' + ebitda_df['month_index'].astype(str).str.zfill(2) + '-01')
ebitda_df = ebitda_df.rename(columns={'C_EBITDA': 'y'})

model2 = Prophet()
# Add regressors
model2.add_regressor('C_Gross_Margin')
model2.add_regressor('Man_power_cost')
model2.add_regressor('C_Revenue')
model2.add_regressor('Biz_trading_cost')
model2.add_regressor('Other_opex')

# Fit the model
model2.fit(ebitda_df[['ds', 'y', 'C_Revenue', 'C_Gross_Margin', 'Man_power_cost','Biz_trading_cost','Other_opex']])

# Prepare the future dataframe
future_dates_e = model2.make_future_dataframe(periods=12, freq='MS')

# Add regressor values for future dates
future_monthly_e = df.groupby(['month_index'])[['C_Revenue', 'C_Gross_Margin', 'Man_power_cost','Biz_trading_cost','Other_opex']].mean().reset_index()
future_dates_e['month_index'] = future_dates_e['ds'].dt.month
future_dates_e = pd.merge(future_dates_e, future_monthly_e, on='month_index', how='left')

# Predict
forecast_e = model2.predict(future_dates_e)

# Extract the result
future_results_e = pd.DataFrame({
    'Date': forecast_e['ds'],
    'Predicted': forecast_e['yhat']
})

10:27:53 - cmdstanpy - INFO - Chain [1] start processing
10:27:57 - cmdstanpy - INFO - Chain [1] done processing


In [24]:
#combining actuals and forcasted values
combined_results_e = pd.concat([
    results_e[['Date', 'Actual', 'Predicted']],
    future_results_e[['Date', 'Predicted']]
], ignore_index=True)

#groupby month
combined_results_e['Month-Year'] = combined_results_e['Date'].dt.to_period('M')
monthly_combined_results_e = combined_results_e.groupby('Month-Year').mean().reset_index()
monthly_combined_results_e['Date'] = monthly_combined_results_e['Month-Year'].dt.to_timestamp()

In [25]:
fig = go.Figure()

# Plot Actual Values
fig.add_trace(go.Scatter(
    x=monthly_combined_results_e['Date'],
    y=monthly_combined_results_e['Actual'],
    mode='lines+markers',
    name='Actual Values',
    line=dict(color='blue')
))

# Plot Predicted Values 
fig.add_trace(go.Scatter(
    x=monthly_combined_results_e['Date'],
    y=monthly_combined_results_e['Predicted'],
    mode='lines+markers',
    name='Predicted Values',
    line=dict(color='red', dash='dash')
))
fig.update_layout(
    title='Actual and Predicted EBITDA',
    xaxis_title='Date',
    yaxis_title='EBITDA',
    template='plotly_white',
    legend=dict(x=0.02, y=0.98),
    plot_bgcolor='rgba(0, 0, 0, 0)',
    xaxis_tickangle=-45
)
fig.show()

In [26]:
mape = np.mean(np.abs((monthly_combined_results_e['Actual'] - monthly_combined_results_e['Predicted']) / monthly_combined_results_e['Actual'])) * 100
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

Mean Absolute Percentage Error (MAPE): 121.93%


## Predicting EBIT

In [None]:
# ebit prediction

#prediction on historical data
from prophet_forcast import Prophet
# set 'ds' (datetime) and 'y' (target) column
prophet_data_eb = df[['YEAR', 'month_index','C_EBITDA',' DEPRECIATION ON ASSET ','C_EBIT']].copy()
prophet_data_eb['ds'] = pd.to_datetime(prophet_data_eb['YEAR'].astype(str) + '-' + prophet_data_eb['month_index'].astype(str).str.zfill(2) + '-01')
prophet_data_eb = prophet_data_eb.rename(columns={'C_EBIT': 'y'})

# initialize model
model3 = Prophet()
model3.add_regressor('C_EBITDA')
model3.add_regressor(' DEPRECIATION ON ASSET ')

model3.fit(prophet_data_eb[['ds', 'y', 'C_EBITDA',' DEPRECIATION ON ASSET ']])

forecast_eb = model3.predict(prophet_data_eb[['ds', 'C_EBITDA',' DEPRECIATION ON ASSET ']])
results_eb = pd.DataFrame({
    'Date': prophet_data_eb['ds'],
    'Actual': prophet_data_eb['y'],
    'Predicted': forecast_eb['yhat']
})

10:27:58 - cmdstanpy - INFO - Chain [1] start processing
10:28:06 - cmdstanpy - INFO - Chain [1] done processing


In [28]:
#prediction for future dates
ebit_df = df[['YEAR', 'month_index','C_EBITDA',' DEPRECIATION ON ASSET ','C_EBIT']].copy()
ebit_df['ds'] = pd.to_datetime(ebit_df['YEAR'].astype(str) + '-' + ebit_df['month_index'].astype(str).str.zfill(2) + '-01')
ebit_df = ebit_df.rename(columns={'C_EBIT': 'y'})

model3 = Prophet()
model3.add_regressor('C_EBITDA')
model3.add_regressor(' DEPRECIATION ON ASSET ')

model3.fit(prophet_data_eb[['ds', 'y', 'C_EBITDA',' DEPRECIATION ON ASSET ']])

# Prepare the future dataframe
future_dates_eb = model3.make_future_dataframe(periods=12, freq='MS')

# Add regressor values for future dates
future_monthly_eb = df.groupby(['month_index'])[['C_EBITDA',' DEPRECIATION ON ASSET ']].mean().reset_index()
future_dates_eb['month_index'] = future_dates_eb['ds'].dt.month
future_dates_eb = pd.merge(future_dates_eb, future_monthly_eb, on='month_index', how='left')

# Predict
forecast_eb = model3.predict(future_dates_eb)

# Extract the result
future_results_eb = pd.DataFrame({
    'Date': forecast_eb['ds'],
    'Predicted': forecast_eb['yhat']
})

10:28:11 - cmdstanpy - INFO - Chain [1] start processing
10:28:18 - cmdstanpy - INFO - Chain [1] done processing


In [29]:
#combining actuals and forcasted values
combined_results_eb = pd.concat([
    results_eb[['Date', 'Actual', 'Predicted']],
    future_results_eb[['Date', 'Predicted']]
], ignore_index=True)

#groupby month
combined_results_eb['Month-Year'] = combined_results_eb['Date'].dt.to_period('M')
monthly_combined_results_eb = combined_results_eb.groupby('Month-Year').mean().reset_index()
monthly_combined_results_eb['Date'] = monthly_combined_results_eb['Month-Year'].dt.to_timestamp()

In [30]:
fig = go.Figure()

# Plot Actual Values
fig.add_trace(go.Scatter(
    x=monthly_combined_results_eb['Date'],
    y=monthly_combined_results_eb['Actual'],
    mode='lines+markers',
    name='Actual Values',
    line=dict(color='blue')
))

# Plot Predicted Values 
fig.add_trace(go.Scatter(
    x=monthly_combined_results_eb['Date'],
    y=monthly_combined_results_eb['Predicted'],
    mode='lines+markers',
    name='Predicted Values',
    line=dict(color='red', dash='dash')
))

fig.update_layout(
    title='Actual and Predicted EBIT',
    xaxis_title='Date',
    yaxis_title='EBIT',
    template='plotly_white',
    legend=dict(x=0.02, y=0.98),
    plot_bgcolor='rgba(0, 0, 0, 0)',
    xaxis_tickangle=-45
)

fig.show()

In [31]:
mape = np.mean(np.abs((monthly_combined_results_eb['Actual'] - monthly_combined_results_eb['Predicted']) / monthly_combined_results_eb['Actual'])) * 100
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

Mean Absolute Percentage Error (MAPE): 140.36%


## Predicting PBT

In [None]:
# pbt prediction

#prediction on historical data
from prophet_forcast import Prophet
# set 'ds' (datetime) and 'y' (target) column
prophet_data_p = df[['YEAR', 'month_index','C_PBT','Intrest_cost','C_EBIT']].copy()
prophet_data_p['ds'] = pd.to_datetime(prophet_data_p['YEAR'].astype(str) + '-' + prophet_data_p['month_index'].astype(str).str.zfill(2) + '-01')
prophet_data_p = prophet_data_p.rename(columns={'C_PBT': 'y'})

# initialize model
model4 = Prophet()
model4.add_regressor('C_EBIT')
model4.add_regressor('Intrest_cost')

model4.fit(prophet_data_p[['ds', 'y', 'C_EBIT','Intrest_cost']])

forecast_p = model4.predict(prophet_data_p[['ds', 'C_EBIT','Intrest_cost']])
results_p = pd.DataFrame({
    'Date': prophet_data_p['ds'],
    'Actual': prophet_data_p['y'],
    'Predicted': forecast_p['yhat']
})

10:28:20 - cmdstanpy - INFO - Chain [1] start processing
10:28:23 - cmdstanpy - INFO - Chain [1] done processing


In [33]:
#prediction for future dates
pbt_df = df[['YEAR', 'month_index','C_PBT','Intrest_cost','C_EBIT']].copy()
pbt_df['ds'] = pd.to_datetime(pbt_df['YEAR'].astype(str) + '-' + pbt_df['month_index'].astype(str).str.zfill(2) + '-01')
pbt_df = pbt_df.rename(columns={'C_PBT': 'y'})

model4 = Prophet()
model4.add_regressor('C_EBIT')
model4.add_regressor('Intrest_cost')

model4.fit(prophet_data_p[['ds', 'y', 'C_EBIT','Intrest_cost']])

# Prepare the future dataframe
future_dates_p = model4.make_future_dataframe(periods=12, freq='MS')

# Add regressor values for future dates
future_monthly_p = df.groupby(['month_index'])[['C_EBIT','Intrest_cost']].mean().reset_index()
future_dates_p['month_index'] = future_dates_p['ds'].dt.month
future_dates_p = pd.merge(future_dates_p, future_monthly_p, on='month_index', how='left')

# Predict
forecast_p = model4.predict(future_dates_p)

# Extract the result
future_results_p = pd.DataFrame({
    'Date': forecast_p['ds'],
    'Predicted': forecast_p['yhat']
})

10:28:28 - cmdstanpy - INFO - Chain [1] start processing
10:28:30 - cmdstanpy - INFO - Chain [1] done processing


In [34]:
#combining actuals and forcasted values
combined_results_p = pd.concat([
    results_p[['Date', 'Actual', 'Predicted']],
    future_results_p[['Date', 'Predicted']]
], ignore_index=True)

#groupby month
combined_results_p['Month-Year'] = combined_results_p['Date'].dt.to_period('M')
monthly_combined_results_p = combined_results_p.groupby('Month-Year').mean().reset_index()
monthly_combined_results_p['Date'] = monthly_combined_results_p['Month-Year'].dt.to_timestamp()

In [35]:
fig = go.Figure()

# Plot Actual Values
fig.add_trace(go.Scatter(
    x=monthly_combined_results_p['Date'],
    y=monthly_combined_results_p['Actual'],
    mode='lines+markers',
    name='Actual Values',
    line=dict(color='blue')
))

# Plot Predicted Values 
fig.add_trace(go.Scatter(
    x=monthly_combined_results_p['Date'],
    y=monthly_combined_results_p['Predicted'],
    mode='lines+markers',
    name='Predicted Values',
    line=dict(color='red', dash='dash')
))
fig.update_layout(
    title='Actual and Predicted PBT',
    xaxis_title='Date',
    yaxis_title='PBT',
    template='plotly_white',
    legend=dict(x=0.02, y=0.98),
    plot_bgcolor='rgba(0, 0, 0, 0)',
    xaxis_tickangle=-45
)
fig.show()

In [36]:
mape = np.mean(np.abs((monthly_combined_results_p['Actual'] - monthly_combined_results_p['Predicted']) / monthly_combined_results_p['Actual'])) * 100
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

Mean Absolute Percentage Error (MAPE): 6.70%


In [37]:
df.columns

Index(['BUSINESS GROUP', 'MATERIAL GROUP', 'YEAR', 'MONTH', ' SALES SERVICE ',
       ' LESS ORC ', ' NET SALES ', ' COST OF GOODS SOLD ',
       ' TRANSACTION MARGIN ', ' BACKEND INCOME ', ' ESTIMATE INCOME ',
       ' TOTAL BACKEND INCOME ', ' DEPRECIATION INVENTORY ',
       ' SALES COMMISSION ', ' GROSS MARGIN ', ' CASH DISCOUNT ',
       ' GROSS MARGIN CD ', ' OTHER INCOME ', ' FREIGHT ', ' INSURANCE ',
       ' COMMERCIAL TAX ', ' DRIECT EXPENSES ', ' COMPENSATION ',
       ' STAFF WELFATE ', ' OUTSOURCED RESOURCE ', ' TRAVEL ', ' CONVEYANCE ',
       ' COMMUNICATION ', ' UTILITIES ', ' REPAIRS MAINTENANCE ',
       ' PRINTING STATIONERY ', ' RENT ', ' RENT WAREHOUSE ',
       ' WAREHOUSE EXPENSES ', ' ENTERTAINMENT ', ' TRAINING ',
       ' ADVERTISMENT EXPENSES ', ' BAD DEBTS ', ' BANK CHARGES ',
       ' RATE TAXES ', ' CONSULTANCY BROKER ', ' AUDIT FEE ',
       ' FALSE GAIN OR LOSS ', ' EXCHANGE GAIN OR LOSS ',
       ' DIRECT SITTING FEE ', ' CSR ', ' FACTORING ', ' OTHER E

## Predicting PAT

In [None]:
# pbt prediction

#prediction on historical data
from prophet_forcast import Prophet
# set 'ds' (datetime) and 'y' (target) column
prophet_data_pa = df[['YEAR', 'month_index','C_PBT',' TAX EXPENSES ','C_PAT']].copy()
prophet_data_pa['ds'] = pd.to_datetime(prophet_data_pa['YEAR'].astype(str) + '-' + prophet_data_pa['month_index'].astype(str).str.zfill(2) + '-01')
prophet_data_pa = prophet_data_pa.rename(columns={'C_PAT': 'y'})

# initialize model
model5 = Prophet()
model5.add_regressor(' TAX EXPENSES ')
model5.add_regressor('C_PBT')

model5.fit(prophet_data_pa[['ds', 'y', 'C_PBT',' TAX EXPENSES ']])

forecast_pa = model5.predict(prophet_data_pa[['ds', 'C_PBT',' TAX EXPENSES ']])
results_pa = pd.DataFrame({
    'Date': prophet_data_pa['ds'],
    'Actual': prophet_data_pa['y'],
    'Predicted': forecast_pa['yhat']
    
})

10:28:32 - cmdstanpy - INFO - Chain [1] start processing
10:28:35 - cmdstanpy - INFO - Chain [1] done processing


In [39]:
#prediction for future dates
pat_df = df[['YEAR', 'month_index','C_PBT','C_PAT',' TAX EXPENSES ']].copy()
pat_df['ds'] = pd.to_datetime(pat_df['YEAR'].astype(str) + '-' + pat_df['month_index'].astype(str).str.zfill(2) + '-01')
pat_df = pat_df.rename(columns={'C_PAT': 'y'})

model5 = Prophet()
model5.add_regressor(' TAX EXPENSES ')
model5.add_regressor('C_PBT')

model5.fit(prophet_data_pa[['ds', 'y', 'C_PBT',' TAX EXPENSES ']])

# Prepare the future dataframe
future_dates_pa = model5.make_future_dataframe(periods=12, freq='MS')

# Add regressor values for future dates
future_monthly_pa = df.groupby(['month_index'])[['C_PBT',' TAX EXPENSES ']].mean().reset_index()
future_dates_pa['month_index'] = future_dates_pa['ds'].dt.month
future_dates_pa = pd.merge(future_dates_pa, future_monthly_pa, on='month_index', how='left')

# Predict
forecast_pa = model5.predict(future_dates_pa)

# Extract the result
future_results_pa = pd.DataFrame({
    'Date': forecast_pa['ds'],
    'Predicted': forecast_pa['yhat']
})

10:28:40 - cmdstanpy - INFO - Chain [1] start processing
10:28:44 - cmdstanpy - INFO - Chain [1] done processing


In [40]:
#combining actuals and forcasted values
combined_results_pa = pd.concat([
    results_pa[['Date', 'Actual', 'Predicted']],
    future_results_pa[['Date', 'Predicted']]
], ignore_index=True)

#groupby month
combined_results_pa['Month-Year'] = combined_results_pa['Date'].dt.to_period('M')
monthly_combined_results_pa = combined_results_pa.groupby('Month-Year').mean().reset_index()
monthly_combined_results_pa['Date'] = monthly_combined_results_pa['Month-Year'].dt.to_timestamp()

In [41]:
fig = go.Figure()

# Plot Actual Values
fig.add_trace(go.Scatter(
    x=monthly_combined_results_pa['Date'],
    y=monthly_combined_results_pa['Actual'],
    mode='lines+markers',
    name='Actual Values',
    line=dict(color='blue')
))

# Plot Predicted Values 
fig.add_trace(go.Scatter(
    x=monthly_combined_results_pa['Date'],
    y=monthly_combined_results_pa['Predicted'],
    mode='lines+markers',
    name='Predicted Values',
    line=dict(color='red', dash='dash')
))
fig.update_layout(
    title='Actual and Predicted PAT',
    xaxis_title='Date',
    yaxis_title='PAT',
    template='plotly_white',
    legend=dict(x=0.02, y=0.98),
    plot_bgcolor='rgba(0, 0, 0, 0)',
    xaxis_tickangle=-45
)
fig.show()

In [42]:
mape = np.mean(np.abs((monthly_combined_results_pa['Actual'] - monthly_combined_results_pa['Predicted']) / monthly_combined_results_pa['Actual'])) * 100
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

Mean Absolute Percentage Error (MAPE): 4.13%
