In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from statsmodels.tsa.arima.model import ARIMA
from scipy.stats import gamma

In [15]:
# Load data
peserta = pd.read_excel('Data Kepesertaan.xlsx', engine='openpyxl',skiprows=1)
non_cbgs = pd.read_excel('Data Proyeksi Beban.xlsx', sheet_name='Non-CBGs', engine='openpyxl',skiprows=1)
non_kapitasi = pd.read_excel('Data Proyeksi Beban.xlsx', sheet_name='Non Kapitasi', engine='openpyxl',skiprows=1)
promprev = pd.read_excel('Data Proyeksi Beban.xlsx', sheet_name='Promprev', engine='openpyxl',skiprows=1)

MODEL 1 : Moving Average

In [None]:
def moving_average(peserta,biaya) :
    # Convert to numpy arrays
    array_peserta = peserta.to_numpy()
    array_biaya = biaya.to_numpy()

    # Define category mappings
    kelas_1_idx = [2,4,6,8,11,13]
    kelas_2_idx = [3,5,7,9,12,14]
    kelas_3_idx = [0,1,10,15]

    def group_columns(data, indices):
        return np.mean(data[:, indices], axis=1)

    # Aggregate participants and costs by class
    peserta_kelas_1 = group_columns(array_peserta, kelas_1_idx)
    peserta_kelas_2 = group_columns(array_peserta, kelas_2_idx)
    peserta_kelas_3 = group_columns(array_peserta, kelas_3_idx)

    biaya_kelas_1 = group_columns(array_biaya, kelas_1_idx[:len(array_biaya[0])])
    biaya_kelas_2 = group_columns(array_biaya, kelas_2_idx[:len(array_biaya[0])])
    biaya_kelas_3 = group_columns(array_biaya, kelas_3_idx[:len(array_biaya[0])])

    # Compute CPMPM for available periods
    cpmpm_kelas_1 = biaya_kelas_1 / peserta_kelas_1[:98]
    cpmpm_kelas_2 = biaya_kelas_2 / peserta_kelas_2[:98]
    cpmpm_kelas_3 = biaya_kelas_3 / peserta_kelas_3[:98]

    # Forecast CPMPM using modified moving average model
    def forecast_cpmpm(cpmpm):
        cpmpm_forecast = list(cpmpm)
        for i in range(98, 180):
            avg_growth = np.mean([np.log(cpmpm_forecast[k] / cpmpm_forecast[k-1]) for k in range(1, i)])
            avg_cpmpm = np.mean(cpmpm_forecast[-12:])
            cpmpm_forecast.append(avg_cpmpm * (1 + avg_growth))
        return np.array(cpmpm_forecast)

    cpmpm_kelas_1 = forecast_cpmpm(cpmpm_kelas_1)
    cpmpm_kelas_2 = forecast_cpmpm(cpmpm_kelas_2)
    cpmpm_kelas_3 = forecast_cpmpm(cpmpm_kelas_3)

    # Compute projected costs
    biaya_kelas_1 = cpmpm_kelas_1 * peserta_kelas_1
    biaya_kelas_2 = cpmpm_kelas_2 * peserta_kelas_2
    biaya_kelas_3 = cpmpm_kelas_3 * peserta_kelas_3

    # Convert to yearly periods
    month_in_year = 12

    def yearly_average(data):
        valid_length = (len(data) // month_in_year) * month_in_year
        return data[:valid_length].reshape(-1, month_in_year).mean(axis=1)

    cpmpm_annual_kelas_1 = yearly_average(cpmpm_kelas_1)
    cpmpm_annual_kelas_2 = yearly_average(cpmpm_kelas_2)
    cpmpm_annual_kelas_3 = yearly_average(cpmpm_kelas_3)

    biaya_annual_kelas_1 = yearly_average(biaya_kelas_1)
    biaya_annual_kelas_2 = yearly_average(biaya_kelas_2)
    biaya_annual_kelas_3 = yearly_average(biaya_kelas_3)

    # Plot results
    years = np.arange(2016, 2016 + len(cpmpm_annual_kelas_1))
    plt.figure(figsize=(10,5))
    plt.plot(years, cpmpm_annual_kelas_1, label='Kelas 1')
    plt.plot(years, cpmpm_annual_kelas_2, label='Kelas 2')
    plt.plot(years, cpmpm_annual_kelas_3, label='Kelas 3')
    plt.xlabel('Tahun')
    plt.ylabel('CPMPM')
    plt.title('Proyeksi CPMPM Tahunan')
    plt.legend()
    plt.show()

    # Export results
    #data_cpmpm = pd.DataFrame({'Tahun': years, 'Kelas 1': cpmpm_annual_kelas_1, 'Kelas 2': cpmpm_annual_kelas_2, 'Kelas 3': cpmpm_annual_kelas_3})
    #data_cpmpm.to_excel("CPMPM_Annual.xlsx", index=False)

    #data_biaya = pd.DataFrame({'Tahun': years, 'Kelas 1': biaya_annual_kelas_1, 'Kelas 2': biaya_annual_kelas_2, 'Kelas 3': biaya_annual_kelas_3})
    #data_biaya.to_excel("Biaya_Annual.xlsx", index=False)

#main code
moving_average(peserta,non_cbgs)

MODEL 2 : Geometrik (CAGR - Compound Annual Growth Rate)

In [None]:
def cgr(peserta,biaya) :
    # Convert to numpy arrays
    array_peserta = peserta.to_numpy()
    array_biaya = biaya.to_numpy()

    # Define category mappings
    kelas_1_idx = [2,4,6,8,11,13]
    kelas_2_idx = [3,5,7,9,12,14]
    kelas_3_idx = [0,1,10,15]

    def group_columns(data, indices):
        return np.mean(data[:, indices], axis=1)

    # Aggregate participants and costs by class
    peserta_kelas_1 = group_columns(array_peserta, kelas_1_idx)
    peserta_kelas_2 = group_columns(array_peserta, kelas_2_idx)
    peserta_kelas_3 = group_columns(array_peserta, kelas_3_idx)

    biaya_kelas_1 = group_columns(array_biaya, kelas_1_idx[:len(array_biaya[0])])
    biaya_kelas_2 = group_columns(array_biaya, kelas_2_idx[:len(array_biaya[0])])
    biaya_kelas_3 = group_columns(array_biaya, kelas_3_idx[:len(array_biaya[0])])

    # Compute CPMPM for available periods
    cpmpm_kelas_1 = biaya_kelas_1 / peserta_kelas_1[:98]
    cpmpm_kelas_2 = biaya_kelas_2 / peserta_kelas_2[:98]
    cpmpm_kelas_3 = biaya_kelas_3 / peserta_kelas_3[:98]

    # Forecast CPMPM using CAGR model
    initial_value_1 = cpmpm_kelas_1[0]
    final_value_1 = cpmpm_kelas_1[-1]
    n_1 = len(cpmpm_kelas_1) - 1
    cagr_1 = (final_value_1 / initial_value_1) ** (1/n_1) - 1

    initial_value_2 = cpmpm_kelas_2[0]
    final_value_2 = cpmpm_kelas_2[-1]
    n_2 = len(cpmpm_kelas_2) - 1
    cagr_2 = (final_value_2 / initial_value_2) ** (1/n_2) - 1

    initial_value_3 = cpmpm_kelas_3[0]
    final_value_3 = cpmpm_kelas_3[-1]
    n_3 = len(cpmpm_kelas_3) - 1
    cagr_3 = (final_value_3 / initial_value_3) ** (1/n_3) - 1

    for i in range(98, 180):
        cpmpm_kelas_1 = np.append(cpmpm_kelas_1, cpmpm_kelas_1[-1] * (1 + cagr_1))
        cpmpm_kelas_2 = np.append(cpmpm_kelas_2, cpmpm_kelas_2[-1] * (1 + cagr_2))
        cpmpm_kelas_3 = np.append(cpmpm_kelas_3, cpmpm_kelas_3[-1] * (1 + cagr_3))

    # Compute projected costs
    biaya_kelas_1 = cpmpm_kelas_1 * peserta_kelas_1
    biaya_kelas_2 = cpmpm_kelas_2 * peserta_kelas_2
    biaya_kelas_3 = cpmpm_kelas_3 * peserta_kelas_3

    # Convert to yearly periods
    month_in_year = 12
    def yearly_average(data):
        valid_length = (len(data) // month_in_year) * month_in_year
        return data[:valid_length].reshape(-1, month_in_year).mean(axis=1)

    cpmpm_annual_kelas_1 = yearly_average(cpmpm_kelas_1)
    cpmpm_annual_kelas_2 = yearly_average(cpmpm_kelas_2)
    cpmpm_annual_kelas_3 = yearly_average(cpmpm_kelas_3)

    biaya_annual_kelas_1 = yearly_average(biaya_kelas_1)
    biaya_annual_kelas_2 = yearly_average(biaya_kelas_2)
    biaya_annual_kelas_3 = yearly_average(biaya_kelas_3)

    # Plot results
    years = np.arange(2016, 2016 + len(cpmpm_annual_kelas_1))
    plt.figure(figsize=(10,5))
    plt.plot(years, cpmpm_annual_kelas_1, label='Kelas 1')
    plt.plot(years, cpmpm_annual_kelas_2, label='Kelas 2')
    plt.plot(years, cpmpm_annual_kelas_3, label='Kelas 3')
    plt.xlabel('Tahun')
    plt.ylabel('CPMPM')
    plt.title('Proyeksi CPMPM Tahunan (CAGR)')
    plt.legend()
    plt.show()

cgr(peserta,non_cbgs)

MODEL 3 : Regresi Linear

In [None]:
def regresi_linear(peserta,biaya) :
    # Convert to numpy arrays
    array_peserta = peserta.to_numpy()
    array_biaya = biaya.to_numpy()

    # Define category mappings
    kelas_1_idx = [2,4,6,8,11,13]
    kelas_2_idx = [3,5,7,9,12,14]
    kelas_3_idx = [0,1,10,15]

    def group_columns(data, indices):
        return np.mean(data[:, indices], axis=1)

    # Aggregate participants and costs by class
    peserta_kelas_1 = group_columns(array_peserta, kelas_1_idx)
    peserta_kelas_2 = group_columns(array_peserta, kelas_2_idx)
    peserta_kelas_3 = group_columns(array_peserta, kelas_3_idx)

    biaya_kelas_1 = group_columns(array_biaya, kelas_1_idx[:len(array_biaya[0])])
    biaya_kelas_2 = group_columns(array_biaya, kelas_2_idx[:len(array_biaya[0])])
    biaya_kelas_3 = group_columns(array_biaya, kelas_3_idx[:len(array_biaya[0])])

    # Compute CPMPM for available periods
    cpmpm_kelas_1 = biaya_kelas_1 / peserta_kelas_1[:98]
    cpmpm_kelas_2 = biaya_kelas_2 / peserta_kelas_2[:98]
    cpmpm_kelas_3 = biaya_kelas_3 / peserta_kelas_3[:98]

    # Forecast CPMPM using linear regression model
    def forecast_cpmpm_linear(cpmpm):
        X = np.arange(len(cpmpm)).reshape(-1, 1)
        y = cpmpm.reshape(-1, 1)
        model = LinearRegression().fit(X, y)
        future_X = np.arange(98, 180).reshape(-1, 1)
        future_cpmpm = model.predict(future_X).flatten()
        return np.concatenate((cpmpm, future_cpmpm))

    cpmpm_kelas_1 = forecast_cpmpm_linear(cpmpm_kelas_1)
    cpmpm_kelas_2 = forecast_cpmpm_linear(cpmpm_kelas_2)
    cpmpm_kelas_3 = forecast_cpmpm_linear(cpmpm_kelas_3)

    # Compute projected costs
    biaya_kelas_1 = cpmpm_kelas_1 * peserta_kelas_1
    biaya_kelas_2 = cpmpm_kelas_2 * peserta_kelas_2
    biaya_kelas_3 = cpmpm_kelas_3 * peserta_kelas_3

    # Convert to yearly periods
    month_in_year = 12
    def yearly_average(data):
        valid_length = (len(data) // month_in_year) * month_in_year
        return data[:valid_length].reshape(-1, month_in_year).mean(axis=1)

    cpmpm_annual_kelas_1 = yearly_average(cpmpm_kelas_1)
    cpmpm_annual_kelas_2 = yearly_average(cpmpm_kelas_2)
    cpmpm_annual_kelas_3 = yearly_average(cpmpm_kelas_3)

    biaya_annual_kelas_1 = yearly_average(biaya_kelas_1)
    biaya_annual_kelas_2 = yearly_average(biaya_kelas_2)
    biaya_annual_kelas_3 = yearly_average(biaya_kelas_3)

    # Plot results
    years = np.arange(2016, 2016 + len(cpmpm_annual_kelas_1))
    plt.figure(figsize=(10,5))
    plt.plot(years, cpmpm_annual_kelas_1, label='Kelas 1')
    plt.plot(years, cpmpm_annual_kelas_2, label='Kelas 2')
    plt.plot(years, cpmpm_annual_kelas_3, label='Kelas 3')
    plt.xlabel('Tahun')
    plt.ylabel('CPMPM')
    plt.title('Proyeksi CPMPM Tahunan (Linear Regression)')
    plt.legend()
    plt.show()

regresi_linear(peserta,non_cbgs)

MODEL 4 : ARIMA (AutoRegressive Integrated Moving Average)

In [None]:
def arima(peserta,biaya) :
    # Convert to numpy arrays
    array_peserta = peserta.to_numpy()
    array_biaya = biaya.to_numpy()

    # Define category mappings
    kelas_1_idx = [2,4,6,8,11,13]
    kelas_2_idx = [3,5,7,9,12,14]
    kelas_3_idx = [0,1,10,15]

    def group_columns(data, indices):
        return np.mean(data[:, indices], axis=1)

    # Aggregate participants and costs by class
    peserta_kelas_1 = group_columns(array_peserta, kelas_1_idx)
    peserta_kelas_2 = group_columns(array_peserta, kelas_2_idx)
    peserta_kelas_3 = group_columns(array_peserta, kelas_3_idx)

    biaya_kelas_1 = group_columns(array_biaya, kelas_1_idx[:len(array_biaya[0])])
    biaya_kelas_2 = group_columns(array_biaya, kelas_2_idx[:len(array_biaya[0])])
    biaya_kelas_3 = group_columns(array_biaya, kelas_3_idx[:len(array_biaya[0])])

    # Compute CPMPM for available periods
    cpmpm_kelas_1 = biaya_kelas_1 / peserta_kelas_1[:98]
    cpmpm_kelas_2 = biaya_kelas_2 / peserta_kelas_2[:98]
    cpmpm_kelas_3 = biaya_kelas_3 / peserta_kelas_3[:98]

    # Forecast CPMPM using ARIMA with fixed parameters

    def forecast_cpmpm_arima(cpmpm, order=(1,0,1)):
        model = ARIMA(cpmpm, order=order)
        model_fit = model.fit()
        future_cpmpm = model_fit.forecast(steps=82)
        return np.concatenate((cpmpm, future_cpmpm))

    cpmpm_kelas_1 = forecast_cpmpm_arima(cpmpm_kelas_1)
    cpmpm_kelas_2 = forecast_cpmpm_arima(cpmpm_kelas_2)
    cpmpm_kelas_3 = forecast_cpmpm_arima(cpmpm_kelas_3)

    # Compute projected costs
    biaya_kelas_1 = cpmpm_kelas_1 * peserta_kelas_1
    biaya_kelas_2 = cpmpm_kelas_2 * peserta_kelas_2
    biaya_kelas_3 = cpmpm_kelas_3 * peserta_kelas_3

    # Convert to yearly periods
    month_in_year = 12
    def yearly_average(data):
        valid_length = (len(data) // month_in_year) * month_in_year
        return data[:valid_length].reshape(-1, month_in_year).mean(axis=1)

    cpmpm_annual_kelas_1 = yearly_average(cpmpm_kelas_1)
    cpmpm_annual_kelas_2 = yearly_average(cpmpm_kelas_2)
    cpmpm_annual_kelas_3 = yearly_average(cpmpm_kelas_3)

    biaya_annual_kelas_1 = yearly_average(biaya_kelas_1)
    biaya_annual_kelas_2 = yearly_average(biaya_kelas_2)
    biaya_annual_kelas_3 = yearly_average(biaya_kelas_3)

    # Plot results
    years = np.arange(2016, 2016 + len(cpmpm_annual_kelas_1))
    plt.figure(figsize=(10,5))
    plt.plot(years, cpmpm_annual_kelas_1, label='Kelas 1')
    plt.plot(years, cpmpm_annual_kelas_2, label='Kelas 2')
    plt.plot(years, cpmpm_annual_kelas_3, label='Kelas 3')
    plt.xlabel('Tahun')
    plt.ylabel('CPMPM')
    plt.title('Proyeksi CPMPM Tahunan (ARIMA)')
    plt.legend()
    plt.show()


arima(peserta,non_cbgs)

MODEL 5 : Uji Distribusi & Rata-rata Sesuai Distribusi

In [None]:
def distribusi(peserta,biaya):
# Convert to numpy arrays
    array_peserta = peserta.to_numpy()
    array_biaya = biaya.to_numpy()

    # Define category mappings
    kelas_1_idx = [2,4,6,8,11,13]
    kelas_2_idx = [3,5,7,9,12,14]
    kelas_3_idx = [0,1,10,15]

    def group_columns(data, indices):
        return np.mean(data[:, indices], axis=1)

    # Aggregate participants and costs by class
    peserta_kelas_1 = group_columns(array_peserta, kelas_1_idx)
    peserta_kelas_2 = group_columns(array_peserta, kelas_2_idx)
    peserta_kelas_3 = group_columns(array_peserta, kelas_3_idx)

    biaya_kelas_1 = group_columns(array_biaya, kelas_1_idx[:len(array_biaya[0])])
    biaya_kelas_2 = group_columns(array_biaya, kelas_2_idx[:len(array_biaya[0])])
    biaya_kelas_3 = group_columns(array_biaya, kelas_3_idx[:len(array_biaya[0])])

    # Compute CPMPM for available periods
    cpmpm_kelas_1 = biaya_kelas_1 / peserta_kelas_1[:98]
    cpmpm_kelas_2 = biaya_kelas_2 / peserta_kelas_2[:98]
    cpmpm_kelas_3 = biaya_kelas_3 / peserta_kelas_3[:98]

    # Fit distributions
    possible_distributions = ['norm', 'expon', 'gamma', 'lognorm']
    def best_fit_distribution(data):
        best_dist = None
        best_ks_stat = np.inf
        
        for dist_name in possible_distributions:
            dist = getattr(stats, dist_name)
            params = dist.fit(data)
            ks_stat, _ = stats.kstest(data, dist_name, args=params)
            
            if ks_stat < best_ks_stat:
                best_ks_stat = ks_stat
                best_dist = (dist_name, params)
        print(best_dist)
        return best_dist

    best_dist_kelas_1 = best_fit_distribution(cpmpm_kelas_1)
    best_dist_kelas_2 = best_fit_distribution(cpmpm_kelas_2)
    best_dist_kelas_3 = best_fit_distribution(cpmpm_kelas_3)

    # Generate future CPMPM values based on fitted distributions
    def generate_cpmpm(best_dist, size):
        dist_name, params = best_dist
        dist = getattr(stats, dist_name)
        return dist.rvs(*params, size=size)

    future_cpmpm_kelas_1 = generate_cpmpm(best_dist_kelas_1, 82)
    future_cpmpm_kelas_2 = generate_cpmpm(best_dist_kelas_2, 82)
    future_cpmpm_kelas_3 = generate_cpmpm(best_dist_kelas_3, 82)

    cpmpm_kelas_1 = np.concatenate((cpmpm_kelas_1, future_cpmpm_kelas_1))
    cpmpm_kelas_2 = np.concatenate((cpmpm_kelas_2, future_cpmpm_kelas_2))
    cpmpm_kelas_3 = np.concatenate((cpmpm_kelas_3, future_cpmpm_kelas_3))

    # Compute projected costs
    biaya_kelas_1 = cpmpm_kelas_1 * peserta_kelas_1
    biaya_kelas_2 = cpmpm_kelas_2 * peserta_kelas_2
    biaya_kelas_3 = cpmpm_kelas_3 * peserta_kelas_3

    # Convert to yearly periods
    month_in_year = 12
    def yearly_average(data):
        valid_length = (len(data) // month_in_year) * month_in_year
        return data[:valid_length].reshape(-1, month_in_year).mean(axis=1)

    cpmpm_annual_kelas_1 = yearly_average(cpmpm_kelas_1)
    cpmpm_annual_kelas_2 = yearly_average(cpmpm_kelas_2)
    cpmpm_annual_kelas_3 = yearly_average(cpmpm_kelas_3)

    biaya_annual_kelas_1 = yearly_average(biaya_kelas_1)
    biaya_annual_kelas_2 = yearly_average(biaya_kelas_2)
    biaya_annual_kelas_3 = yearly_average(biaya_kelas_3)

    # Plot results
    years = np.arange(2016, 2016 + len(cpmpm_annual_kelas_1))
    plt.figure(figsize=(10,5))
    plt.plot(years, cpmpm_annual_kelas_1, label='Kelas 1')
    plt.plot(years, cpmpm_annual_kelas_2, label='Kelas 2')
    plt.plot(years, cpmpm_annual_kelas_3, label='Kelas 3')
    plt.xlabel('Tahun')
    plt.ylabel('CPMPM')
    plt.title('Proyeksi CPMPM Tahunan (Distribusi Terbaik)')
    plt.legend()
    plt.show()


distribusi(peserta,non_cbgs)

MODEL 6 : Gaussian Process Regression (GPR)

In [None]:
def gpr(peserta,biaya) :
   # Convert to numpy arrays
    array_peserta = peserta.to_numpy()
    array_biaya = biaya.to_numpy()

    # Define category mappings
    kelas_1_idx = [2,4,6,8,11,13]
    kelas_2_idx = [3,5,7,9,12,14]
    kelas_3_idx = [0,1,10,15]

    def group_columns(data, indices):
        return np.mean(data[:, indices], axis=1)

    # Aggregate participants and costs by class
    peserta_kelas_1 = group_columns(array_peserta, kelas_1_idx)
    peserta_kelas_2 = group_columns(array_peserta, kelas_2_idx)
    peserta_kelas_3 = group_columns(array_peserta, kelas_3_idx)

    biaya_kelas_1 = group_columns(array_biaya, kelas_1_idx[:len(array_biaya[0])])
    biaya_kelas_2 = group_columns(array_biaya, kelas_2_idx[:len(array_biaya[0])])
    biaya_kelas_3 = group_columns(array_biaya, kelas_3_idx[:len(array_biaya[0])])

    # Compute CPMPM for available periods
    cpmpm_kelas_1 = biaya_kelas_1 / peserta_kelas_1[:98]
    cpmpm_kelas_2 = biaya_kelas_2 / peserta_kelas_2[:98]
    cpmpm_kelas_3 = biaya_kelas_3 / peserta_kelas_3[:98]

    # Gaussian Process Regression for CPMPM Projection
    def gpr_forecast(cpmpm_data, periods=82):
        X = np.arange(len(cpmpm_data)).reshape(-1, 1)
        y = cpmpm_data
        kernel = C(1.0, (1e-2, 1e3)) * RBF(10, (1e-2, 1e3))
        gpr = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10)
        gpr.fit(X, y)
        
        future_X = np.arange(len(cpmpm_data), len(cpmpm_data) + periods).reshape(-1, 1)
        future_cpmpm, _ = gpr.predict(future_X, return_std=True)
        
        return future_cpmpm

    future_cpmpm_kelas_1 = gpr_forecast(cpmpm_kelas_1)
    future_cpmpm_kelas_2 = gpr_forecast(cpmpm_kelas_2)
    future_cpmpm_kelas_3 = gpr_forecast(cpmpm_kelas_3)

    cpmpm_kelas_1 = np.concatenate((cpmpm_kelas_1, future_cpmpm_kelas_1))
    cpmpm_kelas_2 = np.concatenate((cpmpm_kelas_2, future_cpmpm_kelas_2))
    cpmpm_kelas_3 = np.concatenate((cpmpm_kelas_3, future_cpmpm_kelas_3))

    # Compute projected costs
    biaya_kelas_1 = cpmpm_kelas_1 * peserta_kelas_1
    biaya_kelas_2 = cpmpm_kelas_2 * peserta_kelas_2
    biaya_kelas_3 = cpmpm_kelas_3 * peserta_kelas_3

    # Convert to yearly periods
    month_in_year = 12
    def yearly_average(data):
        valid_length = (len(data) // month_in_year) * month_in_year
        return data[:valid_length].reshape(-1, month_in_year).mean(axis=1)

    cpmpm_annual_kelas_1 = yearly_average(cpmpm_kelas_1)
    cpmpm_annual_kelas_2 = yearly_average(cpmpm_kelas_2)
    cpmpm_annual_kelas_3 = yearly_average(cpmpm_kelas_3)

    biaya_annual_kelas_1 = yearly_average(biaya_kelas_1)
    biaya_annual_kelas_2 = yearly_average(biaya_kelas_2)
    biaya_annual_kelas_3 = yearly_average(biaya_kelas_3)

    # Plot results
    years = np.arange(2016, 2016 + len(cpmpm_annual_kelas_1))
    plt.figure(figsize=(10,5))
    plt.plot(years, cpmpm_annual_kelas_1, label='Kelas 1')
    plt.plot(years, cpmpm_annual_kelas_2, label='Kelas 2')
    plt.plot(years, cpmpm_annual_kelas_3, label='Kelas 3')
    plt.xlabel('Tahun')
    plt.ylabel('CPMPM')
    plt.title('Proyeksi CPMPM Tahunan (Gaussian Process Regression)')
    plt.legend()
    plt.show()
 
gpr(peserta,non_cbgs)

In [None]:
def sma(peserta,biaya) :
    # Convert to numpy arrays
    array_peserta = peserta.to_numpy()
    array_biaya = biaya.to_numpy()

    # Ensure biaya and peserta have matching dimensions (Jan 2016 - Dec 2022, 84 months)
    array_peserta = array_peserta[:84, :]
    array_biaya = array_biaya[:84, :]

    # Compute CPMPM historical (Jan 2016 - Dec 2022)
    cpm_historic = array_biaya / array_peserta
    print(cpm_historic)
    # Aggregate categories into 3 classes
    kelas_1_indices = [2, 4, 6, 8, 11, 13]
    kelas_2_indices = [3, 5, 7, 9, 12, 14]
    kelas_3_indices = [0, 1, 10, 15]

    cpm_kelas_1 = np.mean(cpm_historic[:, kelas_1_indices], axis=1)
    cpm_kelas_2 = np.mean(cpm_historic[:, kelas_2_indices], axis=1)
    cpm_kelas_3 = np.mean(cpm_historic[:, kelas_3_indices], axis=1)

    cpm_historic = np.vstack([cpm_kelas_1, cpm_kelas_2, cpm_kelas_3])

    # Initialize projection list
    cpmpm_proj = list(cpm_historic.T)

    # Projections using SMA 1 with dynamically updated avg growth (Jan 2023 - Dec 2030)
    n_future = 96
    for i in range(n_future):
        sma_12 = np.mean(cpmpm_proj[-12:], axis=0)
        avg_growth = np.mean(np.log(np.array(cpmpm_proj[1:]) / np.array(cpmpm_proj[:-1])), axis=0)  # Dihitung dari seluruh bulan sebelumnya
        new_value = sma_12 * np.exp(avg_growth)
        cpmpm_proj.append(new_value)

    cpmpm_proj = np.array(cpmpm_proj).T

    # Convert to yearly CPMPM (averaging 12 months per year)
    def to_yearly_cpmpm(data):
        return np.array([np.mean(data[i:i+12]) for i in range(0, len(data), 12)])

    # Hitung CPMPM tahunan untuk masing-masing kelas
    cpmpm_yearly = np.array([to_yearly_cpmpm(cpmpm_proj[i]) for i in range(cpmpm_proj.shape[0])])

    # Sesuaikan tahun berdasarkan panjang data tahunan
    years = np.arange(2016, 2016 + cpmpm_yearly.shape[1])

    # Debug prints
    print("Years shape:", years.shape)
    print("CPMPM yearly shape:", cpmpm_yearly.shape)

    # Plot results
    plt.figure(figsize=(10, 5))
    for i in range(cpmpm_yearly.shape[0]):
        plt.plot(years, cpmpm_yearly[i], marker='o', label=f'CPMPM Kelas {i+1}')
    plt.xlabel('Year')
    plt.ylabel('CPMPM')
    plt.title('CPMPM Projection Promprev using SMA')
    plt.legend()
    plt.grid()
    plt.show()

    # Export to Excel
    df_export = pd.DataFrame(cpmpm_yearly.T, columns=[f'CPMPM Kelas {i+1}' for i in range(cpmpm_yearly.shape[0])])
    df_export.insert(0, 'Year', years)
    df_export.to_excel("CPMPM_Projection_SMA_Promprev.xlsx", index=False)

sma(peserta, promprev)

In [13]:
def ARIMA(peserta,biaya) :
  # Convert to numpy arrays
    array_peserta = peserta.to_numpy()
    array_biaya = biaya.to_numpy()

    # Ensure biaya and peserta have matching dimensions (Jan 2016 - Dec 2022, 84 months)
    array_peserta = array_peserta[:84, :]
    array_biaya = array_biaya[:84, :]

    # Compute CPMPM historical with safe division
    cpm_historic = np.divide(array_biaya, array_peserta, where=array_peserta!=0)

    # Aggregate into 3 classes using mean instead of sum
    kelas = [
        np.mean(cpm_historic[:, [2, 4, 6, 8, 11, 13]], axis=1),
        np.mean(cpm_historic[:, [3, 5, 7, 9, 12, 14]], axis=1),
        np.mean(cpm_historic[:, [0, 1, 10, 15]], axis=1)
    ]

    array_cpmpm = np.vstack(kelas).T

    # Fit ARIMA model for each class and forecast
    def forecast_arima(data, n_future=96):
        model = ARIMA(data,(1,0,1))  # ARIMA(p=1, d=1, q=1)
        model_fit = model.fit()
        forecast = model_fit.forecast(steps=n_future)
        return forecast

    # Forecast future CPMPM values
    cpmpm_proj = np.zeros((array_cpmpm.shape[0] + 96, array_cpmpm.shape[1]))
    cpmpm_proj[:array_cpmpm.shape[0], :] = array_cpmpm

    for i in range(array_cpmpm.shape[1]):
        cpmpm_proj[array_cpmpm.shape[0]:, i] = forecast_arima(array_cpmpm[:, i])

    # Convert to yearly CPMPM (averaging 12 months per year)
    def to_yearly_cpmpm(data):
        if len(data) < 12:
            return np.array([np.mean(data)])
        return np.array([np.mean(data[i:i+12]) for i in range(0, len(data) - 11, 12)])

    cpmpm_yearly = np.array([to_yearly_cpmpm(cpmpm_proj[:, i]) for i in range(cpmpm_proj.shape[1])])
    # Adjust years based on yearly data length
    years = np.arange(2016, 2016 + cpmpm_yearly.shape[1])

    # Plot results
    plt.figure(figsize=(10, 5))
    for i in range(cpmpm_yearly.shape[0]):
        plt.plot(years, cpmpm_yearly[i], marker='o', label=f'CPMPM Kelas {i+1}')
    plt.xlabel('Year')
    plt.ylabel('CPMPM')
    plt.title('CPMPM Projection Non-CBGs using ARIMA')
    plt.legend()
    plt.grid()
    plt.show()

    # Export to Excel
    df_export = pd.DataFrame(cpmpm_yearly.T, columns=[f'CPMPM Kelas {i+1}' for i in range(cpmpm_yearly.shape[0])])
    df_export.insert(0, 'Year', years)
    df_export.to_excel("cpmpm_projection_arima_non_cbgs.xlsx", index=False)

ARIMA(peserta,non_cbgs)

AttributeError: 'numpy.ndarray' object has no attribute 'to_numpy'

In [None]:
def mc_norm(peserta,biaya) :
    # Convert to numpy arrays
    array_peserta = peserta.to_numpy()
    array_biaya = biaya.to_numpy()

    # Ensure biaya and peserta have matching dimensions (Jan 2016 - Dec 2022, 84 months)
    array_peserta = array_peserta[:84, :]
    array_biaya = array_biaya[:84, :]

    # Compute CPMPM historical with safe division
    cpm_historic = np.divide(array_biaya, array_peserta, where=array_peserta!=0)

    # Aggregate into 3 classes using mean instead of sum
    kelas = [
        np.mean(cpm_historic[:, [2, 4, 6, 8, 11, 13]], axis=1),
        np.mean(cpm_historic[:, [3, 5, 7, 9, 12, 14]], axis=1),
        np.mean(cpm_historic[:, [0, 1, 10, 15]], axis=1)
    ]

    array_cpmpm = np.vstack(kelas).T

    # Monte Carlo Simulation parameters
    n_future = 96  # Forecasting 96 months ahead
    n_simulations = 1000  # Number of Monte Carlo simulations

    # Function to generate Monte Carlo simulations
    def monte_carlo_forecast(data, n_future, n_simulations):
        mean = np.mean(data)
        std_dev = np.std(data)
        simulations = np.random.normal(mean, std_dev, (n_future, n_simulations))
        return np.mean(simulations, axis=1)  # Take mean over simulations

    # Forecast future CPMPM values using Monte Carlo
    cpmpm_proj = np.zeros((array_cpmpm.shape[0] + n_future, array_cpmpm.shape[1]))
    cpmpm_proj[:array_cpmpm.shape[0], :] = array_cpmpm

    for i in range(array_cpmpm.shape[1]):
        cpmpm_proj[array_cpmpm.shape[0]:, i] = monte_carlo_forecast(array_cpmpm[:, i], n_future, n_simulations)

    # Convert to yearly CPMPM (averaging 12 months per year)
    def to_yearly_cpmpm(data):
        if len(data) < 12:
            return np.array([np.mean(data)])
        return np.array([np.mean(data[i:i+12]) for i in range(0, len(data) - 11, 12)])

    cpmpm_yearly = np.array([to_yearly_cpmpm(cpmpm_proj[:, i]) for i in range(cpmpm_proj.shape[1])])
    # Adjust years based on yearly data length
    years = np.arange(2016, 2016 + cpmpm_yearly.shape[1])

    # Plot results
    plt.figure(figsize=(10, 5))
    for i in range(cpmpm_yearly.shape[0]):
        plt.plot(years, cpmpm_yearly[i], marker='o', label=f'CPMPM Kelas {i+1}')
    plt.xlabel('Year')
    plt.ylabel('CPMPM')
    plt.title('CPMPM Projection Promprev using Monte Carlo Simulation')
    plt.legend()
    plt.grid()
    plt.show()

    # Export to Excel
    df_export = pd.DataFrame(cpmpm_yearly.T, columns=[f'CPMPM Kelas {i+1}' for i in range(cpmpm_yearly.shape[0])])
    df_export.insert(0, 'Year', years)
    df_export.to_excel("cpmpm_projection_mc_promprev.xlsx", index=False)

mc_norm(peserta,promprev)

In [None]:
def mc_gamma (peserta, biaya) :
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from scipy.stats import gamma

    # Convert to numpy arrays
    array_peserta = peserta.to_numpy()
    array_biaya = biaya.to_numpy()

    # Ensure biaya and peserta have matching dimensions (Jan 2016 - Dec 2022, 84 months)
    array_peserta = array_peserta[:84, :]
    array_biaya = array_biaya[:84, :]

    # Compute CPMPM historical with safe division
    cpm_historic = np.divide(array_biaya, array_peserta, where=array_peserta!=0)

    # Aggregate into 3 classes using mean instead of sum
    kelas = [
        np.mean(cpm_historic[:, [2, 4, 6, 8, 11, 13]], axis=1),
        np.mean(cpm_historic[:, [3, 5, 7, 9, 12, 14]], axis=1),
        np.mean(cpm_historic[:, [0, 1, 10, 15]], axis=1)
    ]

    array_cpmpm = np.vstack(kelas).T

    # Monte Carlo Simulation parameters
    n_future = 96  # Forecasting 96 months ahead
    n_simulations = 1000  # Number of Monte Carlo simulations

    # Function to generate Monte Carlo simulations using Gamma Distribution
    def monte_carlo_gamma(data, n_future, n_simulations):
        mu = np.mean(data)  # Mean of historical data
        sigma_sq = np.var(data)  # Variance of historical data
        if sigma_sq == 0:  
            return np.full(n_future, mu)  # Avoid division by zero, return mean
        
        shape_param = (mu ** 2) / sigma_sq
        scale_param = sigma_sq / mu

        simulations = gamma.rvs(a=shape_param, scale=scale_param, size=(n_future, n_simulations))
        return np.mean(simulations, axis=1)  # Take mean over simulations

    # Forecast future CPMPM values using Monte Carlo with Gamma distribution
    cpmpm_proj = np.zeros((array_cpmpm.shape[0] + n_future, array_cpmpm.shape[1]))
    cpmpm_proj[:array_cpmpm.shape[0], :] = array_cpmpm

    for i in range(array_cpmpm.shape[1]):
        cpmpm_proj[array_cpmpm.shape[0]:, i] = monte_carlo_gamma(array_cpmpm[:, i], n_future, n_simulations)

    # Convert to yearly CPMPM (averaging 12 months per year)
    def to_yearly_cpmpm(data):
        if len(data) < 12:
            return np.array([np.mean(data)])
        return np.array([np.mean(data[i:i+12]) for i in range(0, len(data) - 11, 12)])

    cpmpm_yearly = np.array([to_yearly_cpmpm(cpmpm_proj[:, i]) for i in range(cpmpm_proj.shape[1])])
    # Adjust years based on yearly data length
    years = np.arange(2016, 2016 + cpmpm_yearly.shape[1])

    # Plot results
    plt.figure(figsize=(10, 5))
    for i in range(cpmpm_yearly.shape[0]):
        plt.plot(years, cpmpm_yearly[i], marker='o', label=f'CPMPM Kelas {i+1}')
    plt.xlabel('Year')
    plt.ylabel('CPMPM')
    plt.title('CPMPM Projection Promprev using Monte Carlo (Gamma Distribution)')
    plt.legend()
    plt.grid()
    plt.show()

    # Export to Excel
    df_export = pd.DataFrame(cpmpm_yearly.T, columns=[f'CPMPM Kelas {i+1}' for i in range(cpmpm_yearly.shape[0])])
    df_export.insert(0, 'Year', years)
    df_export.to_excel("cpmpm_projection_mcgamma_promprev.xlsx", index=False)

mc_gamma(peserta,promprev)