In [None]:
import os
import gc
import pickle
import statsmodels.api as sm
import pandas as pd
import numpy as np
import math
import seaborn as sns
from seaborn_analyzer import regplot
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import japanize_matplotlib
from openpyxl import load_workbook
import warnings
warnings.filterwarnings('ignore', category=UserWarning, module='matplotlib')
from datetime import datetime, timedelta
from scipy import stats
from scipy.optimize import minimize_scalar
import pulp
from pulp import LpProblem, LpMinimize, LpVariable, lpSum
# pd.options.display.max_rows = True

#### 元データの読み出し

In [None]:
# Load the plan workbook (column names come from the sheet's second row,
# header=1), keep the date column plus the ip/st/ha series and their *_max
# columns, and index the frame by the parsed date under the name 'time'.
# The three non-max series are rounded to whole numbers and any remaining
# missing cells are replaced with 0.
df = (
    pd.read_excel('/workspaces/data/plan/plan.xlsx', header=1)
    .loc[:, ['日付', 'ip', 'ip_max', 'st', 'st_max', 'ha', 'ha_max']]
    .rename(columns={'日付': 'time'})
    .assign(time=lambda frame: pd.to_datetime(frame['time'], format='%Y%m%d'))
    .set_index('time')
    .round({'ip': 0, 'st': 0, 'ha': 0})
    .fillna(0)
)

#### テストデータ読み出し

In [None]:
# Read the test CSV, keep only the columns listed below, and index the frame
# by the 'time' column parsed as YYYY-MM-DD dates.
df = (
    pd.read_csv('/workspaces/dataset/test_data.csv')
    .loc[:, ['time', 'ip', 'ip_max', 'st', 'ha']]
    .assign(time=lambda frame: pd.to_datetime(frame['time'], format='%Y-%m-%d'))
    .set_index('time')
)

#### テスト用サンプルデータ作成

In [None]:
# Build a synthetic daily dataset and save it as the test CSV.
start = '2023-04-1'
end = '2023-7-4'
date_range = pd.date_range(start=start, end=end, freq='D')

# Fixed seed so that re-running this cell regenerates exactly the same file.
SAMPLE_SEED = 0
rng = np.random.default_rng(SAMPLE_SEED)

# Consecutive segments of the date range, in calendar order:
# (number of days, lower bound of 'ip', upper bound of 'ip', constant 'ip_max').
segments = [
    (27, 12000, 27000, 32000),
    (14, 25000, 40000, 38000),
    (3, 25000, 50000, 38000),
    (12, 14000, 18000, 32000),
    (20, 29000, 44000, 38000),
    (19, 18000, 21000, 32000),
]

# The segments must tile the date range exactly; fail with a clear message
# here rather than with a length mismatch inside the DataFrame constructor.
total_days = sum(days for days, _, _, _ in segments)
if total_days != len(date_range):
    raise ValueError(
        f"segments cover {total_days} days but the date range has {len(date_range)} days"
    )

# 'ip': uniform random values per segment, rounded to whole numbers.
ip_values = np.concatenate(
    [np.round(rng.uniform(low=low, high=high, size=days)) for days, low, high, _ in segments],
    axis=0,
)
# 'ip_max': one constant value per segment.
ip_max_values = np.concatenate(
    [np.full(shape=days, fill_value=cap) for days, _, _, cap in segments],
    axis=0,
)

data = {'time': date_range,
        'ip': ip_values,
        'ip_max': ip_max_values,
        'st': np.zeros(len(date_range)),
        'ha': np.zeros(len(date_range))}

df = pd.DataFrame(data)
df.to_csv('/workspaces/dataset/test_data.csv', index=False)

#### MILPによる計画最適化の実行

In [None]:
def milp_optimization(Y_input: pd.Series, X_max_input: pd.Series, w1=0.1, w2=1, w3=1, w4=0.2, w5=0.05, w6=0.01, big_m=1000000):
    """Choose per-period integer quantities X against the series Y with a MILP.

    Model (i runs over the n entries of ``Y_input``):

    * ``X[i]`` is an integer in ``[0, X_max[i]]``.
    * ``Cum[i] = max(0, Cum[i-1] + Y[i] - X[i])`` with ``Cum[-1] = 0``; the
      ``max`` is linearised with one binary ``z[i]`` and the constant ``big_m``.
    * ``sum(X) <= sum(Y)``.
    * Minimise ``w1*max(Cum) + w2*max(X) + w3*mean(X) + w4*mean(Cum)
      + w5*sum(Y - X)``.

    The problem is solved with PuLP's SCIP interface (solver output enabled).

    Args:
        Y_input: Series of per-period values Y; its index becomes the 'time'
            column of the result.
        X_max_input: Series of per-period upper bounds for X, same length as
            ``Y_input``.
        w1, w2, w3, w4, w5: Objective weights, see the formula above.
        w6: Accepted for interface compatibility; not used by the model.
        big_m: Big-M constant of the ``max`` linearisation. ``Cum[i]`` can
            never exceed it, and it must be at least as large as any
            ``X[i] - Y[i] - Cum[i-1]`` that can occur.

    Returns:
        A tuple ``(result, Max_Cum, Max_X)``. ``result`` is a DataFrame with
        columns 'time', 'Y', 'X' and 'Cum'; ``Max_Cum`` and ``Max_X`` are the
        PuLP variables themselves (call ``.value()`` to read them).

    Raises:
        ValueError: If the inputs are empty, differ in length, or contain
            missing values.

    Warns:
        RuntimeWarning: If the solver does not report an optimal solution; the
            values in ``result`` may then be ``None`` or not optimal.
    """
    # Validate up front: an empty input would otherwise fail with a division
    # by zero in the mean terms, and NaNs would silently corrupt the model.
    if len(Y_input) != len(X_max_input):
        raise ValueError(
            f"Y_input and X_max_input must have the same length "
            f"(got {len(Y_input)} and {len(X_max_input)})"
        )
    if len(Y_input) == 0:
        raise ValueError("Y_input must contain at least one period")
    if Y_input.isna().any() or X_max_input.isna().any():
        raise ValueError("Y_input and X_max_input must not contain missing values")

    # Convert pd.Series to numpy arrays (Y is reused as-is in the result).
    Y = Y_input.to_numpy()
    X_max = X_max_input.to_numpy()
    # Plain Python scalars for model building, so every expression is formed
    # by PuLP's own operators instead of numpy's array/scalar arithmetic.
    y_vals = Y.tolist()
    x_caps = X_max.tolist()
    n = len(y_vals)

    # Define problem
    prob = pulp.LpProblem("Optimization", pulp.LpMinimize)

    # Define variables
    X = [pulp.LpVariable(f'X_{i}', lowBound=0, upBound=x_caps[i], cat=pulp.LpInteger) for i in range(n)]
    Cum = [pulp.LpVariable(f'Cum_{i}', lowBound=0, cat=pulp.LpInteger) for i in range(n)]
    Max_Cum = pulp.LpVariable('Max_Cum', lowBound=0, cat=pulp.LpInteger)
    Max_X = pulp.LpVariable('Max_X', lowBound=0, cat=pulp.LpInteger)
    z = [pulp.LpVariable(f'z_{i}', cat=pulp.LpBinary) for i in range(n)]

    # Initial values: X at its upper bound, Cum following the recurrence.
    carried = 0
    for i in range(n):
        X[i].setInitialValue(x_caps[i])
        carried = max(0, carried + y_vals[i] - x_caps[i])
        Cum[i].setInitialValue(carried)

    # Define objective function
    prob += (
        w1 * Max_Cum
        + w2 * Max_X
        + w3 * (pulp.lpSum(X) / n)
        + w4 * (pulp.lpSum(Cum) / n)
        + w5 * pulp.lpSum(y_vals[i] - X[i] for i in range(n))
    ), "Objective"

    # Define constraints
    for i in range(n):
        # Unclipped value of the recurrence; the first period has no predecessor.
        previous = Cum[i - 1] if i > 0 else 0
        balance = previous + y_vals[i] - X[i]

        # z[i] = 1 forces Cum[i] == balance, z[i] = 0 forces Cum[i] == 0.
        prob += Cum[i] >= balance
        prob += Cum[i] >= 0
        prob += Cum[i] <= balance + big_m * (1 - z[i])
        prob += Cum[i] <= big_m * z[i]

        # Max_Cum / Max_X dominate every period; minimisation makes them tight
        # when their weights are positive.
        prob += Max_Cum >= Cum[i]
        prob += Max_X >= X[i]

    # Total X must not exceed total Y.
    prob += pulp.lpSum(X[i] - y_vals[i] for i in range(n)) <= 0

    # Solve the problem using SCIP solver
    scip_solver = pulp.SCIP(msg=True)
    prob.solve(scip_solver)

    # Do not hand back an unusable solution without saying so.
    if prob.status != pulp.LpStatusOptimal:
        warnings.warn(
            f"milp_optimization: solver finished with status "
            f"'{pulp.LpStatus[prob.status]}'; result values may be missing or not optimal",
            RuntimeWarning,
            stacklevel=2,
        )

    result = pd.DataFrame({
        'time': Y_input.index,
        'Y': Y,
        'X': [var.value() for var in X],
        'Cum': [var.value() for var in Cum],
    })

    return result, Max_Cum, Max_X

# Run the optimisation on the 'ip' series with 'ip_max' as the per-period upper
# bound; only the result table is needed here, the two PuLP variables are dropped.
result_df, _, _ = milp_optimization(df['ip'], df['ip_max'])

# Wide-form plotting takes x from the index, so move 'time' (the index of
# df['ip']) into the index; otherwise the lines are drawn against row numbers.
fig, ax = plt.subplots()
sns.lineplot(data=result_df.set_index('time'), ax=ax)
ax.set(xlabel='time', ylabel='value');