In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from scipy.interpolate import CubicSpline
from itertools import product
from sklearn.metrics import r2_score
import csv
from vmd import VMD

In [2]:
def r2_score_adjusted(y, y_pred, featurecount):
    """Return the adjusted R-squared of ``y_pred`` measured against ``y``.

    The plain coefficient of determination (``r2_score``) is penalised for
    the number of features:

        1 - (1 - R2) * (n - 1) / (n - featurecount - 1),   n = len(y)

    Parameters
    ----------
    y : sequence
        Reference values.
    y_pred : sequence
        Estimated values, passed to ``r2_score`` together with ``y``.
    featurecount : int
        Number of features used in the penalty term.

    Returns
    -------
    float
        The adjusted R-squared.
    """
    sample_size = len(y)
    unexplained_share = 1 - r2_score(y, y_pred)
    return 1 - unexplained_share * (sample_size - 1) / (sample_size - featurecount - 1)

def calculate_aic(y, y_pred, k):
    """Akaike information criterion based on the residual sum of squares.

    Evaluates ``n * ln(SSE / n) + 2 * k`` where ``n = len(y)`` and
    ``SSE = sum((y - y_pred) ** 2)``.

    Parameters
    ----------
    y : array-like supporting element-wise subtraction
        Reference values.
    y_pred : array-like
        Estimated values.
    k : int
        Number of model parameters entering the penalty term.

    Returns
    -------
    float
        The AIC value (lower is better).
    """
    sample_size = len(y)
    residuals = y - y_pred
    squared_error_total = np.sum(residuals ** 2)
    return sample_size * np.log(squared_error_total / sample_size) + 2 * k

def calculate_bic(y, y_pred, k):
    """Bayesian information criterion based on the residual sum of squares.

    Evaluates ``n * ln(SSE / n) + k * ln(n)`` where ``n = len(y)`` and
    ``SSE = sum((y - y_pred) ** 2)``.

    Parameters
    ----------
    y : array-like supporting element-wise subtraction
        Reference values.
    y_pred : array-like
        Estimated values.
    k : int
        Number of model parameters entering the penalty term.

    Returns
    -------
    float
        The BIC value (lower is better).
    """
    sample_size = len(y)
    residuals = y - y_pred
    squared_error_total = np.sum(residuals ** 2)
    goodness_of_fit = sample_size * np.log(squared_error_total / sample_size)
    complexity_penalty = k * np.log(sample_size)
    return goodness_of_fit + complexity_penalty

# импорт временного ряда

In [3]:
# Path fragments used to build the input and output file names:
#   path1 - prefix of the working directory
#   path2 - separator placed between a directory name and a file name
# Forward slashes are understood by Python's file APIs on Windows as well as
# on Linux/macOS, so a single setting works everywhere; the previous
# backslash variant only resolved on Windows.
path1 = './'
path2 = '/'

In [4]:
# Two parallel tuples with one entry per analysed series: cases[i] names the
# input file of series i and seasonsTS[i] names the reference seasonal
# component that series is scored against. A case file therefore appears once
# for every season it is compared with.
cases = (
    "t7_n21_s7_s9",
    "t7_n21_s7_s9",
    "t7_n21_s8_s9",
    "t7_n21_s8_s9",
)

seasonsTS = (
    "s7",
    "s9",
    "s8",
    "s9",
)

In [5]:
# Read the 'Value' column of every reference seasonal component
# (the CSV files use a comma as decimal separator).
Season = [
    pd.DataFrame(pd.read_csv(f"./Components/{season_i}.csv", decimal=','), dtype=float)['Value']
    for season_i in seasonsTS
]

# Component the decomposition is compared with (shallow copy of the list above).
Component_of_TS = list(Season)
# Name of that component; it becomes part of the output file names.
Component_name = "Season"
# Mode indices at which a candidate sum of IMFs may start.
range_of_imfs = range(1, 8)


In [6]:
# Load every case file exactly once (the CSV files use a comma as decimal
# separator) and force all columns to float.
case_frames = [
    pd.DataFrame(pd.read_csv(f"{path1}Cases{path2}{case_i}.csv", decimal=','), dtype=float)
    for case_i in cases
]

# Row index of the first case.
index = case_frames[0].index

# One 2-D array of values per case, in the order given by `cases`.
TS = [frame.to_numpy() for frame in case_frames]


# тестовый анализ

In [7]:
# Sanity check: the start indices the IMF search will iterate over.
list(range_of_imfs)

[1, 2, 3, 4, 5, 6, 7]

In [8]:
# Sanity check: shape of the stacked reference components (one row per series).
np.shape(Component_of_TS)

(4, 1500)

In [9]:
# Sanity check: first start index used by the IMF search.
range_of_imfs[0]

1

In [10]:
# Sanity check: values of the first loaded series.
TS[0]

array([[70.15810976],
       [73.69767876],
       [74.59385009],
       ...,
       [-5.87675047],
       [-5.43221575],
       [-4.61858018]], shape=(1500, 1))

In [11]:
# Sanity check: shape of all loaded series stacked together.
np.shape(TS)

(4, 1500, 1)

# декомпозиция временного ряда

In [7]:
# ---- Parameter grid scanned by the search ---------------------------------
# Every combination of (alpha, DC, init) is passed to VMD.
# NOTE(review): the meaning of each argument is defined by the local `vmd`
# module; presumably alpha is the bandwidth penalty, DC toggles a fixed
# zero-frequency mode and init selects how centre frequencies are
# initialised. Confirm against that module.
alpha = (
    0, 500, 1000, 2500, 5000, 7500, 10000,
    15000, 20000, 25000, 30000, 35000, 40000, 45000, 50000,
)
DC = (False, True)
init = (0, 1, 2)

# ---- Settings kept fixed for every decomposition ---------------------------
K = 10        # number of modes requested from VMD
tau = 0       # left at the value the author treats as the default
tol = 1e-6    # left at the value the author treats as the default

# Feature/parameter count fed into adjusted R2, AIC and BIC.
featurecount = 6

In [8]:
def _initial_params(series_label, season_name, metric):
    """Build the placeholder parameter record for one series/metric pair.

    The search-related fields ("alpha", "DC", "init", "IMFs") start at neutral
    values and are overwritten once the best grid point is known. "K" is read
    from the notebook-level setting instead of being repeated as a literal, so
    the exported value cannot drift away from the one actually used.
    The key order defines the column order of the exported CSV.
    """
    return {
        "Time Series": series_label,
        "Season": season_name,
        "Metric": metric,
        "K": K,
        "alpha": 0,
        "DC": False,
        "init": 0,
        "IMFs": "1",
    }


n_series = len(TS)

# Object arrays holding one dict per series.
best_metriks = np.empty(n_series, dtype=object)

best_r2_params = np.empty(n_series, dtype=object)
best_r2_adj_params = np.empty(n_series, dtype=object)
best_aic_params = np.empty(n_series, dtype=object)
best_bic_params = np.empty(n_series, dtype=object)

for i in range(n_series):
    series_label = f"TS{i+1}"
    season_name = seasonsTS[i]

    # Worst possible starting scores: R2 metrics are maximised, AIC/BIC minimised.
    best_metriks[i] = {"Time Series": series_label, "Season": season_name, "R2": -np.inf, "R2_Adj": -np.inf, "AIC": np.inf, "BIC": np.inf}

    best_r2_params[i] = _initial_params(series_label, season_name, "R2")
    best_r2_adj_params[i] = _initial_params(series_label, season_name, "R2_Adj")
    best_aic_params[i] = _initial_params(series_label, season_name, "AIC")
    best_bic_params[i] = _initial_params(series_label, season_name, "BIC")



In [9]:
# Flatten the (alpha, DC, init) grid into three parallel arrays so that the
# j-th grid point can be looked up by position: alpha_j[j], DC_j[j], init_j[j].
# The order is the one produced by product(alpha, DC, init).
param_grid = list(product(alpha, DC, init))

alpha_j = np.array([combo[0] for combo in param_grid], dtype=int)
DC_j = np.array([combo[1] for combo in param_grid], dtype=bool)
init_j = np.array([combo[2] for combo in param_grid], dtype=int)

In [10]:
# Exhaustive search over the VMD parameter grid.
#
# For every series and every (alpha, DC, init) combination the series is
# decomposed into K modes. Every contiguous run of modes u[start..end]
# (start taken from range_of_imfs, end < K, both zero-based and inclusive) is
# summed and scored against the reference component. For each grid point only
# the best candidate per metric is stored, as a row [score, start, end].
param_grid = list(product(alpha, DC, init))
n_series = len(TS)
result_shape = (n_series, len(alpha) * len(DC) * len(init), 3)

r2_list = np.zeros(result_shape, dtype=float)
r2_adj_list = np.zeros(result_shape, dtype=float)
aic_list = np.zeros(result_shape, dtype=float)
bic_list = np.zeros(result_shape, dtype=float)

# for every series
for ts_i in range(n_series):
    reference = Component_of_TS[ts_i]

    # for every parameter combination
    for j, params_set in enumerate(param_grid):
        alpha_value, dc_flag, init_mode = params_set
        u, u_hat, omega = VMD(TS[ts_i], alpha=alpha_value, tau=tau, K=K, DC=dc_flag, init=init_mode, tol=tol)

        # (start, end) of every candidate sum, aligned with the score lists below.
        imf_spans = []
        r2, r2_adj, aic, bic = [], [], [], []

        for j_imfs in range_of_imfs:
            sum_of_imfs = 0
            for i_imfs in range(j_imfs, K):
                # Extend the running sum by one more mode.
                sum_of_imfs += u[i_imfs].T
                imf_spans.append((j_imfs, i_imfs))

                # Trend / season variant: the sum of modes itself is the estimate.
                # (For a noise component the estimate would instead be the
                #  residual TS[ts_i].T[0] - sum_of_imfs.)
                r2.append(r2_score(reference, sum_of_imfs))
                r2_adj.append(r2_score_adjusted(reference, sum_of_imfs, featurecount))
                aic.append(calculate_aic(reference, sum_of_imfs, featurecount))
                bic.append(calculate_bic(reference, sum_of_imfs, featurecount))

        # R2 metrics are maximised, information criteria are minimised.
        top_r2 = np.argmax(r2)
        top_r2_adj = np.argmax(r2_adj)
        top_aic = np.argmin(aic)
        top_bic = np.argmin(bic)

        r2_list[ts_i][j] = [np.max(r2), imf_spans[top_r2][0], imf_spans[top_r2][1]]
        r2_adj_list[ts_i][j] = [np.max(r2_adj), imf_spans[top_r2_adj][0], imf_spans[top_r2_adj][1]]
        aic_list[ts_i][j] = [np.min(aic), imf_spans[top_aic][0], imf_spans[top_aic][1]]
        bic_list[ts_i][j] = [np.min(bic), imf_spans[top_bic][0], imf_spans[top_bic][1]]



  return np.dot(freqs[T//2:T],(abs(u_hat_plus[n+1, T//2:T, k])**2))/np.sum(abs(u_hat_plus[n+1,T//2:T,k])**2)
  return (f_hat_plus - sum_uk - lambda_hat[n,:]/2)/(1.+Alpha[k]*(freqs - omega_plus[n,k])**2)
  return np.dot(freqs[T//2:T],(abs(u_hat_plus[n+1, T//2:T, k])**2))/np.sum(abs(u_hat_plus[n+1,T//2:T,k])**2)
  return (f_hat_plus - sum_uk - lambda_hat[n,:]/2)/(1.+Alpha[k]*(freqs - omega_plus[n,k])**2)
  return np.dot(freqs[T//2:T],(abs(u_hat_plus[n+1, T//2:T, k])**2))/np.sum(abs(u_hat_plus[n+1,T//2:T,k])**2)
  return (f_hat_plus - sum_uk - lambda_hat[n,:]/2)/(1.+Alpha[k]*(freqs - omega_plus[n,k])**2)
  return np.dot(freqs[T//2:T],(abs(u_hat_plus[n+1, T//2:T, k])**2))/np.sum(abs(u_hat_plus[n+1,T//2:T,k])**2)
  return (f_hat_plus - sum_uk - lambda_hat[n,:]/2)/(1.+Alpha[k]*(freqs - omega_plus[n,k])**2)
  return np.dot(freqs[T//2:T],(abs(u_hat_plus[n+1, T//2:T, k])**2))/np.sum(abs(u_hat_plus[n+1,T//2:T,k])**2)
  return (f_hat_plus - sum_uk - lambda_hat[n,:]/2)/(1.+Alpha[k]*(freqs - omega_

In [11]:
def _keep_best(ts_i, metric, results, params, maximize):
    """Record the best grid point of one metric for one series.

    Parameters
    ----------
    ts_i : int
        Position of the series.
    metric : str
        Key of the metric inside ``best_metriks[ts_i]``.
    results : array of rows ``[score, start, end]``
        One row per grid point; ``start``/``end`` are the zero-based,
        inclusive indices of the first and last summed mode.
    params : sequence of dict
        Per-series parameter records to update for this metric.
    maximize : bool
        True when a larger score is better, False when a smaller one is.

    The stored best score and the parameter record are only replaced when the
    new score is strictly better than the stored one.
    """
    scores, first_imf, last_imf = np.transpose(results)
    best_j = int(np.argmax(scores)) if maximize else int(np.argmin(scores))
    candidate = scores[best_j]

    # float() keeps the comparison valid even if the stored value has already
    # been turned into a formatted string by the export-formatting cell.
    current = float(best_metriks[ts_i][metric])
    improved = candidate > current if maximize else candidate < current
    if not improved:
        return

    best_metriks[ts_i][metric] = candidate

    record = params[ts_i]
    record["alpha"] = alpha_j[best_j]
    record["DC"] = DC_j[best_j]
    record["init"] = init_j[best_j]
    # Both ends are reported 1-based and inclusive: the sum covers
    # u[start] .. u[end], so the last mode number is end + 1.
    record["IMFs"] = f'{int(first_imf[best_j]) + 1} --- {int(last_imf[best_j]) + 1}'


for ts_i in range(len(TS)):
    _keep_best(ts_i, "R2", r2_list[ts_i], best_r2_params, maximize=True)
    _keep_best(ts_i, "R2_Adj", r2_adj_list[ts_i], best_r2_adj_params, maximize=True)
    _keep_best(ts_i, "AIC", aic_list[ts_i], best_aic_params, maximize=False)
    _keep_best(ts_i, "BIC", bic_list[ts_i], best_bic_params, maximize=False)



# метрики точности и параметры с выводом

In [12]:
# Render the best scores as fixed-point strings with five decimals for the
# CSV export. The values are replaced in place; converting through float()
# first makes the cell safe to re-run, because an already formatted string is
# parsed back to a number instead of raising on the 'f' format code.
for ts_i in range(len(TS)):
    for metric in ("BIC", "AIC", "R2_Adj", "R2"):
        best_metriks[ts_i][metric] = format(float(best_metriks[ts_i][metric]), ".5f")

In [13]:
# Export the best score of every metric, one CSV row per series.
filename = f"{path1}Output{path2}Best_Metriks-{Component_name}.csv"

# Column order follows the key order of the first record.
fields = list(best_metriks[0])

with open(filename, mode='w', newline='') as csv_file:
    metrics_writer = csv.DictWriter(csv_file, fieldnames=fields)
    metrics_writer.writeheader()
    for ts_i in range(len(TS)):
        metrics_writer.writerow(best_metriks[ts_i])

In [14]:
# Export the winning VMD parameters: for every series four CSV rows, in the
# order R2, adjusted R2, AIC, BIC.
filename = f"{path1}Output{path2}Parameters-{Component_name}.csv"

# Column order follows the key order of the first record.
fields = list(best_r2_params[0])

with open(filename, mode='w', newline='') as csv_file:
    params_writer = csv.DictWriter(csv_file, fieldnames=fields)
    params_writer.writeheader()
    for ts_i in range(len(TS)):
        for per_metric_params in (best_r2_params, best_r2_adj_params, best_aic_params, best_bic_params):
            params_writer.writerow(per_metric_params[ts_i])

