In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from scipy.interpolate import CubicSpline
from itertools import product
from sklearn.metrics import r2_score
import csv
from vmd import VMD

In [2]:
# Adjusted R-squared
def r2_score_adjusted(y, y_pred, featurecount):
    """Return the adjusted R-squared of `y_pred` with respect to `y`.

    Computes ``1 - (1 - R2) * (n - 1) / (n - featurecount - 1)`` where ``R2`` is
    ``r2_score(y, y_pred)`` and ``n`` is ``len(y)``.

    Parameters
    ----------
    y : sequence
        Reference values.
    y_pred : sequence
        Values being scored against `y`.
    featurecount : int
        Number of explanatory features used in the penalty term.

    Returns
    -------
    float
        The adjusted R-squared.
    """
    sample_count = len(y)
    unexplained = 1 - r2_score(y, y_pred)
    # Multiply first, divide second: keeps the floating-point evaluation order.
    penalised = unexplained * (sample_count - 1)
    return 1 - penalised / (sample_count - featurecount - 1)

# AIC
def calculate_aic(y, y_pred, k):
    """Return the Akaike information criterion based on the sum of squared errors.

    Computes ``n * log(SSE / n) + 2 * k`` with ``SSE = sum((y - y_pred) ** 2)``
    and ``n = len(y)``.

    Parameters
    ----------
    y : array-like
        Reference values.
    y_pred : array-like
        Values being scored against `y`.
    k : int
        Number of model parameters used in the penalty term.

    Returns
    -------
    float
        The AIC value (lower is better).
    """
    # Coerce to arrays so plain Python lists work and the subtraction is always
    # element-by-position (a list - list subtraction would raise TypeError).
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)
    sse = np.sum((y - y_pred) ** 2)
    n = len(y)
    aic = n * np.log(sse / n) + 2 * k
    return aic

# BIC
def calculate_bic(y, y_pred, k):
    """Return the Bayesian information criterion based on the sum of squared errors.

    Computes ``n * log(SSE / n) + k * log(n)`` with
    ``SSE = sum((y - y_pred) ** 2)`` and ``n = len(y)``.

    Parameters
    ----------
    y : array-like
        Reference values.
    y_pred : array-like
        Values being scored against `y`.
    k : int
        Number of model parameters used in the penalty term.

    Returns
    -------
    float
        The BIC value (lower is better).
    """
    # Coerce to arrays so plain Python lists work and the subtraction is always
    # element-by-position (a list - list subtraction would raise TypeError).
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)
    sse = np.sum((y - y_pred) ** 2)
    n = len(y)
    bic = n * np.log(sse / n) + k * np.log(n)
    return bic

# импорт временного ряда

In [3]:
# Pieces used to build data file names in f-strings: `path1` is the prefix put in
# front of a folder name, `path2` the separator between a folder and a file name.
# The POSIX-style values are active; for Windows-style paths use '.\\' and '\\'.
path1, path2 = './', '/'

In [4]:
# Names of the components that make up the studied series. Every constant here is
# a tuple; a one-element tuple needs the trailing comma, otherwise ("s1") is just
# the string "s1" and iterating over it would yield single characters.
seasonsTS1 = ("s1",)
seasonsTS2 = ("s2",)
trendsTS = ("t1", "t2", "t3", "t4", "t5", "t6", "t7")
# One noise component name per case (the same component is used for all of them).
noisesTS = ("n11", "n11", "n11", "n11", "n11", "n11", "n11")

# File stems of the case CSV files, one per time series.
cases = ("t1_n11_s1_s2", "t2_n11_s1_s2", "t3_n11_s1_s2", "t4_n11_s1_s2", "t5_n11_s1_s2", "t6_n11_s1_s2", "t7_n11_s1_s2")

In [5]:
# Read each noise component named in noisesTS (comma as decimal mark) and keep
# only its 'Value' column, converted to float.
Noise = [
    pd.DataFrame(
        pd.read_csv(f"{path1}Components{path2}{noise_name}.csv", decimal=','),
        dtype=float,
    )['Value']
    for noise_name in noisesTS
]

# IMF indices that are accumulated, in this order, when a decomposition is scored.
range_of_imfs = range(10)
# Label of the analysed component; it becomes part of the output file names.
Component_name = "Noise-freq"

# Reference amplitude spectrum (|rfft|) of every noise series, stored in a 1-D
# object array with one entry per series.
Component_of_TS = np.zeros(len(Noise), dtype=list)
for series_pos, noise_series in enumerate(Noise):
    Component_of_TS[series_pos] = np.abs(np.fft.rfft(noise_series))


In [6]:
# Load every case file exactly once (comma as decimal mark, values as float).
# `TS` collects the raw value arrays; `index` keeps the row index of the first
# case (cases[0]). It stays None when `cases` is empty.
index = None
TS = list()

for case_i in cases:
    data = pd.read_csv(f"{path1}Cases{path2}{case_i}.csv", decimal=',')
    df = pd.DataFrame(data, dtype=float)
    if index is None:
        # Taken from the first file while it is already in memory, instead of
        # reading that file a second time just for its index.
        index = df.index
    TS.append(df.values)


# тестовый анализ

In [18]:
# NOTE(review): the recorded output of this cell ([9, 8, ..., 2]) does not match
# range_of_imfs = range(10) as defined earlier in this notebook, and the execution
# count is out of sequence; it looks like a leftover from another kernel session.
# Re-run after Restart Kernel -> Run All to refresh it.
list(range_of_imfs)

[9, 8, 7, 6, 5, 4, 3, 2]

In [19]:
# Component_of_TS is a 1-D object array holding one spectrum per noise series,
# so only the number of series is reported here.
np.shape(Component_of_TS)

(7,)

In [20]:
# NOTE(review): the recorded output (9) does not match range_of_imfs = range(10)
# as defined earlier in this notebook, whose first element is 0; the output looks
# stale. Re-run after Restart Kernel -> Run All to refresh it.
range_of_imfs[0]

9

In [21]:
# Values of the first case as loaded from its CSV file (a 2-D array; the recorded
# output shows shape (1500, 1)).
TS[0]

array([[-1.76212977e-02],
       [-2.07655145e+01],
       [-1.85068425e+01],
       ...,
       [ 7.66732486e+01],
       [ 9.80532668e+01],
       [ 6.56184883e+01]], shape=(1500, 1))

In [22]:
# TS is a list with one 2-D value array per case; the recorded output shows
# 7 cases of shape (1500, 1) each.
np.shape(TS)

(7, 1500, 1)

# декомпозиция временного ряда

In [7]:
# Hyper-parameter grid for the decomposition: every combination of
# (alpha, DC, init) is passed to VMD in the grid-search cell.
alpha = (
    0, 500, 1000, 2500, 5000, 7500, 10000,
    15000, 20000, 25000, 30000, 35000, 40000, 45000, 50000,
)
init = tuple(range(11))   # 0, 1, ..., 10
DC = (False, True)
K = 10                    # value passed as VMD's K argument

# Kept at the values the original author marked as defaults.
tau = 0
tol = 1e-6

# Feature count used by the adjusted R2 / AIC / BIC penalty terms.
featurecount = 6

In [8]:
# Per-series result holders, one entry per time series in TS:
#   best_metriks    - best score found so far for each metric
#   best_*_params   - parameter record belonging to the best score of that metric
best_metriks = np.zeros(len(TS), dtype=dict)

best_r2_params = np.zeros(len(TS), dtype=dict)
best_r2_adj_params = np.zeros(len(TS), dtype=dict)
best_aic_params = np.zeros(len(TS), dtype=dict)
best_bic_params = np.zeros(len(TS), dtype=dict)

for i in range(len(TS)):
    series_label = f"TS{i+1}"

    # Start from the worst possible score so that any real result replaces it
    # (R2-type metrics are maximised, AIC/BIC are minimised).
    best_metriks[i] = {"Time Series": series_label, "R2": -np.inf, "R2_Adj": -np.inf, "AIC" : np.inf, "BIC": np.inf}

    for metric_name, params_store in (
        ("R2", best_r2_params),
        ("R2_Adj", best_r2_adj_params),
        ("AIC", best_aic_params),
        ("BIC", best_bic_params),
    ):
        # "K" reports the configured K (the value actually passed to VMD) rather
        # than a hard-coded 3, which never matched the decomposition being run.
        params_store[i] = {"Time Series": series_label, "Metric": metric_name, "alpha": 0, "tau": tau, "K": K, "DC": False, "init": 0, "IMFs" : "1"}



In [9]:
# Grid search over the VMD hyper-parameters (alpha, DC, init) for every time series.
# For each parameter set the IMFs are subtracted from the series cumulatively (in
# range_of_imfs order); the amplitude spectrum of what remains is scored against the
# reference spectrum in Component_of_TS. Per metric, the best score and the
# parameter set that actually produced it are written to best_metriks and
# best_*_params.
param_grid = list(product(alpha, DC, init))

for ts_i in range(len(TS)):

    # One row per parameter set: [best score over the IMF counts, position of that count]
    r2_list = np.zeros((len(param_grid), 2), dtype=float)
    r2_adj_list = np.zeros((len(param_grid), 2), dtype=float)
    aic_list = np.zeros((len(param_grid), 2), dtype=float)
    bic_list = np.zeros((len(param_grid), 2), dtype=float)

    # Score every parameter set
    for j, params_set in enumerate(param_grid):
        u, u_hat, omega = VMD(TS[ts_i], alpha=params_set[0], tau=tau, K=K, DC=params_set[1], init=params_set[2], tol=tol)

        r2 = list()
        r2_adj = list()
        aic = list()
        bic = list()
        sum_of_imfs = 0
        for i_imfs in range_of_imfs:
            sum_of_imfs += u[i_imfs].T
            residual = TS[ts_i].T[0] - sum_of_imfs
            imf_freg = np.abs(np.fft.rfft(residual))

            r2.append(r2_score(Component_of_TS[ts_i], imf_freg))
            r2_adj.append(r2_score_adjusted(Component_of_TS[ts_i], imf_freg, featurecount))
            aic.append(calculate_aic(Component_of_TS[ts_i], imf_freg, featurecount))
            bic.append(calculate_bic(Component_of_TS[ts_i], imf_freg, featurecount))

        r2_list[j] = [np.max(r2), np.argmax(r2)]
        r2_adj_list[j] = [np.max(r2_adj), np.argmax(r2_adj)]
        aic_list[j] = [np.min(aic), np.argmin(aic)]
        bic_list[j] = [np.min(bic), np.argmin(bic)]

    # (metric key, score table, True when larger is better, parameter store)
    for metric, scores, higher_is_better, best_params in (
        ("R2", r2_list, True, best_r2_params),
        ("R2_Adj", r2_adj_list, True, best_r2_adj_params),
        ("AIC", aic_list, False, best_aic_params),
        ("BIC", bic_list, False, best_bic_params),
    ):
        column = scores[:, 0]
        best_j = int(np.argmax(column)) if higher_is_better else int(np.argmin(column))
        best_score = column[best_j]
        i_imfs = int(scores[best_j, 1])

        if higher_is_better:
            improved = best_score > best_metriks[ts_i][metric]
        else:
            improved = best_score < best_metriks[ts_i][metric]

        if improved:
            best_metriks[ts_i][metric] = best_score

            # Look the winning parameters up by index. The loop variable
            # `params_set` still holds the LAST grid point at this stage, so
            # using it would always report the final combination.
            best_alpha, best_dc, best_init = param_grid[best_j]
            best_params[ts_i]["alpha"] = best_alpha
            best_params[ts_i]["DC"] = best_dc
            best_params[ts_i]["init"] = best_init
            best_params[ts_i]["IMFs"] = f'{range_of_imfs[0]+1} + {i_imfs}'


  return np.dot(freqs[T//2:T],(abs(u_hat_plus[n+1, T//2:T, k])**2))/np.sum(abs(u_hat_plus[n+1,T//2:T,k])**2)
  return (f_hat_plus - sum_uk - lambda_hat[n,:]/2)/(1.+Alpha[k]*(freqs - omega_plus[n,k])**2)
  return np.dot(freqs[T//2:T],(abs(u_hat_plus[n+1, T//2:T, k])**2))/np.sum(abs(u_hat_plus[n+1,T//2:T,k])**2)
  return (f_hat_plus - sum_uk - lambda_hat[n,:]/2)/(1.+Alpha[k]*(freqs - omega_plus[n,k])**2)
  return np.dot(freqs[T//2:T],(abs(u_hat_plus[n+1, T//2:T, k])**2))/np.sum(abs(u_hat_plus[n+1,T//2:T,k])**2)
  return (f_hat_plus - sum_uk - lambda_hat[n,:]/2)/(1.+Alpha[k]*(freqs - omega_plus[n,k])**2)
  return np.dot(freqs[T//2:T],(abs(u_hat_plus[n+1, T//2:T, k])**2))/np.sum(abs(u_hat_plus[n+1,T//2:T,k])**2)
  return (f_hat_plus - sum_uk - lambda_hat[n,:]/2)/(1.+Alpha[k]*(freqs - omega_plus[n,k])**2)
  return np.dot(freqs[T//2:T],(abs(u_hat_plus[n+1, T//2:T, k])**2))/np.sum(abs(u_hat_plus[n+1,T//2:T,k])**2)
  return (f_hat_plus - sum_uk - lambda_hat[n,:]/2)/(1.+Alpha[k]*(freqs - omega_

# метрики точности и параметры с выводом

In [10]:
# Render every best score as a string with five decimals for the CSV export.
# The value is passed through float() first so that re-running this cell on
# already formatted strings is harmless instead of raising
# "ValueError: Unknown format code 'f' for object of type 'str'".
for ts_i in range(len(TS)):
    for metric in ("BIC", "AIC", "R2_Adj", "R2"):
        best_metriks[ts_i][metric] = format(float(best_metriks[ts_i][metric]), ".5f")

In [11]:
# Export the best score of every time series to a CSV file named after the
# analysed component.
filename = f"{path1}Output{path2}Best_Metriks-{Component_name}.csv"

# Column order follows the key order of the first record.
fields = [*best_metriks[0]]

with open(filename, mode='w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=fields)
    writer.writeheader()
    # One data row per time series
    for record in best_metriks[:len(TS)]:
        writer.writerow(record)

In [12]:
# Export the parameter records to a CSV file named after the analysed component.
# Rows are grouped per time series in the order R2, R2_Adj, AIC, BIC.
filename = f"{path1}Output{path2}Parameters-{Component_name}.csv"

# Column order follows the key order of the first R2 record.
fields = [*best_r2_params[0]]

with open(filename, mode='w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=fields)
    writer.writeheader()
    per_series_records = zip(
        best_r2_params[:len(TS)],
        best_r2_adj_params[:len(TS)],
        best_aic_params[:len(TS)],
        best_bic_params[:len(TS)],
    )
    for series_records in per_series_records:
        for record in series_records:
            writer.writerow(record)

