In [1]:
import pandas as pd
import numpy as np
import datetime

import warnings
# Suppresses every warning category for the rest of the session, so warnings
# raised by the pandas calls in later cells are never shown.
# NOTE(review): consider narrowing this to the specific categories that motivated it.
warnings.filterwarnings('ignore')

In [2]:
# ACE table: ';'-separated, decimal comma, and a literal 'N' is read as a missing value.
data_ace = pd.read_csv(
    r"Compare_ACE_DSCOVR.csv",
    sep=';',
    decimal=',',
    na_values='N',
)

In [3]:
# Discover table: ';'-separated with a decimal comma.
data_discover = pd.read_csv(
    r'Discover_с_интреполяцией12_актуальные_данные_до_22.09.2024.csv',
    sep=';',
    decimal=',',
)

##### Преобразование индексов к виду datetime

In [4]:
# Positional column offset for the ACE table; not referenced by any cell visible here.
from_index_dst_ace = 9
# Position of the first column that to_datetime_discover keeps; columns before it are dropped.
from_index_dst_discover = 4

def to_datetime_ace(df):
    """Return a copy of ``df`` indexed by the timestamp built from its date columns.

    The timestamp is assembled from the ``'year'``, ``'month'``, ``'day'`` and
    ``'hour from'`` columns. Those columns stay in the returned frame, and the
    input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns ``'year'``, ``'month'``, ``'day'`` and
        ``'hour from'``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` whose index is named ``'datetime'``.

    Raises
    ------
    KeyError
        If any of the four date columns is missing.
    """
    # pandas can assemble timestamps directly from numeric component columns.
    # This avoids formatting every row as a string and also works when the hour
    # column was read as float (integer-only string formatting fails on floats).
    components = df[['year', 'month', 'day', 'hour from']].rename(
        columns={'hour from': 'hour'}
    )
    timestamps = pd.to_datetime(components).rename('datetime')

    return df.copy().set_index(timestamps)

def to_datetime_discover(df, from_index=None):
    """Return the trailing columns of ``df`` indexed by its timestamp column.

    The timestamp is parsed from the ``'Date time'`` column (or from
    ``'datetime'`` if the column has already been renamed). Only the argument
    is read: the function neither mutates ``df`` nor touches any global frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'Date time'`` (or ``'datetime'``) column.
    from_index : int, optional
        Position of the first column to keep. Defaults to the module-level
        ``from_index_dst_discover``.

    Returns
    -------
    pandas.DataFrame
        Columns ``from_index`` onward, with an index named ``'datetime'``.

    Raises
    ------
    KeyError
        If neither ``'Date time'`` nor ``'datetime'`` is a column of ``df``.
    """
    if from_index is None:
        from_index = from_index_dst_discover

    if 'Date time' in df.columns:
        time_column = 'Date time'
    elif 'datetime' in df.columns:
        time_column = 'datetime'
    else:
        raise KeyError("expected a 'Date time' or 'datetime' column")

    # Name the series so that the resulting index is called 'datetime', which
    # is the key the later merge with the ACE frame relies on.
    timestamps = pd.to_datetime(df[time_column]).rename('datetime')
    return df.iloc[:, from_index:].set_index(timestamps)

In [5]:
# Both names are rebound to the re-indexed frames. Re-running this cell without
# reloading the CSVs feeds the already converted frames back in, and
# to_datetime_discover would then drop leading columns a second time.
data_ace = to_datetime_ace(data_ace)
data_discover = to_datetime_discover(data_discover)

##### Добавление к данным Discover столбца с Dst в соответствующие даты и время

In [6]:
# Right join keyed on 'datetime': every Discover row is kept and receives the
# ACE 'Dst' value for the same timestamp (NaN where ACE has no such timestamp).
data_discover = pd.merge(
    data_ace[['Dst']],
    data_discover,
    how='right',
    on='datetime',
)

##### Осуществление погружения временного ряда (delay embedding)

In [7]:
def shifting(df, delay_values, future_shift, type=None):
    """Build the delay-embedded frame for the columns listed in ``delay_values``.

    For ``'Dst'`` the result holds ``future_shift`` lead columns
    (``'Dst +k'``, value taken ``k`` rows later), the column itself, and
    ``depth`` lag columns (``'Dst -k'``, value taken ``k`` rows earlier).
    Every other column contributes itself plus its lag columns. The ``'Dst'``
    group always comes first; the remaining columns follow in the order of
    ``delay_values``.

    Shifts are by row position, not by timestamp.
    NOTE(review): this assumes rows are evenly spaced in time — confirm.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; must contain every key of ``delay_values``.
    delay_values : dict
        Maps a column name to its number of lag columns.
    future_shift : int
        Number of lead columns generated for ``'Dst'``.
    type : str, optional
        Kept for backward compatibility and ignored. The index is always taken
        from ``df`` instead of from a global frame selected by this flag.

    Returns
    -------
    pandas.DataFrame
        Embedded frame with the same index as ``df``; rows without a
        neighbour at the required offset hold NaN.

    Raises
    ------
    KeyError
        If a key of ``delay_values`` is not a column of ``df``.
    """
    target_columns = {}
    feature_columns = {}

    for column_name, depth_of_delay in delay_values.items():
        series = df[column_name]
        lagged = {
            f'{column_name} -{lag}': series.shift(lag)
            for lag in range(1, depth_of_delay + 1)
        }
        if column_name == 'Dst':
            # Leads are listed from the farthest to the nearest.
            for lead in range(future_shift, 0, -1):
                target_columns[f'{column_name} +{lead}'] = series.shift(-lead)
            target_columns[column_name] = series
            target_columns.update(lagged)
        else:
            feature_columns[column_name] = series
            feature_columns.update(lagged)

    ordered = {**target_columns, **feature_columns}
    if not ordered:
        return pd.DataFrame(index=df.index)

    # A single concat avoids the fragmentation caused by inserting columns one
    # at a time. Because every piece shares df's index, rows are not multiplied
    # when the index contains duplicate timestamps (index joins would do that).
    return pd.concat(list(ordered.values()), axis=1, keys=list(ordered))

In [8]:
# ACE column labels and the Discover labels they correspond to, position by position.
column_names_ace = [
    'B_x', 'B_gsm_y', 'B_gsm_z', 'B_magn',
    'H_den_SWP', 'SW_spd', 'Trr_SWP',
]
column_names_discover = [
    'bx_gsm', 'by_gsm', 'bz_gsm', 'bt',
    'proton_density', 'proton_speed', 'proton_temperature',
]

# Discover label -> ACE label at the same position.
rename_dict = dict(zip(column_names_discover, column_names_ace))

# Give the Discover frame the ACE column labels.
data_discover = data_discover.rename(columns=rename_dict)

In [9]:
column_names = [
    'Dst',
    'B_x', 'B_gsm_y', 'B_gsm_z', 'B_magn',
    'H_den_SWP', 'SW_spd', 'Trr_SWP',
]

# Embedding depths chosen from the autocorrelation functions; columns without
# an estimate get a depth of 24 hours.
# Dst - 43, Bx - 26, B_GSM_y - 12, B_GSM_z - 3, B_magn - 19, SW_spd - 56, H_den - 16
delay_values_autocorr = [
    43,  # Dst
    26,  # B_x
    12,  # B_gsm_y
    3,   # B_gsm_z
    19,  # B_magn
    16,  # H_den_SWP
    56,  # SW_spd
    24,  # Trr_SWP (no estimate listed above)
]
delay_values_24 = [24 for _ in range(8)]

future_shift = 3  # forecast horizon

In [12]:
# Column name -> embedding depth, for the uniform and the autocorrelation-based settings.
values_for_shift_24 = {
    name: depth for name, depth in zip(column_names, delay_values_24)
}
values_for_shift_autocorr = {
    name: depth for name, depth in zip(column_names, delay_values_autocorr)
}

In [13]:
# ACE frame embedded with the uniform depth mapping (values_for_shift_24).
data_ace_shifted_24 = shifting(data_ace, values_for_shift_24, future_shift, 'ace')

In [14]:
# Discover frame embedded with the uniform depth mapping (values_for_shift_24).
data_discover_shifted_24 = shifting(data_discover, values_for_shift_24, future_shift, 'discover')

In [15]:
# ACE frame embedded with the per-column depths from values_for_shift_autocorr.
data_ace_shifted_autocorr = shifting(data_ace, values_for_shift_autocorr, future_shift, 'ace')

In [16]:
# Discover frame embedded with the per-column depths from values_for_shift_autocorr.
data_discover_shifted_autocorr = shifting(data_discover, values_for_shift_autocorr, future_shift, 'discover')

##### Преобразование погруженных данных в файлы

In [17]:
# Written ';'-separated in UTF-8; the frame's index is saved as the first column (to_csv default).
data_ace_shifted_24.to_csv("Ace_погружение_24часа.csv", sep=';', encoding='utf-8')

In [18]:
# Written ';'-separated in UTF-8; the frame's index is saved as the first column (to_csv default).
data_discover_shifted_24.to_csv("Discover_погружение_24часа_до_22.09.2024.csv", sep=';', encoding='utf-8')

In [19]:
# Written ';'-separated in UTF-8; the frame's index is saved as the first column (to_csv default).
data_ace_shifted_autocorr.to_csv("Ace_погружение_автокорр.csv", sep=';', encoding='utf-8')

In [20]:
# Written ';'-separated in UTF-8; the frame's index is saved as the first column (to_csv default).
data_discover_shifted_autocorr.to_csv("Discover_погружение_автокорр_до_22.09.2024.csv", sep=';', encoding='utf-8')