In [4]:
import numpy as np
import pandas as pd

import warnings
# Suppresses every warning category for the rest of the session, which also
# hides deprecation notices raised by the libraries used below.
# NOTE(review): consider narrowing this to the specific categories that are noisy.
warnings.filterwarnings('ignore')

from statsmodels.tsa.seasonal import seasonal_decompose

import os
from glob import glob
from pathlib import Path

# Interval identifier interpolated into the input/output directory names
# (e.g. "interval_1_src_feature"). The unit is not stated here — TODO confirm.
INTERVAL = 1

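Reconstruct src features: decompose every feature series of each file in the src feature directory into trend, seasonal, combined (trend + seasonal) and detrend outputs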

In [5]:
# Decompose every feature column of every file under the src-feature directory
# and write four sibling outputs per input file:
#   <prefix>_reconstructed_STL_trend     -> trend component
#   <prefix>_reconstructed_STL_seasonal  -> seasonal component
#   <prefix>_reconstructed_STL_combined  -> trend + seasonal
#   <prefix>_reconstructed_STL_detrend   -> data minus trend
# NOTE: despite "STL" in the directory names, seasonal_decompose is the
# classical moving-average decomposition, not statsmodels' LOESS-based STL.
# NOTE(review): this cell mirrors the dst-feature cell below; only the prefix
# and the IP column name differ.
original_prefix = f"interval_{INTERVAL}_src_feature"
key_columns = ["timeStart", "srcIP"]

for filename in glob(str(Path(f"{original_prefix}/*"))):
    print("Reconstructing", filename, "...", " " * 20, end="\r")
    trendFilename = filename.replace(original_prefix, f"{original_prefix}_reconstructed_STL_trend")
    seasonalFilename = filename.replace(original_prefix, f"{original_prefix}_reconstructed_STL_seasonal")
    combinedFilename = filename.replace(original_prefix, f"{original_prefix}_reconstructed_STL_combined")
    detrendFilename = filename.replace(original_prefix, f"{original_prefix}_reconstructed_STL_detrend")
    # The detrend file is written last, so its presence marks this input as done.
    if os.path.exists(detrendFilename):
        continue

    df = pd.read_parquet(filename)
    # Each output starts from its own copy of the identifying columns so the
    # per-feature assignments below never write into a view of `df`.
    df_trend = df[key_columns].copy()
    df_seasonal = df[key_columns].copy()
    df_combined = df[key_columns].copy()
    df_detrend = df[key_columns].copy()

    # Synthetic daily index: one consecutive day per row, starting 2020-01-01.
    # It only gives seasonal_decompose a regular frequency to infer the period
    # from; the real `timeStart` values are not used. It depends on the row
    # count alone, so it is built once per file instead of once per column.
    index = np.array('2020-01-01', dtype=np.datetime64) + np.arange(len(df))

    for column in df.columns.difference(key_columns):
        # `.ffill()` replaces the deprecated `fillna(method='ffill')`.
        temp = pd.DataFrame({'data': df[column].values}, index=index).asfreq('D').ffill()
        result = seasonal_decompose(x=temp, model='additive')

        # With the default settings the trend is NaN at both ends of the series
        # (no extrapolation), so `trend` and `trend + seasonal` carry those NaNs.
        trend = result.trend.values
        seasonal = result.seasonal.values
        # Missing trend is treated as 0 here, i.e. the raw value is kept at the edges.
        detrend = (temp['data'] - result.trend.fillna(0)).values

        df_trend[column] = trend
        df_seasonal[column] = seasonal
        df_combined[column] = trend + seasonal
        df_detrend[column] = detrend

    for out_path in (trendFilename, seasonalFilename, combinedFilename, detrendFilename):
        os.makedirs(os.path.dirname(out_path), exist_ok=True)

    df_trend.to_parquet(trendFilename, index=False)
    df_seasonal.to_parquet(seasonalFilename, index=False)
    df_combined.to_parquet(combinedFilename, index=False)
    df_detrend.to_parquet(detrendFilename, index=False)

Reconstructing interval_1_src_feature\99.3.118.211.parquet ...                        

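Reconstruct dst features: decompose every feature series of each file in the dst feature directory into trend, seasonal, combined (trend + seasonal) and detrend outputs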

In [7]:
# Decompose every feature column of every file under the dst-feature directory
# and write four sibling outputs per input file:
#   <prefix>_reconstructed_STL_trend     -> trend component
#   <prefix>_reconstructed_STL_seasonal  -> seasonal component
#   <prefix>_reconstructed_STL_combined  -> trend + seasonal
#   <prefix>_reconstructed_STL_detrend   -> data minus trend
# NOTE: despite "STL" in the directory names, seasonal_decompose is the
# classical moving-average decomposition, not statsmodels' LOESS-based STL.
# NOTE(review): this cell mirrors the src-feature cell above; only the prefix
# and the IP column name differ.
original_prefix = f"interval_{INTERVAL}_dst_feature"
key_columns = ["timeStart", "dstIP"]

for filename in glob(str(Path(f"{original_prefix}/*"))):
    print("Reconstructing", filename, "...", " " * 20, end="\r")
    trendFilename = filename.replace(original_prefix, f"{original_prefix}_reconstructed_STL_trend")
    seasonalFilename = filename.replace(original_prefix, f"{original_prefix}_reconstructed_STL_seasonal")
    combinedFilename = filename.replace(original_prefix, f"{original_prefix}_reconstructed_STL_combined")
    detrendFilename = filename.replace(original_prefix, f"{original_prefix}_reconstructed_STL_detrend")
    # The detrend file is written last, so its presence marks this input as done.
    if os.path.exists(detrendFilename):
        continue

    df = pd.read_parquet(filename)
    # Each output starts from its own copy of the identifying columns so the
    # per-feature assignments below never write into a view of `df`.
    df_trend = df[key_columns].copy()
    df_seasonal = df[key_columns].copy()
    df_combined = df[key_columns].copy()
    df_detrend = df[key_columns].copy()

    # Synthetic daily index: one consecutive day per row, starting 2020-01-01.
    # It only gives seasonal_decompose a regular frequency to infer the period
    # from; the real `timeStart` values are not used. It depends on the row
    # count alone, so it is built once per file instead of once per column.
    index = np.array('2020-01-01', dtype=np.datetime64) + np.arange(len(df))

    for column in df.columns.difference(key_columns):
        # `.ffill()` replaces the deprecated `fillna(method='ffill')`.
        temp = pd.DataFrame({'data': df[column].values}, index=index).asfreq('D').ffill()
        result = seasonal_decompose(x=temp, model='additive')

        # With the default settings the trend is NaN at both ends of the series
        # (no extrapolation), so `trend` and `trend + seasonal` carry those NaNs.
        trend = result.trend.values
        seasonal = result.seasonal.values
        # Missing trend is treated as 0 here, i.e. the raw value is kept at the edges.
        detrend = (temp['data'] - result.trend.fillna(0)).values

        df_trend[column] = trend
        df_seasonal[column] = seasonal
        df_combined[column] = trend + seasonal
        df_detrend[column] = detrend

    for out_path in (trendFilename, seasonalFilename, combinedFilename, detrendFilename):
        os.makedirs(os.path.dirname(out_path), exist_ok=True)

    df_trend.to_parquet(trendFilename, index=False)
    df_seasonal.to_parquet(seasonalFilename, index=False)
    df_combined.to_parquet(combinedFilename, index=False)
    df_detrend.to_parquet(detrendFilename, index=False)

Reconstructing interval_1_dst_feature\99.91.213.113.parquet ...                       

In [None]:
# Disabled experiment, kept for reference (every line below is commented out).
# Second decomposition pass: reads the CSV files under reconstructed_STL_trend/
# and writes trend / seasonal / combined CSVs under reconstructed_dual_STL_*.
# NaN entries of the trend are replaced by the mean of its non-NaN entries
# before the outputs are built.
# for filename in glob(Path('reconstructed_STL_trend/200702111400/*/*').__str__()):
#     trendFilename = filename.replace("reconstructed_STL_trend", "reconstructed_dual_STL_trend")
#     seasonalFilename = filename.replace("reconstructed_STL_trend", "reconstructed_dual_STL_seasonal")
#     combinedFilename = filename.replace("reconstructed_STL_trend", "reconstructed_dual_STL_combined")
#     if os.path.exists(trendFilename):
#         continue
    
#     df = pd.read_csv(filename)
#     df_trend = pd.DataFrame(df[["timeStart", "SrcIP"]])
#     df_seasonal = pd.DataFrame(df[["timeStart", "SrcIP"]])
#     df_combined = pd.DataFrame(df[["timeStart", "SrcIP"]])
#     for column in df.columns.difference(["timeStart", "SrcIP"]):
#         index = np.array('2020-01-01', dtype=np.datetime64) + np.arange(len(df))
#         temp = pd.DataFrame({'data': df[column].values}, index=index).asfreq('D').fillna(method='ffill')
#         result = seasonal_decompose(x=temp, model='additive')
        
#         trend = result.trend.values
#         trend = np.where(np.isnan(trend), np.ma.array(trend, mask=np.isnan(trend)).mean(axis=0), trend)
        
#         seasonal = result.seasonal.values
        
#         df_trend[column] = trend
#         df_seasonal[column] = seasonal
#         df_combined[column] = trend + seasonal
    
#     os.makedirs(os.path.dirname(trendFilename), exist_ok=True)
#     os.makedirs(os.path.dirname(seasonalFilename), exist_ok=True)
#     os.makedirs(os.path.dirname(combinedFilename), exist_ok=True)
        
#     df_trend.to_csv(trendFilename, index=False)
#     df_seasonal.to_csv(seasonalFilename, index=False)
#     df_combined.to_csv(combinedFilename, index=False)

In [None]:
# Disabled experiment, kept for reference (every line below is commented out).
# Third decomposition pass: reads the CSV files under reconstructed_dual_STL_trend/
# and writes trend / seasonal / combined CSVs under reconstructed_trial_STL_*.
# NaN entries of the trend are replaced by the mean of its non-NaN entries
# before the outputs are built.
# for filename in glob(Path('reconstructed_dual_STL_trend/200702111400/*/*').__str__()):
#     trendFilename = filename.replace("reconstructed_dual_STL_trend", "reconstructed_trial_STL_trend")
#     seasonalFilename = filename.replace("reconstructed_dual_STL_trend", "reconstructed_trial_STL_seasonal")
#     combinedFilename = filename.replace("reconstructed_dual_STL_trend", "reconstructed_trial_STL_combined")
#     if os.path.exists(trendFilename):
#         continue
    
#     df = pd.read_csv(filename)
#     df_trend = pd.DataFrame(df[["timeStart", "SrcIP"]])
#     df_seasonal = pd.DataFrame(df[["timeStart", "SrcIP"]])
#     df_combined = pd.DataFrame(df[["timeStart", "SrcIP"]])
#     for column in df.columns.difference(["timeStart", "SrcIP"]):
#         index = np.array('2020-01-01', dtype=np.datetime64) + np.arange(len(df))
#         temp = pd.DataFrame({'data': df[column].values}, index=index).asfreq('D').fillna(method='ffill')
#         result = seasonal_decompose(x=temp, model='additive')
        
#         trend = result.trend.values
#         trend = np.where(np.isnan(trend), np.ma.array(trend, mask=np.isnan(trend)).mean(axis=0), trend)
        
#         seasonal = result.seasonal.values
        
#         df_trend[column] = trend
#         df_seasonal[column] = seasonal
#         df_combined[column] = trend + seasonal
    
#     os.makedirs(os.path.dirname(trendFilename), exist_ok=True)
#     os.makedirs(os.path.dirname(seasonalFilename), exist_ok=True)
#     os.makedirs(os.path.dirname(combinedFilename), exist_ok=True)
        
#     df_trend.to_csv(trendFilename, index=False)
#     df_seasonal.to_csv(seasonalFilename, index=False)
#     df_combined.to_csv(combinedFilename, index=False)

In [None]:
# def parser(s):
#     return datetime.strptime(s, '%Y-%m-%d %H:%M:%S')

In [None]:
# df = pd.read_csv("interval1/200702111400/256500/256507-212_136_45_198.csv")
# dates = np.array('2020-01-01', dtype=np.datetime64) + np.arange(len(df))
# temp = pd.DataFrame({'data': df["bytes"].values}, index=dates).asfreq('D').fillna(method='ffill')
# result = seasonal_decompose(x=temp, model='additive')

In [None]:
# import numpy.ma as ma
# a = result.trend.values
# trend = np.where(np.isnan(a), ma.array(a, mask=np.isnan(a)).mean(axis=0), a)
# seasonal = result.seasonal.values
# (trend + seasonal).shape