In [2]:
import numpy as np
import pandas as pd
# Display options applied to every pandas object rendered in this notebook.
for option_name, option_value in {
    'display.max_rows': 15,
    'display.max_columns': 500,
    'display.width': 1000,
}.items():
    pd.set_option(option_name, option_value)

import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from pandas.plotting import register_matplotlib_converters
from mpl_toolkits.mplot3d import Axes3D

from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
register_matplotlib_converters()
from time import time
import seaborn as sns
sns.set(style="whitegrid")

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.covariance import EllipticEnvelope

import warnings
warnings.filterwarnings('ignore')

from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.dates as mdates

import os
from glob import glob
from pathlib import Path

# np.random.seed() returns None, so the seed value is stored first and then
# used to seed NumPy's global generator; RANDOM_SEED stays usable as a
# `random_state=` argument.
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

In [4]:
INTERVAL = 30

# Columns copied unchanged into every output file; every other column is decomposed.
KEY_COLUMNS = ["timeStart", "SrcIP"]
# One output tree per component: interval<N>_reconstructed_STL_<component>/...
# "detrend" must stay last: it is written last and doubles as the "already done" marker.
COMPONENTS = ("trend", "seasonal", "combined", "detrend")


def stl_output_path(filename, component, interval):
    """Return the output path for `component` that mirrors the input `filename`.

    Only the first occurrence of ``interval<N>`` is rewritten (the root
    directory of the glob pattern used below), so a file or sub-directory whose
    own name contains ``interval<N>`` keeps that name.
    """
    source_root = f"interval{interval}"
    return filename.replace(source_root, f"{source_root}_reconstructed_STL_{component}", 1)


def decompose_frame(df):
    """Additively decompose every non-key column of `df`.

    Returns a dict keyed by the names in COMPONENTS. Each value is a DataFrame
    holding KEY_COLUMNS plus one column per decomposed input column:

    * ``trend``    - ``result.trend`` (NaN wherever statsmodels leaves it undefined)
    * ``seasonal`` - ``result.seasonal``
    * ``combined`` - ``trend + seasonal`` (NaN wherever the trend is NaN)
    * ``detrend``  - input minus trend, with NaN trend values treated as 0, so
      those rows keep the raw input value
    """
    # Synthetic daily calendar with one step per row. The dates carry no meaning;
    # presumably they exist so seasonal_decompose can infer a period from the
    # index frequency (statsmodels documents 7 for daily data) - TODO confirm.
    # Built once here because it is identical for every column.
    index = np.array('2020-01-01', dtype=np.datetime64) + np.arange(len(df))

    frames = {component: df[KEY_COLUMNS].copy() for component in COMPONENTS}
    for column in df.columns.difference(KEY_COLUMNS):
        # .ffill() replaces the deprecated fillna(method='ffill'); leading NaNs are left as-is.
        temp = pd.DataFrame({'data': df[column].values}, index=index).asfreq('D').ffill()
        result = seasonal_decompose(x=temp, model='additive')

        trend = result.trend.values
        seasonal = result.seasonal.values

        frames["trend"][column] = trend
        frames["seasonal"][column] = seasonal
        frames["combined"][column] = trend + seasonal
        frames["detrend"][column] = (temp['data'] - result.trend.fillna(0)).values
    return frames


for filename in glob(str(Path(f"interval{INTERVAL}/200702111400/*/*"))):
    outputs = {component: stl_output_path(filename, component, INTERVAL) for component in COMPONENTS}
    # The detrend file is written last, so its presence means this input was fully processed.
    if os.path.exists(outputs["detrend"]):
        continue

    frames = decompose_frame(pd.read_csv(filename))

    for component in COMPONENTS:
        os.makedirs(os.path.dirname(outputs[component]), exist_ok=True)

    for component in COMPONENTS:
        frames[component].to_csv(outputs[component], index=False)

In [3]:
# for filename in glob(Path('reconstructed_STL_trend/200702111400/*/*').__str__()):
#     trendFilename = filename.replace("reconstructed_STL_trend", "reconstructed_dual_STL_trend")
#     seasonalFilename = filename.replace("reconstructed_STL_trend", "reconstructed_dual_STL_seasonal")
#     combinedFilename = filename.replace("reconstructed_STL_trend", "reconstructed_dual_STL_combined")
#     if os.path.exists(trendFilename):
#         continue
    
#     df = pd.read_csv(filename)
#     df_trend = pd.DataFrame(df[["timeStart", "SrcIP"]])
#     df_seasonal = pd.DataFrame(df[["timeStart", "SrcIP"]])
#     df_combined = pd.DataFrame(df[["timeStart", "SrcIP"]])
#     for column in df.columns.difference(["timeStart", "SrcIP"]):
#         index = np.array('2020-01-01', dtype=np.datetime64) + np.arange(len(df))
#         temp = pd.DataFrame({'data': df[column].values}, index=index).asfreq('D').fillna(method='ffill')
#         result = seasonal_decompose(x=temp, model='additive')
        
#         trend = result.trend.values
#         trend = np.where(np.isnan(trend), np.ma.array(trend, mask=np.isnan(trend)).mean(axis=0), trend)
        
#         seasonal = result.seasonal.values
        
#         df_trend[column] = trend
#         df_seasonal[column] = seasonal
#         df_combined[column] = trend + seasonal
    
#     os.makedirs(os.path.dirname(trendFilename), exist_ok=True)
#     os.makedirs(os.path.dirname(seasonalFilename), exist_ok=True)
#     os.makedirs(os.path.dirname(combinedFilename), exist_ok=True)
        
#     df_trend.to_csv(trendFilename, index=False)
#     df_seasonal.to_csv(seasonalFilename, index=False)
#     df_combined.to_csv(combinedFilename, index=False)

In [4]:
# for filename in glob(Path('reconstructed_dual_STL_trend/200702111400/*/*').__str__()):
#     trendFilename = filename.replace("reconstructed_dual_STL_trend", "reconstructed_trial_STL_trend")
#     seasonalFilename = filename.replace("reconstructed_dual_STL_trend", "reconstructed_trial_STL_seasonal")
#     combinedFilename = filename.replace("reconstructed_dual_STL_trend", "reconstructed_trial_STL_combined")
#     if os.path.exists(trendFilename):
#         continue
    
#     df = pd.read_csv(filename)
#     df_trend = pd.DataFrame(df[["timeStart", "SrcIP"]])
#     df_seasonal = pd.DataFrame(df[["timeStart", "SrcIP"]])
#     df_combined = pd.DataFrame(df[["timeStart", "SrcIP"]])
#     for column in df.columns.difference(["timeStart", "SrcIP"]):
#         index = np.array('2020-01-01', dtype=np.datetime64) + np.arange(len(df))
#         temp = pd.DataFrame({'data': df[column].values}, index=index).asfreq('D').fillna(method='ffill')
#         result = seasonal_decompose(x=temp, model='additive')
        
#         trend = result.trend.values
#         trend = np.where(np.isnan(trend), np.ma.array(trend, mask=np.isnan(trend)).mean(axis=0), trend)
        
#         seasonal = result.seasonal.values
        
#         df_trend[column] = trend
#         df_seasonal[column] = seasonal
#         df_combined[column] = trend + seasonal
    
#     os.makedirs(os.path.dirname(trendFilename), exist_ok=True)
#     os.makedirs(os.path.dirname(seasonalFilename), exist_ok=True)
#     os.makedirs(os.path.dirname(combinedFilename), exist_ok=True)
        
#     df_trend.to_csv(trendFilename, index=False)
#     df_seasonal.to_csv(seasonalFilename, index=False)
#     df_combined.to_csv(combinedFilename, index=False)

In [5]:
# def parser(s):
#     return datetime.strptime(s, '%Y-%m-%d %H:%M:%S')

In [6]:
# df = pd.read_csv("interval1/200702111400/256500/256507-212_136_45_198.csv")
# dates = np.array('2020-01-01', dtype=np.datetime64) + np.arange(len(df))
# temp = pd.DataFrame({'data': df["bytes"].values}, index=dates).asfreq('D').fillna(method='ffill')
# result = seasonal_decompose(x=temp, model='additive')

In [7]:
# import numpy.ma as ma
# a = result.trend.values
# trend = np.where(np.isnan(a), ma.array(a, mask=np.isnan(a)).mean(axis=0), a)
# seasonal = result.seasonal.values
# (trend + seasonal).shape