# 패키지 로딩 및 시드 설정

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

from prophet import Prophet 


from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
import torch
import torch.nn as nn

random_seed = 1339
SEED = 1339
torch.manual_seed(random_seed)
np.random.seed(random_seed)
import random
import os
random.seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

def set_seeds(seed=SEED):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)

set_seeds()

Importing plotly failed. Interactive plots will not work.


# 데이터 불러오기

In [3]:
data=pd.read_csv("dataset/data/IP/DHCP.csv")

# 전처리 단계

## Timestamp 변경

In [37]:
time=data["Timestamp"].str.split("-", expand = True)
time.drop(columns = [1], inplace = True)
time=time[0].str.split("_", expand = True)
date = time[[0]]
time = time[[1]]

year = []
for i in range(len(date)) :
year.append(date[0][i][0:4])

month = []
for i in range(len(date)) :
month.append(date[0][i][4:6])

day = []
for i in range(len(date)) :
day.append(date[0][i][6:8])

hour = []
for i in range(len(date)) :
hour.append(time[1][i][0:2])

min = []
for i in range(len(date)) :
min.append(time[1][i][2:4])

time_data = pd.DataFrame({'year':year,'month':month,'day':day, 'hour' : hour, 'min':min})
timestamp = time_data['year'] +"-" +time_data['month'] + "-" +time_data['day'] + " " + time_data['hour'] +":" + time_data["min"] + ":" + "00"
data["Timestamp"] = timestamp


## train/test split

In [38]:
data["month"] = month

In [39]:
data["month"] = data["month"].astype(int)

In [40]:
train=data[data["month"]<7]
test=data[data["month"]>=7]

## 보간

In [41]:
train.index = train["Timestamp"].astype("Datetime64")
train=train.drop(columns = ["Timestamp"])
train = train.interpolate(method = "time")
train = train.reset_index()

test.index = test["Timestamp"].astype("Datetime64")
test=test.drop(columns = ["Timestamp"])
test = test.interpolate(method = "time")
test = test.reset_index()

## 추세제거

In [42]:
# ##### Svr_detect
Svr_detect = pd.concat([train['Timestamp'],train["Svr_detect"]],axis = 1)
Svr_detect = Svr_detect.rename(columns = {'Timestamp' : 'ds', "Svr_detect" : 'y'})
Svr_detect_model = Prophet()
Svr_detect_model.fit(Svr_detect)
Svr_detect_predict = Svr_detect_model.predict(pd.DataFrame(Svr_detect["ds"]))
Svr_detect["trend_off"]=Svr_detect['y'] - Svr_detect_predict['trend']

Svr_detect_test = pd.concat([test['Timestamp'],test["Svr_detect"]],axis = 1)
Svr_detect_test = Svr_detect_test.rename(columns = {'Timestamp' : 'ds', "Svr_detect" : 'y'})
Svr_detect_test_model = Prophet()
Svr_detect_test_model.fit(Svr_detect_test)
Svr_detect_test_predict = Svr_detect_test_model.predict(pd.DataFrame(Svr_detect_test["ds"]))
Svr_detect_test["trend_off"]=Svr_detect_test['y'] - Svr_detect_test_predict['trend']

# ##### Svr_connect
Svr_connect = pd.concat([train['Timestamp'],train["Svr_connect"]],axis = 1)
Svr_connect = Svr_connect.rename(columns = {'Timestamp' : 'ds', "Svr_connect" : 'y'})
Svr_connect_model = Prophet()
Svr_connect_model.fit(Svr_connect)
Svr_connect_predict = Svr_connect_model.predict(pd.DataFrame(Svr_connect["ds"]))
Svr_connect["trend_off"]=Svr_connect['y'] - Svr_connect_predict['trend']

Svr_connect_test = pd.concat([test['Timestamp'],test["Svr_connect"]],axis = 1)
Svr_connect_test = Svr_connect_test.rename(columns = {'Timestamp' : 'ds', "Svr_connect" : 'y'})
Svr_connect_test_model = Prophet()
Svr_connect_test_model.fit(Svr_connect_test)
Svr_connect_test_predict = Svr_connect_test_model.predict(pd.DataFrame(Svr_connect_test["ds"]))
Svr_connect_test["trend_off"]=Svr_connect_test['y'] - Svr_connect_test_predict['trend']

# ##### Ss_request
Ss_request = pd.concat([train['Timestamp'],train["Ss_request"]],axis = 1)
Ss_request = Ss_request.rename(columns = {'Timestamp' : 'ds', "Ss_request" : 'y'})
Ss_request_model = Prophet()
Ss_request_model.fit(Ss_request)
Ss_request_predict = Ss_request_model.predict(pd.DataFrame(Ss_request["ds"]))
Ss_request["trend_off"]=Ss_request['y'] - Ss_request_predict['trend']

Ss_request_test = pd.concat([test['Timestamp'],test["Ss_request"]],axis = 1)
Ss_request_test = Ss_request_test.rename(columns = {'Timestamp' : 'ds', "Ss_request" : 'y'})
Ss_request_test_model = Prophet()
Ss_request_test_model.fit(Ss_request_test)
Ss_request_test_predict = Ss_request_test_model.predict(pd.DataFrame(Ss_request_test["ds"]))
Ss_request_test["trend_off"]=Ss_request_test['y'] - Ss_request_test_predict['trend']

# ##### Ss_Established
Ss_Established = pd.concat([train['Timestamp'],train["Ss_Established"]],axis = 1)
Ss_Established = Ss_Established.rename(columns = {'Timestamp' : 'ds', "Ss_Established" : 'y'})
Ss_Established_model = Prophet()
Ss_Established_model.fit(Ss_Established)
Ss_Established_predict = Ss_Established_model.predict(pd.DataFrame(Ss_Established["ds"]))
Ss_Established["trend_off"]=Ss_Established['y'] - Ss_Established_predict['trend']

Ss_Established_test = pd.concat([test['Timestamp'],test["Ss_Established"]],axis = 1)
Ss_Established_test = Ss_Established_test.rename(columns = {'Timestamp' : 'ds', "Ss_Established" : 'y'})
Ss_Established_test_model = Prophet()
Ss_Established_test_model.fit(Ss_Established_test)
Ss_Established_test_predict = Ss_Established_test_model.predict(pd.DataFrame(Ss_Established_test["ds"]))
Ss_Established_test["trend_off"]=Ss_Established_test['y'] - Ss_Established_test_predict['trend']

# ##### 추세 제거된 데이터로 대체

train['Svr_detect'] = Svr_detect['trend_off']
train['Svr_connect'] = Svr_connect['trend_off']
train['Ss_request'] = Ss_request['trend_off']
train['Ss_Established'] = Ss_Established['trend_off']

test['Svr_detect'] = Svr_detect_test['trend_off']
test['Svr_connect'] = Svr_connect_test['trend_off']
test['Ss_request'] = Ss_request_test['trend_off']
test['Ss_Established'] = Ss_Established_test['trend_off']

## 스케일러

In [43]:
# train.drop(columns = ["Timestamp","month"],inplace = True)
# test.drop(columns = ["Timestamp","month"],inplace = True)

# data = pd.concat([train,test])
# data = data.reset_index()
# data.drop(columns = ["index"],inplace = True)

# from sklearn.preprocessing import MinMaxScaler
# model = MinMaxScaler()
# model = model.fit(data)

# train = model.transform(train)
# test = model.transform(test)

In [44]:
# np.save('/content/gdrive/MyDrive/KT/data/IP/train',train)
# np.save('/content/gdrive/MyDrive/KT/data/IP/test',test)

In [45]:
train = np.load('/content/gdrive/MyDrive/KT/data/IP/train.npy')
test = np.load('/content/gdrive/MyDrive/KT/data/IP/test.npy')

## 텐서변환

In [46]:
train_data = torch.Tensor(np.array(train))
test_data = torch.Tensor(np.array(test))

# 모델 구현

## 모델 구성

In [47]:
class Model(nn.Module) :
    def __init__(self, input_size, hidden_size, output_size) :
        super(Model, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.Encoder = nn.Sequential(
        
            nn.Linear(input_size,hidden_size[0]),
            nn.RReLU(),
            nn.Linear(hidden_size[0],output_size),
            nn.RReLU()
        )

        self.Decoder = nn.Sequential(

            nn.Linear(output_size,hidden_size[0]),
            nn.RReLU(),
            nn.Linear(hidden_size[0],input_size)
        )

    def forward(self, x) :
        x = self.Encoder(x)
        x = self.Decoder(x)

        return x

In [48]:
input_size = 4
hidden_size = [3]
output_size = 2

model = Model(input_size, hidden_size, output_size)
print(list(model.modules()))

[Model(
  (Encoder): Sequential(
    (0): Linear(in_features=4, out_features=3, bias=True)
    (1): RReLU(lower=0.125, upper=0.3333333333333333)
    (2): Linear(in_features=3, out_features=2, bias=True)
    (3): RReLU(lower=0.125, upper=0.3333333333333333)
  )
  (Decoder): Sequential(
    (0): Linear(in_features=2, out_features=3, bias=True)
    (1): RReLU(lower=0.125, upper=0.3333333333333333)
    (2): Linear(in_features=3, out_features=4, bias=True)
  )
), Sequential(
  (0): Linear(in_features=4, out_features=3, bias=True)
  (1): RReLU(lower=0.125, upper=0.3333333333333333)
  (2): Linear(in_features=3, out_features=2, bias=True)
  (3): RReLU(lower=0.125, upper=0.3333333333333333)
), Linear(in_features=4, out_features=3, bias=True), RReLU(lower=0.125, upper=0.3333333333333333), Linear(in_features=3, out_features=2, bias=True), RReLU(lower=0.125, upper=0.3333333333333333), Sequential(
  (0): Linear(in_features=2, out_features=3, bias=True)
  (1): RReLU(lower=0.125, upper=0.333333333333

## 학습 함수

In [49]:
loss_function = nn.MSELoss()

def training(model, data,loss_function):
    optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)
    
    dataloader = DataLoader(data, batch_size =64, shuffle =True)
    
    for epoch in range(1, 11): # 에포크는 10번 줌
        
        update_loss = 0.0
        
        for x in dataloader :
            optimizer.zero_grad()
            output = model(x)
            
            loss=loss_function(x, output) #dataloader로 불러온 데이터 값과 실제 데이터 간의 MSE 산출
            loss.backward() # MSE 역전파
            
            optimizer.step()
            update_loss += loss.item()

        print('epoch:', f'{epoch}', '  loss:', f'{update_loss}')
    
    return model

## 학습

In [50]:
Model = training(model, train_data, loss_function)

epoch: 1   loss: 1.1663368161025573
epoch: 2   loss: 0.1277831489642267
epoch: 3   loss: 0.11090515549039992
epoch: 4   loss: 0.10971053906359884
epoch: 5   loss: 0.10993863553812844
epoch: 6   loss: 0.10982950053949025
epoch: 7   loss: 0.1096154835595371
epoch: 8   loss: 0.1101126072499028
epoch: 9   loss: 0.11040568714452093
epoch: 10   loss: 0.11048259888593748


## 임계값 설정

In [51]:
train_loss = []
for data in train_data :
  output = Model(data)
  loss = loss_function(output, data)
  train_loss.append(loss.item())

test_loss = []
for data in test_data :
  output = Model(data)
  loss = loss_function(output, data)
  test_loss.append(loss.item())

In [52]:
treshold = np.array(0.00131)
train_error = list(train_loss >= treshold)
test_error = list(test_loss >= treshold)

print("train_error : ",train_error.count(True))
print("test_error : ",test_error.count(True))

print("Threshold : ",treshold)

train_error :  127
test_error :  137
Threshold :  0.00131


## 임계값 확대

In [53]:
###################### false가 난 구간 확인 #######################
submit = []
for i in range(len(test_loss)) :
    if test_loss[i] >= treshold :
        submit.append(1)
    else :
        submit.append(0)

submit = pd.DataFrame(submit)
submit=submit.rename(columns = {0:'Prediction'})

false = []
for i in range(len(submit)) :
    if submit["Prediction"][i] == 1:
         false.append(i)

####################### 5 연속 false 서칭 #######################

# original_submit = set(false)

# TIME_STEPS = 5

# def create_sequences(values, time_steps=TIME_STEPS):
#     output = []
#     for i in range(len(values) - time_steps + 1):
#         output.append(values[i : (i + time_steps)])
#     return np.stack(output)

# false_seq = create_sequences(false)

# false_tf = []
# for i in range(len(false_seq)) :
#     temp = []
#     ex = false_seq[i]
#     for j in range(len(ex)-1) :
#         temp.append(ex[j+1] - ex[j])
#     false_tf.append(temp)

# false_real_tf = []
# for i in range(len(false_tf)) :
#     false_real_tf.append(false_tf[i] == [1,1,1,1])

# false = set(np.array(false_seq[false_real_tf]).reshape(-1))

# submit_clean = pd.DataFrame()

# submit_clean["Prediction"] = np.zeros(26496)

# for i in false :
#     submit_clean["Prediction"][i] = 1

false = list(false)


################# 국소 예측 모형 형성 ##################
many = 1
false_plus = []
#false_minus = []
for i in range(len(false)) :
    false_plus.append(false[i] + many)
    #false_minus.append(false[i]-many)

#false_list = set(false) | set(false_plus) | set(false_minus) | set(original_submit)
# false_list = set(false) | set(false_plus)  | set(original_submit)
# false_list = list(false_list)
false_list = set(false) | set(false_plus)
false_list = list(false_list)
false_list.sort()


#################### 제출 ############################
submit_many = pd.DataFrame()
submit_many["Prediction"] = np.zeros(26496)

for i in false :
    submit_many["Prediction"][i] = 1

for i in false_list :
    submit_many["Prediction"][i] = 1

submit_many["Prediction"].value_counts()

0.0    26319
1.0      177
Name: Prediction, dtype: int64

## 결과도출

In [55]:
submit_many.to_csv("/content/gdrive/MyDrive/KT/result.csv")