In [9]:
import time
import pandas as pd
import requests
import pickle
from sqlalchemy import create_engine

# PostgreSQL 연결 설정
DATABASE_URL = "postgresql://admin:admin@localhost:5432/logdb"
engine = create_engine(DATABASE_URL)

# 학습된 모델 불러오기
with open("isolation_forest_model.pkl", "rb") as f:
    model = pickle.load(f)
print("Model loaded successfully!")

# 데이터 전처리 함수
def preprocess_data(data):
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    data['hour'] = data['timestamp'].dt.hour
    data['weekday'] = data['timestamp'].dt.weekday
    features = data[['user_id', 'document_id', 'activity_type', 'hour', 'weekday']]
    return pd.get_dummies(features)

# 실시간 로그 모니터링 함수
def monitor_new_logs():
    last_checked = pd.Timestamp.utcnow()

    while True:
        # 새로 유입된 로그 조회
        query = f"SELECT * FROM document_logs WHERE timestamp > '{last_checked}';"
        new_data = pd.read_sql(query, engine)

        if not new_data.empty:
            for _, row in new_data.iterrows():
                log = row.to_dict()

                # 전처리 수행
                processed_log = preprocess_data(pd.DataFrame([log]))

                # 모델로 이상 탐지 수행
                anomaly_score = model.decision_function(processed_log)
                is_anomaly = model.predict(processed_log)

                # 결과를 Flask 서버에 POST 요청으로 전송
                response = requests.post('http://localhost:5000/monitor', json={
                    "anomaly_score": float(anomaly_score[0]),
                    "is_anomaly": bool(is_anomaly[0] == -1)
                })
                print(response.json())

        # 마지막 조회 시간 업데이트
        last_checked = pd.Timestamp.utcnow()
        time.sleep(10)  # 10초마다 새 로그 확인

# 모니터링 시작
monitor_new_logs()


Model loaded successfully!


ValueError: The feature names should match those that were passed during fit.
Feature names seen at fit time, yet now missing:
- activity_type_다운로드
- activity_type_삭제
- activity_type_열람
- document_id_D001
- document_id_D002
- ...
