In [None]:
import numpy as np
import pandas as pd
from scipy.stats import zscore
from statsmodels.tsa.ar_model import AutoReg, ar_select_order

# Load the 'untrust_ip_dst_entropy' column of the RepLFA output CSV as a Series.
# Forward slashes keep the relative path valid on every OS and avoid the
# invalid "\o" / "\R" escape sequences a backslash-separated literal contains.
data = pd.read_csv('../output/RepLFA_2024_09_27_16_32_02.csv')['untrust_ip_dst_entropy']

train_size = 100       # observations used to fit the AR model
forecast_horizon = 10  # observations forecast and checked for anomalies
max_lag = 10           # largest AR order considered during order selection

# Positional split: the first `train_size` points train the model, the next
# `forecast_horizon` points form the window being tested.
train = data.iloc[:train_size]
test = data.iloc[train_size:train_size + forecast_horizon]
assert len(test) == forecast_horizon, "not enough rows for the requested forecast horizon"

# AutoReg only accepts explicit lag counts/lists, so the order is chosen with
# ar_select_order (AIC criterion) and the selected model is then fitted.
selection = ar_select_order(train, maxlag=max_lag, ic='aic')
model = selection.model
results = model.fit()

# Out-of-sample forecast for the test window. `end` is inclusive, hence the -1
# so that exactly `forecast_horizon` values are produced.
predictions = results.predict(
    start=len(train),
    end=len(train) + forecast_horizon - 1,
    dynamic=False,
)

# Forecast errors on the test window (plain array, aligned by position).
residuals = test.to_numpy() - np.asarray(predictions)

# Z-score the forecast errors against the in-sample residual distribution.
# Standardising the test residuals against themselves would cap |z| at
# sqrt(n - 1) = 3 for n = 10, so a threshold of 3 could never be exceeded.
train_resid = np.asarray(results.resid)
z_scores = (residuals - np.nanmean(train_resid)) / np.nanstd(train_resid, ddof=1)
threshold = 3  # flag errors more than 3 in-sample standard deviations away
anomalies = np.where(np.abs(z_scores) > threshold)

# Report anomaly positions (offsets within the test window).
print("Anomalies detected at indices:", anomalies)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc

# Build a synthetic binary-classification dataset.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=2,
    n_redundant=10,
    n_classes=2,
    random_state=42,
)

# Hold out 30% of the samples as the test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit a logistic-regression classifier on the training split
# (fit() returns the estimator itself, so construction and fitting chain).
clf = LogisticRegression(random_state=42).fit(X_train, y_train)

# Column 1 of predict_proba is the probability of the positive class.
y_scores = clf.predict_proba(X_test)[:, 1]

# ROC curve points and the area under that curve.
fpr, tpr, _ = roc_curve(y_test, y_scores)
roc_auc = auc(fpr, tpr)

# Draw the ROC curve together with the chance-level diagonal.
fig, ax = plt.subplots()
ax.plot(fpr, tpr, label=f'ROC curve (area = {roc_auc:.2f})')
ax.plot([0, 1], [0, 1], 'k--')  # diagonal reference line
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC)')
ax.legend(loc="lower right")
plt.show()