In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


In [2]:
# Seed NumPy's global RNG so the synthetic dataset is identical on every run.
np.random.seed(42)

n_samples = 1000
n_features = 5
n_anomalies = int(n_samples * 0.1)  # anomalies number 10% of the normal sample count

# Normal points are standard-normal draws; anomalies are uniform draws
# between -10 and 10. The normal draw must come first so the RNG stream is
# consumed in the same order on every run.
normal_data = np.random.normal(loc=0.0, scale=1.0, size=(n_samples, n_features))
anomaly_data = np.random.uniform(-10, 10, size=(n_anomalies, n_features))

# Labels (0: normal, 1: anomaly)
normal_labels = np.zeros(len(normal_data))
anomaly_labels = np.ones(len(anomaly_data))

# Stack normal rows on top of anomaly rows. Nothing is shuffled here: the
# rows stay in normal-then-anomaly order.
X = np.concatenate([normal_data, anomaly_data], axis=0)
y = np.concatenate([normal_labels, anomaly_labels])

# Tabular view of the same data: one column per feature plus the label column
feature_names = ['feature_{}'.format(i) for i in range(1, n_features + 1)]
data = pd.DataFrame(X, columns=feature_names).assign(label=y)


In [3]:
# Display the combined DataFrame (feature columns plus the 0/1 label column)
data

Unnamed: 0,feature_1,feature_2,feature_3,feature_4,feature_5,label
0,0.496714,-0.138264,0.647689,1.523030,-0.234153,0.0
1,-0.234137,1.579213,0.767435,-0.469474,0.542560,0.0
2,-0.463418,-0.465730,0.241962,-1.913280,-1.724918,0.0
3,-0.562288,-1.012831,0.314247,-0.908024,-1.412304,0.0
4,1.465649,-0.225776,0.067528,-1.424748,-0.544383,0.0
...,...,...,...,...,...,...
1095,-8.958552,-9.743567,5.910888,-0.457072,-7.382377,1.0
1096,-5.288681,8.650079,7.190060,4.195701,-0.355495,1.0
1097,0.406473,9.504891,9.315409,-5.694619,6.222091,1.0
1098,-6.055147,-5.729250,3.521861,-1.899978,8.630888,1.0


In [4]:
# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Build a 100-tree random forest and fit it on the training split
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict labels for the held-out rows
y_pred = model.predict(X_test)

# Compute the evaluation metrics first, then report them
conf_mat = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print("Confusion Matrix:\n", conf_mat)
print("Classification Report:\n", report)
print("Accuracy Score:", accuracy)

Confusion Matrix:
 [[307   0]
 [  0  23]]
Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       307
         1.0       1.00      1.00      1.00        23

    accuracy                           1.00       330
   macro avg       1.00      1.00      1.00       330
weighted avg       1.00      1.00      1.00       330

Accuracy Score: 1.0


In [5]:
# Spot-check the fitted classifier on the last five rows of the test split.
#
# This cell reuses `model`, `X_test`, `y_test` and `y_pred` from the
# train/evaluate cell above. It previously regenerated the dataset and refit
# the forest with the exact same seeds (np.random.seed(42), random_state=42),
# which rebuilt objects equal to the ones already in the kernel, along with a
# DataFrame that was never used here. Run the earlier cells first.

# Performance on the full test split
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Accuracy Score:", accuracy_score(y_test, y_pred))

# Out-of-sample prediction: these rows come from the held-out split, so the
# model did not see them during fit().
last_five_data = X_test[-5:]
last_five_predictions = model.predict(last_five_data)
print("Last Five Data Out-of-sample Predictions:\n", last_five_predictions)

Confusion Matrix:
 [[307   0]
 [  0  23]]
Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       307
         1.0       1.00      1.00      1.00        23

    accuracy                           1.00       330
   macro avg       1.00      1.00      1.00       330
weighted avg       1.00      1.00      1.00       330

Accuracy Score: 1.0
Last Five Data Out-of-sample Predictions:
 [1. 0. 0. 0. 1.]
