In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import pairwise_distances
import joblib
import warnings

# Suppress all warnings for the remainder of the session.
warnings.filterwarnings(action="ignore")

# Load the spreadsheet and keep only the columns used for clustering.
df1 = pd.read_excel('output_lube_oil_g11.xlsx')
selected_columns = [
    'AssetID_8341',
    'AssetID_8342',
    'AssetID_8343',
    'AssetID_8344',
    'AssetID_8346',
    'AssetID_9286',
    'AssetID_9287',
]
data_to_scale = df1.loc[:, selected_columns]

# Standardization: fit the scaler on the selected columns, then transform the
# same data with it. The fitted `scaler` is kept because it is persisted later.
scaler = StandardScaler()
scaler.fit(data_to_scale)
scaled_data = scaler.transform(data_to_scale)

# Wrap the scaled values in a DataFrame under the original column names, then
# discard every row that has a missing value in any of those columns.
scaled_df = pd.DataFrame(data=scaled_data, columns=selected_columns)
scaled_df_clean = scaled_df.dropna(axis=0, how='any')

# Run DBSCAN on the cleaned, scaled rows. After fitting, `labels` holds one
# cluster id per row; the value -1 is treated as noise further down.
dbscan = DBSCAN(eps=0.7, min_samples=6)
dbscan.fit(scaled_df_clean)
labels = dbscan.labels_

# Separate noise rows from clustered rows.
# `df` is a copy of the cleaned data with the DBSCAN label attached per row.
df = scaled_df_clean.assign(label=labels)

# Boolean row selectors: label -1 is noise, anything else belongs to a cluster.
noise_mask = df['label'].eq(-1)
cluster_mask = df['label'].ne(-1)

# Plain feature matrices (label column removed) for each group.
noise_points = df.loc[noise_mask].drop(columns='label').to_numpy()
cluster_points = df.loc[cluster_mask].drop(columns='label').to_numpy()

# Compute distances and anomaly weights.
# For every noise point, take the distance to its nearest clustered point and
# min-max scale those distances to [0, 1] across the noise points.
if noise_points.shape[0] == 0:
    # No noise rows at all: nothing to weigh. Produce empty arrays so the
    # later masked assignment into the weight vector is a harmless no-op
    # instead of failing inside pairwise_distances on an empty input.
    distances = np.empty((0, cluster_points.shape[0]))
    min_distances = np.empty(0)
    normalized_weights = np.empty(0)
elif cluster_points.shape[0] == 0:
    # Every row was labelled noise, so there is no reference set to measure
    # against; fail with an explicit message rather than an opaque one.
    raise ValueError(
        "DBSCAN labelled every sample as noise; no cluster points are "
        "available to compute anomaly distances. Adjust eps/min_samples."
    )
else:
    distances = pairwise_distances(noise_points, cluster_points)
    min_distances = distances.min(axis=1)
    lowest = min_distances.min()
    spread = min_distances.max() - lowest
    if spread > 0:
        normalized_weights = (min_distances - lowest) / spread
    else:
        # All noise points are equally far from the clusters (this includes
        # the single-noise-point case). Min-max scaling would be 0/0 -> NaN,
        # so give them the full weight to keep them flagged as anomalies.
        normalized_weights = np.ones_like(min_distances)

# Build the anomaly-weight vector: one entry per row of `df`, zero for
# clustered rows and the normalized distance weight for noise rows.
anomaly_weights = np.zeros(df.shape[0])
anomaly_weights[noise_mask.to_numpy()] = normalized_weights

# Attach the weights to the frame as a new column.
df['anomaly_weight'] = anomaly_weights

# Save the models and the reference data to the working directory:
# the fitted scaler, the fitted DBSCAN estimator, and the clustered points.
joblib.dump(value=scaler, filename='scaler.pkl')
joblib.dump(value=dbscan, filename='dbscan_model.pkl')
np.save(file='cluster_points.npy', arr=cluster_points)
