In [1]:
!pip install --upgrade --no-cache-dir gdown
!gdown 1sFzjheIoL6-JF7OKvF-ml6zLdyMBHlGr
!unzip air+quality.zip

Downloading...
From: https://drive.google.com/uc?id=1sFzjheIoL6-JF7OKvF-ml6zLdyMBHlGr
To: /content/air+quality.zip
100% 1.54M/1.54M [00:00<00:00, 25.2MB/s]
Archive:  air+quality.zip
  inflating: AirQualityUCI.csv       
  inflating: AirQualityUCI.xlsx      


In [2]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [3]:
df = pd.read_csv('AirQualityUCI.csv')

# اگر فایل شما Excel است، می‌توانید از این خط استفاده کنید:
# df = pd.read_excel('AirQualityUCI.xlsx')

# ابتدا داده‌ها را به دو دسته train (80%) و temp (20%) تقسیم می‌کنیم
train_data, temp_data = train_test_split(df, test_size=0.2, random_state=42)

# سپس داده‌های temp را به دو بخش validation و test (هر کدام 50% از 20% داده‌ها) تقسیم می‌کنیم
validation_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42)

# حالا سه دسته داریم:
# train_data (60%)
# validation_data (20%)
# test_data (20%)

# نمایش اندازه‌های دسته‌ها
print(f"Train data size: {len(train_data)}")
print(f"Validation data size: {len(validation_data)}")
print(f"Test data size: {len(test_data)}")


Train data size: 7576
Validation data size: 947
Test data size: 948


In [18]:
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
import numpy as np

class RBFNetwork:
    def __init__(self, num_centers):
        self.num_centers = num_centers  # تعداد مراکز (نودهای مخفی)
        self.centers = None  # مراکز توابع گوسی
        self.sigma = None  # مقدار سیگما برای گوسی‌ها
        self.weights = None  # وزن‌های لایه خروجی

    def rbf_function(self, X, centers, sigma):
        """محاسبه مقادیر گوسی برای ورودی X"""
        distances = cdist(X, centers)  # فاصله بین نمونه‌ها و مراکز
        return np.exp(- (distances ** 2) / (2 * sigma ** 2))

    def fit(self, X_train, y_train):
        """آموزش مدل با تنظیم مراکز، سیگما و وزن‌های خروجی"""
        # 1. انتخاب مراکز با K-Means
        kmeans = KMeans(n_clusters=self.num_centers, random_state=42, n_init=10)
        kmeans.fit(X_train)
        self.centers = kmeans.cluster_centers_

        # 2. محاسبه سیگما (میانگین فاصله مراکز)
        d_max = np.max(cdist(self.centers, self.centers))  # بیشترین فاصله بین مراکز
        self.sigma = d_max / np.sqrt(2 * self.num_centers)  # مقداردهی اولیه سیگما

        # 3. محاسبه خروجی توابع پایه (لایه مخفی)
        Phi = self.rbf_function(X_train, self.centers, self.sigma)

        # 4. محاسبه وزن‌های خروجی با حداقل مربعات
        self.weights = np.linalg.pinv(Phi) @ y_train  # حل معادله خطی

    def predict(self, X):
        """پیش‌بینی خروجی مدل"""
        Phi = self.rbf_function(X, self.centers, self.sigma)
        return Phi @ self.weights  # ضرب مقدار توابع مخفی در وزن‌های خروجی

# 2. اجرای RBF بر روی داده‌ها
num_centers = 10  # تعداد نودهای مخفی (قابل تنظیم)
rbf_net = RBFNetwork(num_centers=num_centers)
rbf_net.fit(X_train, y_train)

# 3. پیش‌بینی بر روی داده‌های تست
y_pred_rbf = rbf_net.predict(X_test)

# 4. محاسبه خطای مدل RBF
rbf_mse = np.mean((y_test - y_pred_rbf) ** 2)
print(f"RBF Test MSE: {rbf_mse:.6f}")


RBF Test MSE: 0.063533


In [16]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# 1. بارگذاری و پیش‌پردازش داده‌ها
df = pd.read_csv('AirQualityUCI.csv', delimiter=';')
df = df.drop(columns=['Date', 'Time'])  # حذف ستون‌های غیرعددی
df = df.apply(pd.to_numeric, errors='coerce').fillna(0)  # تبدیل مقادیر نامعتبر و جایگزینی NaN با 0

X = df.drop(columns=['CO(GT)']).values
y = df['CO(GT)'].values.reshape(-1, 1)

# 2. تقسیم‌بندی داده‌ها به Train، Validation و Test
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# 3. نرمال‌سازی داده‌ها
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
X_train = scaler_X.fit_transform(X_train)
X_val = scaler_X.transform(X_val)
X_test = scaler_X.transform(X_test)
y_train = scaler_y.fit_transform(y_train)
y_val = scaler_y.transform(y_val)
y_test = scaler_y.transform(y_test)

# 4. تعریف تابع عضویت گوسی
def gaussmf(x, mean, sigma):
    return np.exp(-((x - mean) ** 2) / (2 * sigma ** 2))

# 5. تنظیم پارامترهای اولیه ANFIS
num_features = X_train.shape[1]  # تعداد ویژگی‌ها
num_mfs = 2  # تعداد توابع عضویت برای هر ویژگی
num_rules = num_mfs ** num_features  # تعداد قواعد فازی

# Ensure correct broadcasting of min/max values by expanding the shape
means = np.random.uniform(
    np.min(X_train, axis=0)[:, np.newaxis],  # Add a new axis to make shape (num_features, 1)
    np.max(X_train, axis=0)[:, np.newaxis],  # Add a new axis to make shape (num_features, 1)
    size=(num_features, num_mfs)  # Shape: (features, membership_functions)
)

# Small initial sigma values, no changes needed
sigmas = np.random.uniform(0.1, 0.5, size=(num_features, num_mfs))

# مقداردهی اولیه وزن‌های قوانین
rule_weights = np.random.rand(num_rules)

def compute_membership(X, means, sigmas):
    membership = np.zeros((X.shape[0], num_features, num_mfs))
    for i in range(num_features):
        for j in range(num_mfs):
            mean, sigma = means[i, j], sigmas[i, j]
            membership[:, i, j] = gaussmf(X[:, i], mean, sigma)
    return membership

def compute_rules_output(membership):
    rules_output = np.ones((membership.shape[0], num_rules))
    for rule_idx in range(num_rules):
        rule_input = np.unravel_index(rule_idx, (num_mfs,) * num_features)
        rule_memberships = np.ones(membership.shape[0])
        for feature_idx, mf_idx in enumerate(rule_input):
            rule_memberships *= membership[:, feature_idx, mf_idx]
        rules_output[:, rule_idx] = rule_memberships
    return rules_output

# 6. آموزش مدل ANFIS
epochs = 50
learning_rate = 0.1

# Initialization of rule_weights as a vector with the shape (num_rules,)
rule_weights = np.random.rand(num_rules)  # Shape: (num_rules,)

for epoch in range(epochs):
    # Compute membership values for training data
    membership_train = compute_membership(X_train, means, sigmas)

    # Compute the output of the fuzzy rules
    rules_output_train = compute_rules_output(membership_train)

    # Ensure the correct shape for rules_output_train
    assert rules_output_train.shape == (X_train.shape[0], num_rules), f"Shape mismatch: {rules_output_train.shape} vs {(X_train.shape[0], num_rules)}"

    # Compute rule outputs for the training data
    y_pred_train = np.dot(rules_output_train, rule_weights)  # Shape: (num_samples,)
    y_pred_train = y_pred_train.reshape(-1, 1)  # تبدیل به بردار ستونی

    error = y_train - y_pred_train


    # Compute the gradient: (num_rules, 1) = (num_rules, num_samples) . (num_samples, 1)
    gradient = -2 * np.dot(rules_output_train.T, error) / X_train.shape[0]  # Gradient should have shape (num_rules, 1)

    # Ensure the gradient has the correct shape
    assert gradient.shape == (num_rules, 1), f"Gradient shape mismatch: {gradient.shape} vs {(num_rules, 1)}"

    # Flatten the gradient to shape (num_rules,)
    gradient = gradient.flatten()  # Now gradient has shape (num_rules,)

    # Update the rule weights using gradient descent
    rule_weights -= learning_rate * gradient  # Now the shapes should match

    # Compute MSE for debugging and tracking the progress
    mse = np.mean(error ** 2)
    if epoch % 10 == 0:
        print(f"Epoch {epoch}/{epochs}, MSE: {mse}")

# 7. ارزیابی مدل روی تست‌داده‌ها
# Compute membership values for test data
membership_test = compute_membership(X_test, means, sigmas)

# Compute the output of the fuzzy rules for test data
rules_output_test = compute_rules_output(membership_test)

# Predict the test output
y_pred_test = np.dot(rules_output_test, rule_weights)

# Calculate test MSE
test_error = y_test - y_pred_test
test_mse = np.mean(test_error**2)
print(f"Test MSE: {test_mse}")


Epoch 0/50, MSE: 0.6780831934492974
Epoch 10/50, MSE: 0.6780804610062371
Epoch 20/50, MSE: 0.6780777286958433
Epoch 30/50, MSE: 0.6780749965181092
Epoch 40/50, MSE: 0.6780722644730283
Test MSE: 0.6903428738695045
