In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import tensorflow as tf
import matplotlib.pyplot as plt
import os
from keras.layers import Dropout
from keras.layers import Input
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
df = pd.read_csv('/content/drive/MyDrive/project/data_set.csv')
df.head()

Unnamed: 0,membrane area,ch4,co2,n2,o2,mass flow,lhv,pressure,diameter,efficiency
0,1.98,58.4,33.9,3.8,1.1,8.71,45.7,10,0.42,63.26
1,2.08,58.4,33.9,3.8,1.1,8.71,45.7,10,0.42,63.41
2,2.18,58.4,33.9,3.8,1.1,8.71,45.7,10,0.42,63.49
3,2.29,58.4,33.9,3.8,1.1,8.71,45.7,10,0.42,63.54
4,1.68,58.4,33.9,3.8,1.1,8.71,45.7,10,0.45,62.69


RBF

In [4]:
!pip install scikit-learn
import numpy as np
import pandas as pd
import random
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics.pairwise import rbf_kernel
# Import KMeans
from sklearn.cluster import KMeans # Importing the KMeans class from sklearn.cluster
import os




In [5]:
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
# Import rbf_kernel from the correct module
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score


In [6]:

def load_and_preprocess_data(file_path):
    df = pd.read_csv(file_path)

    X = df.drop('efficiency', axis=1)
    y = df['efficiency']

    scaler = MinMaxScaler(feature_range=(-1, 1))
    X_scaled = scaler.fit_transform(X)

    X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y, test_size=0.3, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    return (X_train, y_train), (X_val, y_val), (X_test, y_test), scaler

class RBFNetwork:
    def __init__(self, num_centers, gamma=1.0):
        self.num_centers = num_centers
        self.gamma = gamma
        self.centers = None
        self.regressor = LinearRegression()
        self.kmeans = KMeans(n_clusters=num_centers, random_state=42)

    def _compute_rbf_features(self, X):
        """Compute RBF kernel features for given input."""
        if self.centers is None:
            raise ValueError("Model not fitted yet. Call fit() first.")
        return rbf_kernel(X, self.centers, gamma=self.gamma)

    def fit(self, X, y):
        """
        Fit the RBF Network to the training data.

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Training data
        y : array-like of shape (n_samples,)
            Target values
        """
        # step 1: Use KMeans to find RBF centers
        self.kmeans.fit(X)
        self.centers = self.kmeans.cluster_centers_

        # step 2: Compute the RBF features for training data
        rbf_features = self._compute_rbf_features(X)

        # step 3: Train the linear regressor on the transformed features
        self.regressor.fit(rbf_features, y)

        return self

    def predict(self, X):
        """
        Predict using the RBF Network model.

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Samples to predict

        Returns:
        --------
        y_pred : array-like of shape (n_samples,)
            Predicted values
        """
        rbf_features = self._compute_rbf_features(X)
        return self.regressor.predict(rbf_features)

    def score(self, X, y):
        """Return the coefficient of determination R^2 of the prediction."""
        return r2_score(y, self.predict(X))

def train_and_evaluate_rbf(X_train, y_train, X_val, y_val, X_test, y_test, num_centers=5, gamma=0.5):
    """
    Train and evaluate the RBF network with given parameters.

    Parameters:
    -----------
    X_train, y_train : Training data
    X_val, y_val : Validation data
    X_test, y_test : Test data
    num_centers : int, number of RBF centers
    gamma : float, RBF kernel width parameter
    """
    # initialize and train the RBF network
    rbf_net = RBFNetwork(num_centers=num_centers, gamma=gamma)
    rbf_net.fit(X_train, y_train)

    results = {}
    for name, X, y in [('Train', X_train, y_train),
                      ('Validation', X_val, y_val),
                      ('Test', X_test, y_test)]:
        y_pred = rbf_net.predict(X)
        mse = mean_squared_error(y, y_pred)
        r2 = r2_score(y, y_pred)
        results[name] = {'MSE': mse, 'R2': r2}
        print(f"{name} MSE: {mse:.4f}")
        print(f"{name} R2: {r2:.4f}")

    return rbf_net, results


# Main function
def main():
    file_path = '/content/drive/MyDrive/project/data_set.csv'
    (X_train, y_train), (X_val, y_val), (X_test, y_test), scaler = load_and_preprocess_data(file_path)


    # train and evaluate model
    rbf_net, results = train_and_evaluate_rbf(
        X_train, y_train,
        X_val, y_val,
        X_test, y_test,
        num_centers=5,
        gamma=0.5
    )

    return rbf_net, results, scaler

rbf_net, results, scaler = main()
# print('Results: ', results)

Train MSE: 0.9437
Train R2: 0.9125
Validation MSE: 0.5391
Validation R2: 0.9581
Test MSE: 0.8379
Test R2: 0.9280


Moving K means clusturing

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Moving K-Means Clustering
class MovingKMeans:
    def __init__(self, num_centers, a0=0.2, max_iter=100):
        self.num_centers = num_centers
        self.a0 = a0
        self.max_iter = max_iter
        self.centers = None

    def fit(self, X):
        N, _ = X.shape
        self.centers = X[np.random.choice(N, self.num_centers, replace=False)]
        a = self.a0
        for _ in range(self.max_iter):
            clusters = {i: [] for i in range(self.num_centers)}
            for xi in X:
                distances = np.linalg.norm(xi - self.centers, axis=1)
                j = np.argmin(distances)
                clusters[j].append(xi)

            new_centers = []
            for j in range(self.num_centers):
                if clusters[j]:
                    new_centers.append(np.mean(clusters[j], axis=0))
                else:
                    new_centers.append(self.centers[j])

            self.centers = np.array(new_centers)
            a -= a / self.num_centers

    def get_centers(self):
        return self.centers


def load_and_preprocess_data(file_path):
    df = pd.read_csv(file_path)

    X = df.drop('efficiency', axis=1)
    y = df['efficiency']

    scaler = MinMaxScaler(feature_range=(-1, 1))
    X_scaled = scaler.fit_transform(X)

    X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y, test_size=0.3, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    return (X_train, y_train), (X_val, y_val), (X_test, y_test), scaler


class RBFNetwork:
    def __init__(self, num_centers, gamma=1.0):
        self.num_centers = num_centers
        self.gamma = gamma
        self.centers = None
        self.regressor = LinearRegression()
        self.kmeans = MovingKMeans(num_centers=num_centers)

    def _compute_rbf_features(self, X):
        if self.centers is None:
            raise ValueError("Model not fitted yet. Call fit() first.")
        return np.exp(-self.gamma * np.linalg.norm(X[:, np.newaxis] - self.centers, axis=2) ** 2)

    def fit(self, X, y):
        self.kmeans.fit(X)
        self.centers = self.kmeans.get_centers()
        rbf_features = self._compute_rbf_features(X)
        self.regressor.fit(rbf_features, y)
        return self

    def predict(self, X):
        rbf_features = self._compute_rbf_features(X)
        return self.regressor.predict(rbf_features)

    def score(self, X, y):
        return r2_score(y, self.predict(X))


def train_and_evaluate_rbf(X_train, y_train, X_val, y_val, X_test, y_test, num_centers=5, gamma=0.5):
    rbf_net = RBFNetwork(num_centers=num_centers, gamma=gamma)
    rbf_net.fit(X_train, y_train)

    results = {}
    for name, X, y in [('Train', X_train, y_train),
                      ('Validation', X_val, y_val),
                      ('Test', X_test, y_test)]:
        y_pred = rbf_net.predict(X)
        mse = mean_squared_error(y, y_pred)
        r2 = r2_score(y, y_pred)
        results[name] = {'MSE': mse, 'R2': r2}
        print(f"{name} MSE: {mse:.4f}")
        print(f"{name} R2: {r2:.4f}")

    return rbf_net, results


# Main function
def main():
    file_path = '/content/drive/MyDrive/project/data_set.csv'
    (X_train, y_train), (X_val, y_val), (X_test, y_test), scaler = load_and_preprocess_data(file_path)

    rbf_net, results = train_and_evaluate_rbf(
        X_train, y_train,
        X_val, y_val,
        X_test, y_test,
        num_centers=5,
        gamma=0.5
    )

    return rbf_net, results, scaler


rbf_net, results, scaler = main()


Train MSE: 0.8486
Train R2: 0.9213
Validation MSE: 0.6687
Validation R2: 0.9481
Test MSE: 0.3524
Test R2: 0.9697


hyper parameter tuning of RBF

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import rbf_kernel


# Load and preprocess dataset
def load_and_preprocess_data(file_path):
    df = pd.read_csv(file_path)
    X = df.drop('efficiency', axis=1).values
    y = df['efficiency'].values

    # Scale features to [-1, 1]
    scaler = MinMaxScaler(feature_range=(-1, 1))
    X_scaled = scaler.fit_transform(X)

    # Split data into train, val, and test
    X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y, test_size=0.3, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    return (X_train, y_train), (X_val, y_val), (X_test, y_test), scaler


# RBF Network definition
class RBFNetwork:
    def __init__(self, num_centers, gamma=1.0):
        self.num_centers = num_centers
        self.gamma = gamma
        self.centers = None
        self.regressor = LinearRegression()
        self.kmeans = KMeans(n_clusters=num_centers, random_state=42)

    def _compute_rbf_features(self, X):
        if self.centers is None:
            raise ValueError("Model not fitted yet. Call fit() first.")
        return rbf_kernel(X, self.centers, gamma=self.gamma)

    def fit(self, X, y):
        self.kmeans.fit(X)
        self.centers = self.kmeans.cluster_centers_
        rbf_features = self._compute_rbf_features(X)
        self.regressor.fit(rbf_features, y)
        return self

    def predict(self, X):
        rbf_features = self._compute_rbf_features(X)
        return self.regressor.predict(rbf_features)


# Grid search over spread (σ) and num_centers
def cross_val_rbf_grid_spread(X, y, spread_values, center_values, k=5):
    best_params = {}
    best_rmse = float('inf')
    best_metrics = {}

    for num_centers in center_values:
        for spread in spread_values:
            gamma = 1 / (2 * spread ** 2)
            print(f"Testing num_centers={num_centers}, spread={spread} (gamma={gamma:.4f})")
            kf = KFold(n_splits=k, shuffle=True, random_state=42)
            rmses, mses, r2s = [], [], []

            for train_idx, val_idx in kf.split(X):
                X_train_cv, X_val_cv = X[train_idx], X[val_idx]
                y_train_cv, y_val_cv = y[train_idx], y[val_idx]

                model = RBFNetwork(num_centers=num_centers, gamma=gamma)
                model.fit(X_train_cv, y_train_cv)
                y_pred = model.predict(X_val_cv)

                mse = mean_squared_error(y_val_cv, y_pred)
                rmse = np.sqrt(mse)
                r2 = r2_score(y_val_cv, y_pred)

                rmses.append(rmse)
                mses.append(mse)
                r2s.append(r2)

            avg_rmse = np.mean(rmses)
            avg_mse = np.mean(mses)
            avg_r2 = np.mean(r2s)

            print(f"  → Avg RMSE: {avg_rmse:.4f}, MSE: {avg_mse:.4f}, R2: {avg_r2:.4f}\n")

            if avg_rmse < best_rmse:
                best_rmse = avg_rmse
                best_params = {'spread': spread, 'gamma': gamma, 'num_centers': num_centers}
                best_metrics = {'RMSE': avg_rmse, 'MSE': avg_mse, 'R2': avg_r2}

    print(f"Best Parameters:")
    print(f"   Spread (σ)       : {best_params['spread']}")
    print(f"   Gamma            : {best_params['gamma']:.4f}")
    print(f"   Num of Centers   : {best_params['num_centers']}")
    print(f"Best CV RMSE     : {best_metrics['RMSE']:.4f}")
    print(f"   MSE              : {best_metrics['MSE']:.4f}")
    print(f"   R2               : {best_metrics['R2']:.4f}")
    return best_params, best_metrics


# Main runner
def main():
    file_path = '/content/drive/MyDrive/project/data_set.csv'  # Update path as needed
    (X_train, y_train), (X_val, y_val), (X_test, y_test), scaler = load_and_preprocess_data(file_path)

    # Combine training + validation for grid search
    X_full_train = np.vstack((X_train, X_val))
    y_full_train = np.hstack((y_train, y_val))

    spread_values = np.linspace(1, 10, 10)   # Spread (σ) values
    center_values = [10, 20, 30, 40]          # Number of RBF centers

    # Grid search
    best_params, best_cv_metrics = cross_val_rbf_grid_spread(X_full_train, y_full_train, spread_values, center_values, k=5)

    # Final training with best params

if __name__ == '__main__':
    main()


Testing num_centers=10, spread=1.0 (gamma=0.5000)
  → Avg RMSE: 0.6001, MSE: 0.3630, R2: 0.9671

Testing num_centers=10, spread=2.0 (gamma=0.1250)
  → Avg RMSE: 0.7599, MSE: 0.5984, R2: 0.9444

Testing num_centers=10, spread=3.0 (gamma=0.0556)
  → Avg RMSE: 0.9285, MSE: 0.8950, R2: 0.9171

Testing num_centers=10, spread=4.0 (gamma=0.0312)
  → Avg RMSE: 1.0096, MSE: 1.0514, R2: 0.9032

Testing num_centers=10, spread=5.0 (gamma=0.0200)
  → Avg RMSE: 1.0402, MSE: 1.1099, R2: 0.8983

Testing num_centers=10, spread=6.0 (gamma=0.0139)
  → Avg RMSE: 1.0512, MSE: 1.1301, R2: 0.8968

Testing num_centers=10, spread=7.0 (gamma=0.0102)
  → Avg RMSE: 1.0552, MSE: 1.1369, R2: 0.8964

Testing num_centers=10, spread=8.0 (gamma=0.0078)
  → Avg RMSE: 1.0565, MSE: 1.1388, R2: 0.8963

Testing num_centers=10, spread=9.0 (gamma=0.0062)
  → Avg RMSE: 1.0568, MSE: 1.1389, R2: 0.8964

Testing num_centers=10, spread=10.0 (gamma=0.0050)
  → Avg RMSE: 1.0567, MSE: 1.1384, R2: 0.8965

Testing num_centers=20, sprea

Hper-parameter Tuning of Moving K means

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


class MovingKMeans:
    def __init__(self, num_centers, a0=0.2, max_iter=100):
        self.num_centers = num_centers
        self.a0 = a0
        self.max_iter = max_iter
        self.centers = None

    def fit(self, X):
        N, _ = X.shape
        self.centers = X[np.random.choice(N, self.num_centers, replace=False)]
        a = self.a0
        for _ in range(self.max_iter):
            clusters = {i: [] for i in range(self.num_centers)}
            for xi in X:
                distances = np.linalg.norm(xi - self.centers, axis=1)
                j = np.argmin(distances)
                clusters[j].append(xi)

            new_centers = []
            for j in range(self.num_centers):
                if clusters[j]:
                    new_centers.append(np.mean(clusters[j], axis=0))
                else:
                    new_centers.append(self.centers[j])

            self.centers = np.array(new_centers)
            a -= a / self.num_centers

    def get_centers(self):
        return self.centers


class RBFNetwork:
    def __init__(self, num_centers, gamma=1.0):
        self.num_centers = num_centers
        self.gamma = gamma
        self.centers = None
        self.regressor = LinearRegression()
        self.kmeans = MovingKMeans(num_centers=num_centers)

    def _compute_rbf_features(self, X):
        if self.centers is None:
            raise ValueError("Model not fitted yet. Call fit() first.")
        return np.exp(-self.gamma * np.linalg.norm(X[:, np.newaxis] - self.centers, axis=2) ** 2)

    def fit(self, X, y):
        self.kmeans.fit(X)
        self.centers = self.kmeans.get_centers()
        rbf_features = self._compute_rbf_features(X)
        self.regressor.fit(rbf_features, y)
        return self

    def predict(self, X):
        rbf_features = self._compute_rbf_features(X)
        return self.regressor.predict(rbf_features)

    def score(self, X, y):
        return r2_score(y, self.predict(X))


def load_and_preprocess_data(file_path):
    df = pd.read_csv(file_path)

    X = df.drop('efficiency', axis=1).values
    y = df['efficiency'].values

    scaler = MinMaxScaler(feature_range=(-1, 1))
    X_scaled = scaler.fit_transform(X)

    return X_scaled, y, scaler


def cross_val_rbf_grid_spread(X, y, spread_values, center_values, k=5):
    best_params = {}
    best_rmse = float('inf')
    best_metrics = {}

    for num_centers in center_values:
        for spread in spread_values:
            gamma = 1 / (2 * spread ** 2)
            print(f"Testing num_centers={num_centers}, spread={spread:.2f} (gamma={gamma:.4f})")
            kf = KFold(n_splits=k, shuffle=True, random_state=42)
            rmses, mses, r2s = [], [], []

            for train_idx, val_idx in kf.split(X):
                X_train_cv, X_val_cv = X[train_idx], X[val_idx]
                y_train_cv, y_val_cv = y[train_idx], y[val_idx]

                model = RBFNetwork(num_centers=num_centers, gamma=gamma)
                model.fit(X_train_cv, y_train_cv)
                y_pred = model.predict(X_val_cv)

                mse = mean_squared_error(y_val_cv, y_pred)
                rmse = np.sqrt(mse)
                r2 = r2_score(y_val_cv, y_pred)

                rmses.append(rmse)
                mses.append(mse)
                r2s.append(r2)

            avg_rmse = np.mean(rmses)
            avg_mse = np.mean(mses)
            avg_r2 = np.mean(r2s)

            print(f"  → Avg RMSE: {avg_rmse:.4f}, MSE: {avg_mse:.4f}, R2: {avg_r2:.4f}\n")

            if avg_rmse < best_rmse:
                best_rmse = avg_rmse
                best_params = {'spread': spread, 'gamma': gamma, 'num_centers': num_centers}
                best_metrics = {'RMSE': avg_rmse, 'MSE': avg_mse, 'R2': avg_r2}

    print("\n Best Parameters:")
    print(f"   Spread (σ)       : {best_params['spread']}")
    print(f"   Gamma            : {best_params['gamma']:.4f}")
    print(f"   Num of Centers   : {best_params['num_centers']}")
    print(" Final Cross-Validated Metrics:")
    print(f"   RMSE             : {best_metrics['RMSE']:.4f}")
    print(f"   MSE              : {best_metrics['MSE']:.4f}")
    print(f"   R2               : {best_metrics['R2']:.4f}")
    return best_params, best_metrics


def main():
    file_path = '/content/drive/MyDrive/project/data_set.csv'
    X, y, scaler = load_and_preprocess_data(file_path)

    spread_values = np.linspace(1, 10, 10)   # Spread values
    center_values = [10, 20, 30, 40, 50]          # Number of centers

    best_params, best_metrics = cross_val_rbf_grid_spread(X, y, spread_values, center_values, k=5)

    # Optional: Train final model on full dataset with best params
    final_model = RBFNetwork(num_centers=best_params['num_centers'], gamma=best_params['gamma'])
    final_model.fit(X, y)

    return final_model, best_params, best_metrics, scaler


# Run everything
final_model, best_params, best_metrics, scaler = main()


Testing num_centers=10, spread=1.00 (gamma=0.5000)
  → Avg RMSE: 0.7672, MSE: 0.6472, R2: 0.9423

Testing num_centers=10, spread=2.00 (gamma=0.1250)
  → Avg RMSE: 0.8545, MSE: 0.7366, R2: 0.9329

Testing num_centers=10, spread=3.00 (gamma=0.0556)
  → Avg RMSE: 0.9055, MSE: 0.8650, R2: 0.9197

Testing num_centers=10, spread=4.00 (gamma=0.0312)
  → Avg RMSE: 1.0512, MSE: 1.1229, R2: 0.8984

Testing num_centers=10, spread=5.00 (gamma=0.0200)
  → Avg RMSE: 1.0080, MSE: 1.0237, R2: 0.9063

Testing num_centers=10, spread=6.00 (gamma=0.0139)
  → Avg RMSE: 1.0337, MSE: 1.0729, R2: 0.9028

Testing num_centers=10, spread=7.00 (gamma=0.0102)
  → Avg RMSE: 1.0533, MSE: 1.1135, R2: 0.8985

Testing num_centers=10, spread=8.00 (gamma=0.0078)
  → Avg RMSE: 1.0933, MSE: 1.2095, R2: 0.8908

Testing num_centers=10, spread=9.00 (gamma=0.0062)
  → Avg RMSE: 1.0614, MSE: 1.1387, R2: 0.8969

Testing num_centers=10, spread=10.00 (gamma=0.0050)
  → Avg RMSE: 1.0900, MSE: 1.2059, R2: 0.8910

Testing num_centers