In [2]:
import numpy as np

# Gaussian RBF function
def gaussian_rbf(x, center, sigma):
    return np.exp(-np.linalg.norm(x - center) ** 2 / (2 * sigma ** 2))

# RBFN class
class RBFN:
    def __init__(self, input_dim, num_centers, output_dim):
        self.input_dim = input_dim
        self.num_centers = num_centers
        self.output_dim = output_dim

        # Randomly select centers from the training dataset
        self.centers = [np.random.rand(input_dim) for _ in range(num_centers)]
        self.sigmas = [1.0 for _ in range(num_centers)]     # Set sigma to 1 for simplicity
        self.weights = np.random.randn(self.num_centers, output_dim)

    def _basis_function(self, data_point):
        return np.array([gaussian_rbf(data_point, c, s) for c, s in zip(self.centers, self.sigmas)])

    def _calculate_interpolation_matrix(self, X):
        G = np.zeros((X.shape[0], self.num_centers))
        for i, data_point in enumerate(X):
            G[i, :] = self._basis_function(data_point)
        return G

    def train(self, X, Y):
        # Choose training data as centers
        self.centers = X.copy()
        self.sigmas = [1.0 for _ in range(self.num_centers)]
        G = self._calculate_interpolation_matrix(X)
        # Least squares solution
        self.weights = np.dot(np.linalg.pinv(G), Y)

    def predict(self, X):
        G = self._calculate_interpolation_matrix(X)
        predictions = np.dot(G, self.weights)
        return predictions

# XOR dataset
X = np.array([[0,0],[0,1],[1,0],[1,1]])
Y = np.array([[0],[1],[1],[0]])

# Create and train RBFN
model = RBFN(input_dim=2, num_centers=4, output_dim=1)
model.train(X, Y)

# Predict
print("XOR Predictions using RBFN:")
for x in X:
    y_pred = model.predict(np.array([x]))
    print(f"Input: {x} => Predicted: {round(y_pred[0][0])} (Raw: {y_pred[0][0]:.4f})")


XOR Predictions using RBFN:
Input: [0 0] => Predicted: 0 (Raw: 0.0000)
Input: [0 1] => Predicted: 1 (Raw: 1.0000)
Input: [1 0] => Predicted: 1 (Raw: 1.0000)
Input: [1 1] => Predicted: 0 (Raw: 0.0000)


In [2]:
#using CSV file 

# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load dataset
data = pd.read_csv('‪C:/Users/Pradnya/Downloads/wine_data.csv')
print(data.head())

# Split features and labels
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.01, random_state=42)

# Define RBF kernel function
def rbf_kernel(x1, x2, gamma=1.0):
    diff = x1 - x2
    return np.exp(-gamma * np.dot(diff, diff.T))

# Define RBF classifier using nearest center approach
def rbf_classifier(X_train, y_train, X_test, gamma):
    predictions = []
    
    for test_sample in X_test:
        distances = []
        for train_sample in X_train:
            distance = rbf_kernel(test_sample, train_sample, gamma)
            distances.append(distance)
        
        closest_index = np.argmax(distances)
        predictions.append(y_train[closest_index])
    
    return np.array(predictions)

# Run classifier
predictions = rbf_classifier(X_train, y_train, X_test, gamma=0.5)

# Evaluate
accuracy = accuracy_score(y_test, predictions)
print(f'Accuracy: {accuracy * 100:.2f}%')


OSError: [Errno 22] Invalid argument: '\u202aC:/Users/Pradnya/Downloads/wine_data.csv'

In [3]:
!pip install scikit-learn


Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 24.1.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
#using bank dataset by kmeans
import math
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.cluster import KMeans

# Load and preprocess data
Data = pd.read_csv("C:/Users/Pradnya/Downloads/bank-full.csv")
cols = ["age", "balance", "day", "duration", "campaign", "pdays", "previous"]
data_encode = Data.drop(cols, axis=1)
data_encode = data_encode.apply(LabelEncoder().fit_transform)
data_rest = Data[cols]
Data = pd.concat([data_rest, data_encode], axis=1)
print(Data.head())

# Train-test split
data_train, data_test = train_test_split(Data, test_size=0.5, random_state=4)
x_train = data_train.drop("y", axis=1)
y_train = data_train["y"]
x_test = data_test.drop("y", axis=1)
y_test = data_test["y"]

# Normalize
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)

# KMeans for center selection
K_cent = 8
km = KMeans(n_clusters=K_cent, max_iter=98)
km.fit(x_train)
cent = km.cluster_centers_

# Compute sigma
max_d = 0
for i in range(K_cent):
    for j in range(K_cent):
        d = np.linalg.norm(cent[i] - cent[j])
        if d > max_d:
            max_d = d
sigma = math.sqrt(2 * K_cent)
print(sigma)

# Compute RBF feature matrix for training set
shape = x_train.shape
row = shape[0]
column = K_cent
G = np.empty((row, column), dtype=float)

for i in range(row):
    for j in range(column):
        dist = np.linalg.norm(x_train[i] - cent[j])
        G[i][j] = math.exp(-math.pow(dist, 2) / math.pow(2 * sigma, 2))

print(G)

# Compute weights using pseudo-inverse
GTG = np.dot(G.T, G)
GTG_inv = np.linalg.inv(GTG)
fac = np.dot(GTG_inv, G.T)
w = np.dot(fac, y_train)
print(w)

# Compute RBF features for test data
row = x_test.shape[0]
G_test = np.empty((row, column), dtype=float)

for i in range(row):
    for j in range(column):
        dist = np.linalg.norm(x_test[i] - cent[j])
        G_test[i][j] = math.exp(-math.pow(dist, 2) / math.pow(2 * sigma, 2))

print(G_test[0])

# Predict
prediction = np.dot(G_test, w)
prediction = 0.5 * (np.sign(prediction - 0.5) + 1)
score = accuracy_score(y_test, prediction)
print(score)


   age  balance  day  duration  campaign  pdays  previous  job  marital  \
0   58     2143    5       261         1     -1         0    4        1   
1   44       29    5       151         1     -1         0    9        2   
2   33        2    5        76         1     -1         0    2        1   
3   47     1506    5        92         1     -1         0    1        1   
4   33        1    5       198         1     -1         0   11        2   

   education  default  housing  loan  contact  month  poutcome  y  
0          2        0        1     0        2      8         3  0  
1          1        0        1     0        2      8         3  0  
2          1        0        1     1        2      8         3  0  
3          3        0        1     0        2      8         3  0  
4          3        0        0     0        2      8         3  0  
4.0
[[0.43087774 0.51788175 0.87246038 ... 0.56174452 0.58393056 0.60717375]
 [0.60268249 0.7841429  0.67667375 ... 0.77370279 0.87499958 0.7

In [7]:
#using wine dataset for kmeans
import math
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.cluster import KMeans

# Load data
data = pd.read_csv("C:/Users/Pradnya/Downloads/wine_data.csv")

# Split dataset
data_train, data_test = train_test_split(data, test_size=0.5, random_state=4)
x_train = data_train.drop("quality", axis=1)
y_train = data_train["quality"]
x_test = data_test.drop("quality", axis=1)
y_test = data_test["quality"]

# Feature Scaling
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# KMeans Clustering
K_cent = 4
km = KMeans(n_clusters=K_cent, max_iter=98)
km.fit(x_train)
cent = km.cluster_centers_

# Calculate sigma
max_dist = 0
for i in range(K_cent):
    for j in range(K_cent):
        d = np.linalg.norm(cent[i] - cent[j])
        if d > max_dist:
            max_dist = d

# Fix for division by zero
if max_dist == 0:
    sigma = 1e-6  # small fallback value
else:
    sigma = max_dist / math.sqrt(2 * K_cent)

print("Sigma:", sigma)

# Build design matrix G for training set
shape = x_train.shape
row = shape[0]
column = K_cent
G = np.empty((row, column), dtype=float)

for i in range(row):
    for j in range(column):
        dist = np.linalg.norm(x_train[i] - cent[j])
        G[i][j] = math.exp(-math.pow(dist, 2) / math.pow(2 * sigma, 2))

print("Design Matrix G (Training):\n", G)

# Train Weights using Least Squares
GTG = np.dot(G.T, G)
GTG_inv = np.linalg.inv(GTG)
fac = np.dot(GTG_inv, G.T)
w = np.dot(fac, y_train)
print("Weights:\n", w)

# Build design matrix G_test for testing set
row = x_test.shape[0]
G_test = np.empty((row, column), dtype=float)

for i in range(row):
    for j in range(column):
        dist = np.linalg.norm(x_test[i] - cent[j])
        G_test[i][j] = math.exp(-math.pow(dist, 2) / math.pow(2 * sigma, 2))

print("First Row of G_test:\n", G_test[0])

# Predict
prediction = np.dot(G_test, w)
prediction = 0.5 * (np.sign(prediction - 0.5) + 5)
print("Predictions:\n", prediction)
print("Actual:\n", y_test)

# Accuracy
score = accuracy_score(y_test, prediction)
print("Accuracy Score:", score)


Sigma: 1.9995327534016647
Design Matrix G (Training):
 [[0.57937708 0.16097536 0.62094693 0.49965058]
 [0.74489523 0.12076463 0.71525592 0.72719415]
 [0.76068914 0.08221137 0.71548549 0.59259208]
 ...
 [0.75386671 0.22365074 0.74507257 0.88156271]
 [0.54099646 0.21057158 0.56798261 0.65235794]
 [0.68447479 0.15622313 0.64614765 0.84755221]]
Weights:
 [2.54824158 5.63297371 2.46945749 2.83679068]
First Row of G_test:
 [0.42969109 0.07257094 0.43285739 0.65395114]
Predictions:
 [3. 3. 3. ... 3. 3. 3.]
Actual:
 5159     4
16697    8
7588     5
11120    6
16980    8
        ..
12030    7
10301    6
18763    9
9532     6
20917    9
Name: quality, Length: 10500, dtype: int64
Accuracy Score: 0.14876190476190476
