In [1]:
import pandas as pd
import numpy as np

from random import *
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split as split_data
import math
import random
from sklearn.preprocessing import StandardScaler


In [2]:
dataset = pd.read_csv('albrecht.csv')
dataset=dataset.drop(['id'],axis=1)
X = dataset.iloc[ : ,   : -1 ].values
scaler = StandardScaler()
X = scaler.fit_transform(X) 

Y = dataset.iloc[ : ,-1 ].values


In [3]:

def create_random_matrix(maxrow, maxcol): # Creating a Matrix of 1's and 0's
    mymatrix = [ [ None for i in range (maxrow) ] for j in range (maxcol)]
    for row_index in range(0, len(mymatrix)):
        for col_index in range(0, len(mymatrix[row_index])):
            mymatrix[row_index][col_index] = randint(0, 1)
    return mymatrix

def select_features(matrix,index): # Taking features having value '1' in matrix row
    select=[]
    for i in range(len(matrix[0])):
        if matrix[index][i]==1:
          select.append(i)
    return select

def fitness(matrix,index): # Calculating Fitness of perticular Row in Matrix
    X_data=X[:, select_features(matrix,index)]
    Y_data=Y 
    X_train, X_test, Y_train, Y_test = split_data( X_data, Y_data, test_size = 1/4, random_state = 80, shuffle=True) 
    regressor = LinearRegression()
    regressor = regressor.fit(X_train, Y_train)
    Y_pred = regressor.predict(X_test)

    MSE = np.square(np.subtract(Y_test,Y_pred)).mean() 
    RMSE = math.sqrt(MSE)
    return RMSE

def calculate_fitness(matrix,number_of_samples): # Calculating Fitness of Matrix
    fitness_values=[]
    for i in range(number_of_samples):
        fitness_value=fitness(matrix,i)
        fitness_values.append(fitness_value)
    return fitness_values


def min_max(ele_list): # Finding Min and Max index of a List
    min_index = 0
    max_index = 0
    cur_index = 1
    while cur_index < len(ele_list):
        if ele_list[cur_index] < ele_list[min_index]:
            min_index = cur_index
        if ele_list[cur_index] > ele_list[max_index]:
            max_index= cur_index
        cur_index = cur_index + 1

    return min_index,max_index

def find_best_and_worst(matrix,number_of_samples): # Finding Best and Worst Sample from the Matrix
    fitness_values=calculate_fitness(matrix,number_of_samples)
    return min_max(fitness_values)

def rounding(row):
    new_row=[]
    for i in range(len(row)):
        if row[i]<0.5:
            new_row.append(0)
        else :
            new_row.append(1)
    return new_row

def best_of_all(matrix,number_of_samples):
    fitness_values=calculate_fitness(matrix,number_of_samples)
    mn,mx=min_max(fitness_values)
    features=[]
    for i in range(len(matrix[0])):
        if matrix[mn][i]==1:
            features.append(i)
    return features


In [4]:
number_of_samples=60
matrix=create_random_matrix(X.shape[1],number_of_samples)
alpha=random.uniform(0, 1)
print("alpha value :",alpha)
epochs=10

for _ in range(epochs):
    print(matrix)
    best_index,worst_index=find_best_and_worst(matrix,number_of_samples)
    print("best sample :",best_index," worst sample :",worst_index)
    #Objective Function :
    new_worst=matrix[worst_index]+alpha*(np.array(matrix[best_index])-np.array(matrix[worst_index]))
    temp_mat=matrix
    temp_mat[worst_index]=rounding(new_worst)
    fitness_new=fitness(temp_mat,worst_index)
    fitness_old=fitness(matrix,worst_index)
    # If new fitness value (Error) is less than old fitness value than Replace the old sample 
    if fitness_old > fitness_new:
        matrix=temp_mat

best_sample=best_of_all(matrix,number_of_samples)
print("Best Features are :",best_sample)

alpha value : 0.5282521310159194
[[1, 1, 0, 1, 0, 1, 1], [0, 0, 0, 0, 0, 1, 0], [1, 0, 1, 1, 1, 1, 1], [1, 1, 0, 1, 1, 0, 0], [1, 1, 0, 1, 1, 1, 1], [0, 1, 0, 1, 1, 1, 1], [1, 0, 1, 0, 1, 1, 0], [0, 1, 0, 1, 1, 1, 1], [0, 0, 0, 1, 0, 0, 0], [0, 0, 1, 0, 1, 0, 0], [0, 0, 1, 0, 1, 1, 0], [1, 1, 0, 0, 0, 0, 0], [0, 1, 1, 1, 0, 1, 0], [1, 0, 0, 0, 0, 0, 0], [0, 1, 1, 1, 0, 0, 0], [0, 1, 1, 1, 0, 1, 0], [1, 1, 0, 0, 0, 0, 1], [1, 0, 1, 0, 0, 1, 1], [0, 0, 1, 0, 1, 1, 0], [0, 1, 0, 0, 0, 1, 0], [0, 1, 0, 0, 1, 0, 1], [0, 0, 1, 1, 0, 1, 1], [0, 0, 0, 1, 0, 1, 1], [0, 1, 1, 0, 1, 0, 0], [1, 1, 1, 0, 0, 1, 1], [1, 1, 1, 1, 0, 0, 0], [0, 0, 1, 1, 0, 0, 1], [1, 0, 0, 1, 0, 0, 1], [0, 1, 1, 0, 0, 1, 1], [1, 0, 0, 1, 0, 1, 1], [1, 1, 0, 0, 1, 0, 1], [0, 1, 0, 0, 0, 1, 1], [1, 1, 0, 1, 1, 0, 1], [1, 1, 0, 1, 1, 0, 1], [1, 1, 0, 0, 1, 1, 1], [0, 1, 1, 0, 1, 1, 1], [0, 0, 1, 1, 0, 1, 1], [1, 1, 1, 0, 0, 0, 1], [1, 1, 1, 1, 0, 1, 1], [0, 1, 0, 1, 0, 0, 0], [0, 1, 1, 0, 0, 1, 0], [0, 0, 0, 0, 1, 0, 0], 

In [5]:
x_data=X[:, best_sample]
y_data=Y 
x_train, x_test, y_train, y_test = split_data( x_data, y_data, test_size = 1/4, shuffle=True) 

# Linear Regression 
regressor = LinearRegression()
regressor = regressor.fit(x_train, y_train)
y_pred = regressor.predict(x_test)
print(y_pred)
print(y_test)

# Root mean Sqaured Error
MSE = np.square(np.subtract(y_test,y_pred)/np.maximum(np.absolute(y_test),np.absolute(y_pred))).mean() 
# print(MSE)
RMSE = math.sqrt(MSE)
print("Root mean Sqaured Error",RMSE)

[28.19208397 35.09743819 15.86215197 51.85242384 14.31103863 29.75677109]
[19.  12.   7.5 38.1 10.8 12.9]
Root mean Sqaured Error 0.4598464442773875


In [9]:
from sklearn.metrics import mean_absolute_error as MAE

# Predict and evaluate
error = (np.subtract(y_test,y_pred)/np.maximum(np.absolute(y_test),np.absolute(y_pred))).mean()
# error = MAE(y_test, y_pred)
print(f'MAE error is {error}')

MAE error is 0.07243505017508482


In [10]:
# SVM
from sklearn.model_selection import train_test_split as split_data
X_train, X_test, Y_train, Y_test = split_data(x_data, y_data , test_size = 0.2 , shuffle=True)

from sklearn.svm import SVR
from sklearn.metrics import accuracy_score
svr=SVR() 
svr.fit(X_train,Y_train)
Y_pred=svr.predict(X_test)

#Root Mean Squared Error
# MSE = np.square(np.subtract(Y_test,Y_pred)).mean() 
MSE = np.square(np.subtract(Y_test,Y_pred)/np.maximum(np.absolute(Y_test),np.absolute(Y_pred))).mean() 
RMSE = math.sqrt(MSE)
print("Root Mean Square Error:\n")
print(RMSE)

# Mean Absolute Error
from sklearn.metrics import mean_absolute_error as MAE
# error = MAE(Y_test, Y_pred)
error = (np.subtract(Y_test,Y_pred)/np.maximum(np.absolute(Y_test),np.absolute(Y_pred))).mean()
print(f'MAE error is {error}')

Root Mean Square Error:

0.6445267930313846
MAE error is 0.15733532801822178


In [8]:
# Random Forest
from sklearn.model_selection import train_test_split as split_data
x_train, x_test, y_train, y_test = split_data(x_data, y_data, test_size=0.20, shuffle=True)

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
treeModel = DecisionTreeRegressor(max_depth=5, random_state=None)
treeModel.fit(x_train, y_train)
model = RandomForestRegressor(max_depth=5, random_state=None,max_features='auto',max_leaf_nodes=5,n_estimators=50, criterion="absolute_error")
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

# Mean Absolute Error
from sklearn.metrics import mean_absolute_error as MAE
# error = MAE(y_test, y_pred)
error = (np.absolute(np.subtract(y_test,y_pred)/np.maximum(np.absolute(y_test),np.absolute(y_pred)))).mean()
print(f'MAE error is {error}')

# Root Mean Squared Error
# MSE = np.square(np.subtract(y_test,y_pred)).mean() 

MSE = np.square(np.subtract(y_test,y_pred)/np.maximum(np.absolute(y_test),np.absolute(y_pred))).mean() 
RMSE = math.sqrt(MSE)
print("Root Mean Square Error:")
print(RMSE)

MAE error is 0.3281525684616711
Root Mean Square Error:
0.35972764930399703
