<a href="https://colab.research.google.com/github/kenanmorani/Prediction_Of_Total_Prodiced_Biogas_Flow/blob/main/Recursive_SVM_scaled_Feature_selective_for_Biogas_Flow_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import warnings
from sklearn import preprocessing

In [2]:
# Mounting my google drive
from google.colab import drive

In [3]:
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [4]:
#Create DataFrame
ss = pd.read_csv('/content/gdrive/MyDrive/Biogaz_Flow/Real_Plant_Data.csv')

In [5]:
del ss['Date'] # Deleting the 'Date' column
ss.head()

Unnamed: 0,TMF,TKM_percent,TUKM_percent,Alcantine,Fatty_Accid,Total_Biogaz_Production
0,240,46.26,33.15,787.0,244.36,2113
1,120,44.77,22.02,983.0,413.54,1754
2,200,44.45,22.61,740.0,378.6,1884
3,160,44.31,26.64,761.0,353.66,1768
4,200,50.58,23.23,748.0,343.96,1715


In [6]:
d = preprocessing.normalize(ss, axis=0) # Applying normalization on the data

In [7]:
# The scaled / normalized dataset
names = ss.columns
df = pd.DataFrame(d, columns=names)

In [8]:
df.head()

Unnamed: 0,TMF,TKM_percent,TUKM_percent,Alcantine,Fatty_Accid,Total_Biogaz_Production
0,0.012425,0.044726,0.067908,0.040657,0.028022,0.01327
1,0.006213,0.043286,0.045108,0.050782,0.047422,0.011015
2,0.010354,0.042976,0.046316,0.038229,0.043415,0.011832
3,0.008283,0.042841,0.054572,0.039314,0.040555,0.011103
4,0.010354,0.048903,0.047587,0.038642,0.039443,0.01077


In [9]:
#Function to test model performance changes with feature elimination MSE
def Mean_Square_Error(model, x_test, y_test):
	prediction = model.predict(x_test)
	print ("Mean Square error of model:", mean_squared_error(y_test, prediction))

In [10]:
#Setting a parameter for SVM model
C = 1.0

In [11]:
# Identifying the target feature by splitting the dataset
samples = df.filter(['TMF', 'TKM_percent', 'TUKM_percent', 'Alcantine', 'Fatty_Accid'])
scores = df.filter(['Total_Biogaz_Production'])

In [13]:
# Defining the number of features to investigate
nFeatures = len(df.columns) - 1

In [14]:
samples

Unnamed: 0,TMF,TKM_percent,TUKM_percent,Alcantine,Fatty_Accid
0,0.012425,0.044726,0.067908,0.040657,0.028022
1,0.006213,0.043286,0.045108,0.050782,0.047422
2,0.010354,0.042976,0.046316,0.038229,0.043415
3,0.008283,0.042841,0.054572,0.039314,0.040555
4,0.010354,0.048903,0.047587,0.038642,0.039443
...,...,...,...,...,...
389,0.047112,0.045326,0.046091,0.015929,0.018623
390,0.048665,0.036924,0.037549,0.014319,0.012528
391,0.041935,0.069178,0.070345,0.022214,0.007274
392,0.056949,0.096510,0.098143,0.021181,0.005617


In [15]:
scores

Unnamed: 0,Total_Biogaz_Production
0,0.013270
1,0.011015
2,0.011832
3,0.011103
4,0.010770
...,...
389,0.038509
390,0.034929
391,0.036864
392,0.032700


In [16]:
rfeIndex = nFeatures

In [17]:
#Recursively eliminate features based on the lowest weight
while True:
	#Split into training and testing
	x_train, x_test, y_train, y_test = train_test_split(samples, scores, test_size = 0.50, train_size=0.50)
	
	#Create SVM model using a linear kernel
	model = svm.SVR(kernel='linear', C=C).fit(x_train, y_train)
	coef = model.coef_

	#Print co-efficients of features
	for i in range(0, nFeatures):
		print(samples.columns[i-1],":", coef[0][i-1])
	
	#Find the minimum weight among features and eliminate the feature with the smallest weight
	min = coef[0][0]
	index = 0
	for i in range(0, rfeIndex):
		if min > coef[0][i-1]:
			index = index + 1
			min = coef[0][i-1]
	if len(samples.columns) == 1:
		print("After recursive elimination we have the", samples.columns[index], "feature with a score of:", min)
		Mean_Square_Error(model, x_test, y_test)
		break
	else:
		print ("Lowest feature weight is for", samples.columns[index], "with a value of:", min)
		print ("Dropping feature", samples.columns[index])  

		#Drop the feature in the 'samples' dataframe based on the lowest feature index
		samples.drop(samples.columns[index], axis = 1, inplace = True)
		Mean_Square_Error(model, x_test, y_test)
		print ("\n")
		rfeIndex = rfeIndex - 1
		nFeatures = nFeatures - 1

Fatty_Accid : 0.0
TMF : 0.0
TKM_percent : 0.0
TUKM_percent : 0.0
Alcantine : 0.0
Lowest feature weight is for TMF with a value of: 0.0
Dropping feature TMF
Mean Square error of model: 0.0014408657271594882


Fatty_Accid : 0.0
TKM_percent : 0.0
TUKM_percent : 0.0
Alcantine : 0.0
Lowest feature weight is for TKM_percent with a value of: 0.0
Dropping feature TKM_percent
Mean Square error of model: 0.0011554218449178573


Fatty_Accid : 0.0
TUKM_percent : 0.0
Alcantine : 0.0
Lowest feature weight is for TUKM_percent with a value of: 0.0
Dropping feature TUKM_percent
Mean Square error of model: 0.0012260047093989146


Fatty_Accid : 0.0
Alcantine : 0.0
Lowest feature weight is for Alcantine with a value of: 0.0
Dropping feature Alcantine
Mean Square error of model: 0.0012929439739534489


Fatty_Accid : 0.0
After recursive elimination we have the Fatty_Accid feature with a score of: 0.0
Mean Square error of model: 0.0010156954601093036


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
