<a href="https://colab.research.google.com/github/kenanmorani/Prediction_Of_Total_Prodiced_Biogas_Flow/blob/main/Recursive_SVM_Feature_selective_for_Biogas_Flow_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [36]:
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import warnings
from sklearn import preprocessing

In [37]:
# Mounting my google drive
from google.colab import drive

In [38]:
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [39]:
#Create DataFrame
df = pd.read_csv('/content/gdrive/MyDrive/Biogaz_Flow/Real_Plant_Data.csv')

In [40]:
df.head()

Unnamed: 0,Date,TMF,TKM_percent,TUKM_percent,Alcantine,Fatty_Accid,Total_Biogaz_Production
0,1/1/2008,240,46.26,33.15,787.0,244.36,2113
1,1/2/2008,120,44.77,22.02,983.0,413.54,1754
2,1/3/2008,200,44.45,22.61,740.0,378.6,1884
3,1/4/2008,160,44.31,26.64,761.0,353.66,1768
4,1/5/2008,200,50.58,23.23,748.0,343.96,1715


In [41]:
#Function to test model performance changes with feature elimination MSE
def Mean_Square_Error(model, x_test, y_test):
	prediction = model.predict(x_test)
	print ("Mean Square error of model:", mean_squared_error(y_test, prediction))

In [42]:
#Setting a parameter for SVM model
C = 1.0

In [43]:
# Identifying the target feature by splitting the dataset
samples = df.filter(['TMF', 'TKM_percent', 'TUKM_percent', 'Alcantine', 'Fatty_Accid'])
scores = df.filter(['Total_Biogaz_Production'])

In [44]:
# Deleting the 'Date' column from the dataset as supposed 'irrelevent' or 'unprocessible'
del df['Date']

In [45]:
# Defining the number of features to investigate
nFeatures = len(df.columns) - 1

In [46]:
samples

Unnamed: 0,TMF,TKM_percent,TUKM_percent,Alcantine,Fatty_Accid
0,240,46.26,33.15,787.00,244.36
1,120,44.77,22.02,983.00,413.54
2,200,44.45,22.61,740.00,378.60
3,160,44.31,26.64,761.00,353.66
4,200,50.58,23.23,748.00,343.96
...,...,...,...,...,...
389,910,46.88,22.50,308.33,162.40
390,940,38.19,18.33,277.17,109.25
391,810,71.55,34.34,430.00,63.43
392,1100,99.82,47.91,410.00,48.98


In [47]:
scores

Unnamed: 0,Total_Biogaz_Production
0,2113
1,1754
2,1884
3,1768
4,1715
...,...
389,6132
390,5562
391,5870
392,5207


In [48]:
rfeIndex = nFeatures

In [49]:
#Recursively eliminate features based on the lowest weight
while True:
	#Split into training and testing
	x_train, x_test, y_train, y_test = train_test_split(samples, scores, test_size = 0.50, train_size=0.50)
	
	#Create SVM model using a linear kernel
	model = svm.SVR(kernel='linear', C=C).fit(x_train, y_train)
	coef = model.coef_

	#Print co-efficients of features
	for i in range(0, nFeatures):
		print(samples.columns[i-1],":", coef[0][i-1])
	
	#Find the minimum weight among features and eliminate the feature with the smallest weight
	min = coef[0][0]
	index = 0
	for i in range(0, rfeIndex):
		if min > coef[0][i-1]:
			index = index + 1
			min = coef[0][i-1]
	if len(samples.columns) == 1:
		print("After recursive elimination we have the", samples.columns[index], "feature with a score of:", min)
		Mean_Square_Error(model, x_test, y_test)
		break
	else:
		print ("Lowest feature weight is for", samples.columns[index], "with a value of:", min)
		print ("Dropping feature", samples.columns[index])  

		#Drop the feature in the 'samples' dataframe based on the lowest feature index
		samples.drop(samples.columns[index], axis = 1, inplace = True)
		Mean_Square_Error(model, x_test, y_test)
		print ("\n")
		rfeIndex = rfeIndex - 1
		nFeatures = nFeatures - 1

Fatty_Accid : 1.6436548300928848
TMF : 6.657213896331768
TKM_percent : 5.457184279436092
TUKM_percent : 29.474537096000077
Alcantine : 0.08988235420656565
Lowest feature weight is for TUKM_percent with a value of: 0.08988235420656565
Dropping feature TUKM_percent
Mean Square error of model: 8121951.551265039


Fatty_Accid : 2.1127632609003513
TMF : 5.596732539106597
TKM_percent : 24.183147355131958
Alcantine : -0.44507553501216535
Lowest feature weight is for Alcantine with a value of: -0.44507553501216535
Dropping feature Alcantine
Mean Square error of model: 9260311.29445238


Fatty_Accid : 0.365987946351197
TMF : 5.838201771366585
TKM_percent : 26.53703460708777
Lowest feature weight is for TKM_percent with a value of: 0.365987946351197
Dropping feature TKM_percent
Mean Square error of model: 8077565.72189119


Fatty_Accid : 1.3184196023751724
TMF : 5.890918980730476
Lowest feature weight is for Fatty_Accid with a value of: 1.3184196023751724
Dropping feature Fatty_Accid
Mean Square