### **Research Project -  Khaled Alfawal**
#### Regression On Abalone Dataset

In [None]:
# Downloading the dataset
!wget -q https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data

In [None]:
# Importing the dataset
import pandas as pd

# Column layout of abalone.data:
#
#   Name            Data Type   Meas.   Description
#   ----            ---------   -----   -----------
#   Sex             nominal             M, F, and I (infant)
#   Length          continuous  mm      Longest shell measurement
#   Diameter        continuous  mm      perpendicular to length
#   Height          continuous  mm      with meat in shell
#   Whole weight    continuous  grams   whole abalone
#   Shucked weight  continuous  grams   weight of meat
#   Viscera weight  continuous  grams   gut weight (after bleeding)
#   Shell weight    continuous  grams   after being dried
#   Rings           integer             +1.5 gives the age in years
column_names = [
    "Sex",
    "Length",
    "Diameter",
    "Height",
    "Whole weight",
    "Shucked weight",
    "Viscera weight",
    "Shell weight",
    "Rings",
]

# abalone.data has no header row. Without header=None pandas would treat the
# first record as column labels, and renaming the columns afterwards would
# silently discard that sample. Passing names= assigns the labels at read time
# and keeps every row as data.
df = pd.read_csv('abalone.data', header=None, names=column_names)
dataset = df

In [None]:
# Check the shape of the data; for a DataFrame this is a (rows, columns) tuple
dataset.shape

In [None]:
# Hold back part of the data for a final check after modeling.
# A reproducible 90% sample becomes the modeling set; the rows that were not
# sampled (the remaining 10%) become the unseen set.
data = dataset.sample(frac=0.9, random_state=786)

# The unseen rows must be selected while `data` still carries the original
# index labels, i.e. before its index is reset below.
data_unseen = dataset.drop(data.index).reset_index(drop=True)
data = data.reset_index(drop=True)

print(f'Data for Modeling: {data.shape}')
print(f'Unseen Data For Predictions: {data_unseen.shape}')

In [None]:
# Initialise the PyCaret regression experiment on the modeling data.
# 'Rings' is the column to predict; session_id is fixed so that repeated runs
# of the experiment use the same seed.
# The star import is what brings setup, compare_models, create_model, etc.
# into the notebook namespace for the cells that follow.
from pycaret.regression import *

exp_reg101 = setup(data=data, target='Rings', session_id=123)

In [None]:
# Finding the best model
# compare_models() is called with its default arguments; the estimator it
# returns is stored in `best` and echoed as the cell output.
best = compare_models()
best

In [None]:
# Train a Gradient Boosting Regressor ('gbr' is the model ID passed to create_model)
# NOTE(review): 'gbr' is hard-coded on the assumption that compare_models()
# ranked it first; `best` itself is not reused here, so confirm against the
# comparison output whenever the data or seed changes.
gbr = create_model('gbr')
print(gbr)

In [None]:
# Tuning the model's hyperparameters (tune_model called with its defaults)
# The result is stored under a new name, so the untuned `gbr` stays available.
# NOTE(review): presumably the tuned model is not guaranteed to score better
# than `gbr` - compare the two metric tables before relying on it.
tuned_gbr = tune_model(gbr)

In [None]:
# Plotting the tuned model with plot_model's default chart
# No `plot` argument is given, so the library default is used (presumably the
# residuals plot - confirm for the installed PyCaret version).
plot_model(tuned_gbr)

In [None]:
# Plotting the prediction error chart (plot='error') for the tuned model
plot_model(tuned_gbr, plot = 'error')

In [None]:
# Plotting feature importance (plot='feature') for the tuned model
plot_model(tuned_gbr, plot='feature')

In [None]:
# Evaluating the model
# Per the PyCaret docs, evaluate_model displays an interactive interface for
# browsing the available plots of the given estimator.
evaluate_model(tuned_gbr)

In [None]:
# Predicting on the hold-out split
# No `data` argument is passed, so `data_unseen` is not used here; per the
# PyCaret docs, predict_model then scores the hold-out set that setup()
# reserved from `data`.
predict_model(tuned_gbr)

In [None]:
# Finalizing the model
# Nothing is written to disk by this call. Per the PyCaret docs,
# finalize_model refits the estimator on the entire dataset given to setup(),
# including the hold-out split.
final_gbr = finalize_model(tuned_gbr)

In [None]:
# Checking with unseen data
# Score the rows that were held back before setup() (`data_unseen`) with the
# finalized model. The finalized estimator in this notebook is `final_gbr`;
# the previously referenced `final_lightgbm` is never defined and raised a
# NameError.
unseen_predictions = predict_model(final_gbr, data=data_unseen)
unseen_predictions.head()