### Import Librarires And Dataset

In [53]:
import warnings
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

#some settings to show data
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 50)

# target_url = ("http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data")
target_url = ("datasets/abalone.data")
abalone = pd.read_csv(target_url)
abalone.columns = ['Sex', 'Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight', 'Viscera weight', 'Shell weight', 'Rings']

### Clean and Analyze the Data

In [54]:
abalone.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
1,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
2,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
3,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
4,I,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,8


In [55]:
abalone.isnull().sum(axis = 0)

Sex               0
Length            0
Diameter          0
Height            0
Whole weight      0
Shucked weight    0
Viscera weight    0
Shell weight      0
Rings             0
dtype: int64

In [56]:
abalone.describe()

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
count,4176.0,4176.0,4176.0,4176.0,4176.0,4176.0,4176.0,4176.0
mean,0.524009,0.407892,0.139527,0.828818,0.3594,0.180613,0.238852,9.932471
std,0.120103,0.09925,0.041826,0.490424,0.22198,0.10962,0.139213,3.223601
min,0.075,0.055,0.0,0.002,0.001,0.0005,0.0015,1.0
25%,0.45,0.35,0.115,0.4415,0.186,0.093375,0.13,8.0
50%,0.545,0.425,0.14,0.79975,0.336,0.171,0.234,9.0
75%,0.615,0.48,0.165,1.15325,0.502,0.253,0.329,11.0
max,0.815,0.65,1.13,2.8255,1.488,0.76,1.005,29.0


In [57]:
abalone = pd.get_dummies(abalone)

In [58]:
abalone.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4176 entries, 0 to 4175
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Length          4176 non-null   float64
 1   Diameter        4176 non-null   float64
 2   Height          4176 non-null   float64
 3   Whole weight    4176 non-null   float64
 4   Shucked weight  4176 non-null   float64
 5   Viscera weight  4176 non-null   float64
 6   Shell weight    4176 non-null   float64
 7   Rings           4176 non-null   int64  
 8   Sex_F           4176 non-null   bool   
 9   Sex_I           4176 non-null   bool   
 10  Sex_M           4176 non-null   bool   
dtypes: bool(3), float64(7), int64(1)
memory usage: 273.4 KB


In [59]:
abalone.head(20)

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings,Sex_F,Sex_I,Sex_M
0,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7,False,False,True
1,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9,True,False,False
2,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10,False,False,True
3,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7,False,True,False
4,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,8,False,True,False
5,0.53,0.415,0.15,0.7775,0.237,0.1415,0.33,20,True,False,False
6,0.545,0.425,0.125,0.768,0.294,0.1495,0.26,16,True,False,False
7,0.475,0.37,0.125,0.5095,0.2165,0.1125,0.165,9,False,False,True
8,0.55,0.44,0.15,0.8945,0.3145,0.151,0.32,19,True,False,False
9,0.525,0.38,0.14,0.6065,0.194,0.1475,0.21,14,True,False,False


In [60]:
import seaborn as sns

corr = abalone.corr()
corr.style.background_gradient(cmap='coolwarm')
# 'RdBu_r' & 'BrBG' are other good diverging colormaps
cm = sns.diverging_palette(220, 20, sep=20, as_cmap=True)
corr.style.background_gradient(cmap=cm)

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings,Sex_F,Sex_I,Sex_M
Length,1.0,0.986813,0.827552,0.925255,0.897905,0.90301,0.897697,0.557123,0.309602,-0.551613,0.236783
Diameter,0.986813,1.0,0.833705,0.925452,0.893159,0.899726,0.905328,0.575005,0.318581,-0.564431,0.240567
Height,0.827552,0.833705,1.0,0.819209,0.774957,0.798293,0.817326,0.558109,0.298306,-0.518828,0.215869
Whole weight,0.925255,0.925452,0.819209,1.0,0.969403,0.966372,0.955351,0.540818,0.299668,-0.557757,0.252305
Shucked weight,0.897905,0.893159,0.774957,0.969403,1.0,0.931956,0.882606,0.421256,0.263919,-0.521995,0.252048
Viscera weight,0.90301,0.899726,0.798293,0.966372,0.931956,1.0,0.907647,0.504274,0.308363,-0.556267,0.242489
Shell weight,0.897697,0.905328,0.817326,0.955351,0.882606,0.907647,1.0,0.628031,0.306248,-0.547116,0.235652
Rings,0.557123,0.575005,0.558109,0.540818,0.421256,0.504274,0.628031,1.0,0.25062,-0.435958,0.181426
Sex_F,0.309602,0.318581,0.298306,0.299668,0.263919,0.308363,0.306248,0.25062,1.0,-0.464461,-0.512449
Sex_I,-0.551613,-0.564431,-0.518828,-0.557757,-0.521995,-0.556267,-0.547116,-0.435958,-0.464461,1.0,-0.522462


**Delete target value**

In [61]:
y = abalone["Rings"]
X = abalone.drop(columns="Rings")

In [62]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)