#### Predict the age of abalone from physical measurements
##### More information about data can be found from following link

https://archive.ics.uci.edu/ml/datasets/Abalone

## Random Forest Regression

#### Import necessary libraries

In [2]:
import numpy as np
import pandas as pd

#### Import .data file (text file ) and into csv format for simplification. If you have  already vsc formate then ignore this step

In [3]:
col_name=['Sex','Length','Diameter','Height','Whole weight','Shucked weight','Viscera weight','Shelle weight','Rings']
data=pd.read_table('abalone.data',delimiter=',',names=col_name)
data.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shelle weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [4]:
data.to_csv('abalone.csv',index=False,index_label=False)

#### Load the data set from csv file

In [5]:
df=pd.read_csv('abalone.csv',index_col=None)

In [6]:
df.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shelle weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [7]:
print(df.isnull().sum())

Sex               0
Length            0
Diameter          0
Height            0
Whole weight      0
Shucked weight    0
Viscera weight    0
Shelle weight     0
Rings             0
dtype: int64


In [8]:
df.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shelle weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


#### Extract  features and Lables from the dataset

In [9]:
X=df.iloc[:,1:-1].values
y=df.iloc[:,-1]

In [10]:
print(X.shape)
print(y.shape)

(4177, 7)
(4177,)


#### Split dataset into train test data sets

In [11]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=0)
print(X_train.shape)
print(X_test.shape)
y_test=np.array(y_test)

(3341, 7)
(836, 7)


#### Train the Random Forest Regression on training data

In [13]:
from sklearn.ensemble import RandomForestRegressor
classifier=RandomForestRegressor(n_estimators=100)
classifier.fit(X_train,y_train)

RandomForestRegressor()

#### Predict the Single Result

In [14]:
prediction=classifier.predict([[0.456,0.345,0.049,1.201,0.2249,0.1245,0.2]])
print('The predicted no of Rings are {}'.format(int(prediction)))

The predicted no of Rings are 17


#### Predict the test data result

In [15]:
y_predict=np.int64(classifier.predict(X_test))
print(y_predict.shape)
print(y_test.shape)
print(y_predict.dtype)
print(y_test.dtype)

(836,)
(836,)
int64
int64


In [16]:
result=np.concatenate((y_predict.reshape(len(y_predict),1),y_test.reshape(len(y_test),1)),1)
print(result)

[[15 13]
 [11  8]
 [11 11]
 ...
 [ 7  7]
 [14 17]
 [ 4  4]]


#### R2 score

In [17]:
from sklearn.metrics import r2_score
r2=r2_score(y_test,y_predict)
print('R2 score of the model is {:.2f}'.format(r2))

R2 score of the model is 0.53
