In [18]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

**Read the dataset.**

In [19]:
data = pd.read_csv(r"abalone.csv")

In [20]:
print(data.head)

<bound method NDFrame.head of      Sex  Length  Diameter  Height  Whole weight  Shucked weight  \
0      M   0.455     0.365   0.095        0.5140          0.2245   
1      M   0.350     0.265   0.090        0.2255          0.0995   
2      F   0.530     0.420   0.135        0.6770          0.2565   
3      M   0.440     0.365   0.125        0.5160          0.2155   
4      I   0.330     0.255   0.080        0.2050          0.0895   
...   ..     ...       ...     ...           ...             ...   
4172   F   0.565     0.450   0.165        0.8870          0.3700   
4173   M   0.590     0.440   0.135        0.9660          0.4390   
4174   M   0.600     0.475   0.205        1.1760          0.5255   
4175   F   0.625     0.485   0.150        1.0945          0.5310   
4176   M   0.710     0.555   0.195        1.9485          0.9455   

      Viscera weight  Shell weight  Rings  
0             0.1010        0.1500     15  
1             0.0485        0.0700      7  
2             0.1415 

In [21]:
print(data.columns)

Index(['Sex', 'Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight',
       'Viscera weight', 'Shell weight', 'Rings'],
      dtype='object')


In [22]:
print(data.dtypes)

Sex                object
Length            float64
Diameter          float64
Height            float64
Whole weight      float64
Shucked weight    float64
Viscera weight    float64
Shell weight      float64
Rings               int64
dtype: object


In [23]:
print(data.shape)

(4177, 9)


In [24]:
print(data.isnull().sum())

Sex               0
Length            0
Diameter          0
Height            0
Whole weight      0
Shucked weight    0
Viscera weight    0
Shell weight      0
Rings             0
dtype: int64


In [26]:
from sklearn.preprocessing import LabelEncoder

# Assuming your dataframe is named 'data' and the column containing "M" and "F" is named 'gender'
label_encoder = LabelEncoder()
data['Sex'] = label_encoder.fit_transform(data['Sex'])

In [27]:
data.head(5)

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,2,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,2,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,0,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,2,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,1,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [28]:
print(data.dtypes)

Sex                 int32
Length            float64
Diameter          float64
Height            float64
Whole weight      float64
Shucked weight    float64
Viscera weight    float64
Shell weight      float64
Rings               int64
dtype: object


In [29]:
X = data[['Length','Diameter','Height','Whole weight','Shucked weight','Shell weight', 'Viscera weight','Sex']] 
y = data['Rings'] 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)


In [30]:
print("R2 Score: ", r2_score(y_test, y_pred)*100)



R2 Score:  53.23381317508213


In [32]:
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
# Calculate Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)

rmse = np.sqrt(mse)

r2 = r2_score(y_test, y_pred)

print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared (R2):", r2)


Mean Squared Error (MSE): 5.062537954095227
Root Mean Squared Error (RMSE): 2.2500084342275755
R-squared (R2): 0.5323381317508213


In [33]:
row_index = 0
row = data.iloc[row_index]
features = row[['Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight', 'Shell weight', 'Viscera weight', 'Sex']].values.reshape(1, -1)
print("Features for prediction:", features)


Features for prediction: [[0.455  0.365  0.095  0.514  0.2245 0.15   0.101  2.    ]]


In [34]:
prediction = model.predict(features)
print(f"Predicted rings for row {row_index}: {prediction[0]}")


Predicted rings for row 0: 8.863156224173391


