In [87]:
import pandas as pd
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import root_mean_squared_error as RMSE
from sklearn.linear_model import Ridge, Lasso, HuberRegressor, RANSACRegressor, TheilSenRegressor,ElasticNet,LinearRegression
from sklearn.preprocessing import LabelEncoder
import seaborn as sns 
import matplotlib.pyplot as plt
import numpy as np

In [88]:
submission = pd.read_csv("csv/sample_submission.csv")
test = pd.read_csv("csv/test.csv")
train = pd.read_csv("csv/train.csv")

In [89]:
train.head()

Unnamed: 0,id,Sex,Length,Diameter,Height,Weight,Shucked Weight,Viscera Weight,Shell Weight,Age
0,0,I,0.55,0.4125,0.1625,1.715145,0.609514,0.396893,0.56699,4.0
1,1,F,1.5125,1.2125,0.4,31.312023,13.395139,6.265239,8.930093,10.0
2,2,F,1.5125,1.175,0.4125,31.552993,14.670866,6.279414,9.922325,11.0
3,3,I,0.8,0.6,0.2,4.620969,3.019222,0.978058,1.417475,7.0
4,4,I,1.3875,1.0875,0.3625,24.323871,11.651644,5.712424,5.386405,8.0


In [90]:
X = train[["Length"]]
y = train[["Age"]]

In [91]:
linear_reg = LinearRegression()

linear_reg.fit(X,y)

y_pred = linear_reg.predict(test[["Length"]])

print(f"MAE ---> {MAE(test[["Length"]],y_pred)}")
print(f"RMSE ---> {RMSE(test[["Length"]],y_pred)}")


MAE ---> 8.590589432632672
RMSE ---> 8.76256321123702


In [92]:
submission["Age"] = y_pred
submission.to_csv("my_submission.csv",index=False)

16 - place :     1.74808

In [93]:
models = [
    ('linear', LinearRegression()),
    ('ridge', Ridge(alpha=1)),
    ('lasso', Lasso(alpha=0.1)),
    ('elasticnet', ElasticNet(alpha=1, l1_ratio=0.5)),
    ('huber', HuberRegressor()),
    ('ransac', RANSACRegressor()),
    ('theil', TheilSenRegressor())
]

In [94]:
train.head()

Unnamed: 0,id,Sex,Length,Diameter,Height,Weight,Shucked Weight,Viscera Weight,Shell Weight,Age
0,0,I,0.55,0.4125,0.1625,1.715145,0.609514,0.396893,0.56699,4.0
1,1,F,1.5125,1.2125,0.4,31.312023,13.395139,6.265239,8.930093,10.0
2,2,F,1.5125,1.175,0.4125,31.552993,14.670866,6.279414,9.922325,11.0
3,3,I,0.8,0.6,0.2,4.620969,3.019222,0.978058,1.417475,7.0
4,4,I,1.3875,1.0875,0.3625,24.323871,11.651644,5.712424,5.386405,8.0


In [95]:
train.value_counts("Sex")

Sex
M           5387
I           5050
F           4562
Diameter       1
Name: count, dtype: int64

In [96]:
# X = train.drop(columns=["Age"])
X = train[["Length"]]
y = train[["Age"]]

In [97]:
for i in models:
    model = i[-1]
    model.fit(X,y)
    y_pred = model.predict(test[["Length"]])
    print(f"Model: {i[0]}       MAE: {MAE(test["Length"], y_pred)}")




Model: linear       MAE: 8.590589432632672
Model: ridge       MAE: 8.590611909157657
Model: lasso       MAE: 8.59543342999965
Model: elasticnet       MAE: 8.618284598362601
Model: huber       MAE: 8.029023586736146
Model: ransac       MAE: 7.938088600683645


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Model: theil       MAE: 7.873616183734154


In [98]:
X = train.drop(columns=["Age"])
y = train[["Age"]]

label_encoder  = LabelEncoder()
train["Sex"] = label_encoder.fit_transform(train["Sex"])
train.loc[train["Sex"] == 0, "Sex"] = train["Sex"].mode()

In [99]:
test["Sex"] = label_encoder.fit_transform(test["Sex"])
X["Sex"] = label_encoder.fit_transform(X["Sex"])

In [100]:
X.head()

Unnamed: 0,id,Sex,Length,Diameter,Height,Weight,Shucked Weight,Viscera Weight,Shell Weight
0,0,2,0.55,0.4125,0.1625,1.715145,0.609514,0.396893,0.56699
1,1,1,1.5125,1.2125,0.4,31.312023,13.395139,6.265239,8.930093
2,2,1,1.5125,1.175,0.4125,31.552993,14.670866,6.279414,9.922325
3,3,2,0.8,0.6,0.2,4.620969,3.019222,0.978058,1.417475
4,4,2,1.3875,1.0875,0.3625,24.323871,11.651644,5.712424,5.386405


In [101]:
model = RANSACRegressor()
model = i[-1]
model.fit(X,y)
y_pred = model.predict(test)

  y = column_or_1d(y, warn=True)


In [102]:
submission["Age"] = y_pred
submission.to_csv("my_submission.csv",index=False)
submission

Unnamed: 0,id,Age
0,15000,6.666478
1,15001,8.669375
2,15002,5.628867
3,15003,7.733118
4,15004,6.817833
...,...,...
9995,24995,8.952947
9996,24996,8.269827
9997,24997,9.958038
9998,24998,8.222661
