# AdaBoost model for the Abalone regression task (AdaBoostClassifier over LogisticRegression)

In [1]:
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from evolution import rmsle
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import root_mean_squared_log_error as rmsle

In [2]:
# Read the training data, using the "id" column as the row index.
df = pd.read_csv("../data/train.csv", index_col='id')

# Label-encode the categorical "Sex" column as integer codes.
sex_codes = {'F': 1, 'I': 2, 'M': 0}
df['Sex'] = df['Sex'].map(sex_codes)

# Standardize the numeric measurement columns in place.
# `scaler` stays fitted on the training data so the test-set cell can re-use it.
numeric_cols = [
    'Length', 'Diameter', 'Height',
    'Whole weight', 'Whole weight.1', 'Whole weight.2',
    'Shell weight',
]
scaler = StandardScaler()
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])
df.head()

Unnamed: 0_level_0,Sex,Length,Diameter,Height,Whole weight,Whole weight.1,Whole weight.2,Shell weight,Rings
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,1,0.278317,0.288912,0.382451,-0.038314,-0.060061,-0.227155,0.108309,11
1,1,0.955044,0.900996,0.250897,0.745005,0.573416,1.061143,0.722736,11
2,2,-3.020727,-2.975535,-2.906386,-1.678148,-1.640084,-1.649238,-1.69657,6
3,0,0.658976,0.747975,0.382451,0.27414,0.16985,0.357534,0.185113,10
4,2,0.320613,0.237905,-0.143763,-0.015371,0.140499,-0.09337,-0.218105,9


In [3]:
# Split the data, fit an AdaBoost ensemble of logistic-regression classifiers,
# and report hold-out metrics.

# NOTE(review): 'Diameter' is scaled in the loading cell but is not used as a
# feature here; the test-set prediction cell uses the same column list, so keep
# the two in sync if this changes.
feature_cols = ['Sex', 'Length', 'Height', 'Whole weight', 'Whole weight.1', 'Whole weight.2', 'Shell weight']

X_train, X_test, y_train, y_test = train_test_split(
    df[feature_cols], df['Rings'], test_size=0.2, random_state=42
)

# Base learner. `multi_class` is intentionally not passed: with the lbfgs solver
# scikit-learn already fits a multinomial model for a multiclass target, and the
# explicit argument is deprecated in recent releases.
# The name `linreg` is kept because a later cell refers to it.
linreg = LogisticRegression(solver='lbfgs', max_iter=1000, random_state=42)

# `estimator=` replaces the removed `base_estimator=` keyword. The imports cell
# needs scikit-learn >= 1.4 (root_mean_squared_log_error), where `base_estimator`
# no longer exists.
adaboost_classifier = AdaBoostClassifier(
    estimator=linreg, n_estimators=50, learning_rate=1.0, random_state=42
)
adaboost_classifier.fit(X_train, y_train)

y_pred = adaboost_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy score:", accuracy)
# zero_division=0 reports 0.0 for classes that are never predicted (same value as
# the default) without emitting an UndefinedMetricWarning per class.
print("Classification report:\n", classification_report(y_test, y_pred, zero_division=0))
# `rmsle` is sklearn.metrics.root_mean_squared_log_error here: that import comes
# after `from evolution import rmsle` in the imports cell and shadows it.
print("RMSLE Score : ", rmsle(y_test, y_pred))



Accuracy score: 0.30993764829222537
Classification report:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00         6
           2       0.00      0.00      0.00         8
           3       0.00      0.00      0.00        77
           4       0.50      0.02      0.04       284
           5       0.00      0.00      0.00       596
           6       0.00      0.00      0.00      1088
           7       0.29      0.62      0.40      1781
           8       0.43      0.45      0.44      2947
           9       0.30      0.57      0.39      3482
          10       0.24      0.29      0.26      2454
          11       0.29      0.29      0.29      1636
          12       0.00      0.00      0.00       965
          13       0.00      0.00      0.00       786
          14       0.00      0.00      0.00       519
          15       0.00      0.00      0.00       416
          16       0.00      0.00      0.00       287
          17       0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [4]:
# Persist the fitted AdaBoost ensemble for later reuse.
import pickle

# Dump `adaboost_classifier`, not `linreg`: `linreg` is only the base-estimator
# template passed to AdaBoost and is never fitted itself in this notebook
# (AdaBoost fits its own clones), so pickling it would store an unusable model.
with open("../trained_models/adaboost.pkl", "wb") as f:
    pickle.dump(adaboost_classifier, f)

In [6]:
# Score the Kaggle test set with the fitted AdaBoost model and write the submission file.

test_df = pd.read_csv("../data/test.csv", index_col="id")

# Same integer encoding of "Sex" that was applied to the training frame.
sex_codes = {'F': 1, 'I': 2, 'M': 0}
test_df['Sex'] = test_df['Sex'].map(sex_codes)

# Re-use the scaler fitted on the training data: transform only, no re-fit.
scaled_cols = [
    'Length', 'Diameter', 'Height',
    'Whole weight', 'Whole weight.1', 'Whole weight.2',
    'Shell weight',
]
test_df[scaled_cols] = scaler.transform(test_df[scaled_cols])

# Columns fed to the model, in the same order used for training.
model_cols = ["Sex", 'Length', 'Height', 'Whole weight', 'Whole weight.1', 'Whole weight.2', 'Shell weight']
test_df['pred_Rings'] = adaboost_classifier.predict(test_df[model_cols])

# NOTE(review): index=False drops the "id" index from this file — confirm that is intended.
test_df.to_csv('../data/modified_test.csv', index=False)

# Submission frame: one row per test id with its predicted ring count.
sub = pd.DataFrame({'id': test_df.index, 'Rings': test_df['pred_Rings']})
sub.to_csv("../submission/adaboost.csv", index=False)

In [9]:
# Submit the AdaBoost predictions file to the Kaggle competition.
# NOTE(review): the command below is left commented out — presumably so that
# re-running the notebook does not trigger another submission; uncomment to submit.

# !kaggle competitions submit -c playground-series-s4e4 -f ../submission/adaboost.csv -m "adaboost LinearRegression2 model submission"

Successfully submitted to Regression with an Abalone Dataset



  0%|          | 0.00/596k [00:00<?, ?B/s]
  1%|▏         | 8.00k/596k [00:00<00:44, 13.4kB/s]
100%|██████████| 596k/596k [00:03<00:00, 159kB/s]  
