In [1]:
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

In [4]:
# Column names are supplied explicitly via `names=`; both files are read
# with the same names in the same order.
column_names = [
    'age',
    'length', 'diameter', 'height',
    'whole_weight', 'shucked_weight', 'viscera_weight', 'shell_weight',
    'sex_I', 'sex_M',
]

train_df = pd.read_csv('abalone_train.csv', names=column_names)
valid_df = pd.read_csv('abalone_validation.csv', names=column_names)

# Build model

In [5]:
# Baseline: every column except the target ('age') is used as a predictor.
X_train_all = train_df.drop(columns='age')
X_valid_all = valid_df.drop(columns='age')

model = RandomForestRegressor(n_estimators=100, random_state=123)
model.fit(X_train_all, train_df['age'])

predicted_age = model.predict(X_valid_all)
# MAE is symmetric in its two arguments; they are passed as (y_true, y_pred).
mae = mean_absolute_error(valid_df['age'], predicted_age)
print(f"MAE = {mae:.2f} years")

MAE = 1.52 years


Using only the features ['length', 'diameter', 'height', 'whole_weight']:

In [6]:
# Reduced model: fit on four predictors only and score it on the validation set.
features = ['length', 'diameter', 'height', 'whole_weight']

model = RandomForestRegressor(n_estimators=100, random_state=123)
model.fit(train_df[features], train_df['age'])

predicted_age = model.predict(valid_df[features])
# MAE is symmetric in its two arguments; they are passed as (y_true, y_pred).
mae = mean_absolute_error(valid_df['age'], predicted_age)
print(f"MAE = {mae:.2f} years")



MAE = 1.88 years


# Save Model

In [7]:
# Refit the reduced-feature model on the training and validation rows combined;
# this is the model object that the following cell writes to disk.
features = ['length', 'diameter', 'height', 'whole_weight']
full_X = pd.concat([train_df[features], valid_df[features]])
full_y = pd.concat([train_df['age'], valid_df['age']])

# Seed the forest (same seed as the evaluation cells) so that re-running the
# notebook produces the same fitted model and the same example prediction.
model = RandomForestRegressor(n_estimators=100, random_state=123).fit(full_X, full_y)

In [9]:
# Write the same fitted model to both output locations.
for model_path in ('web_api/abalone_predictor.joblib',
                   'web_application/abalone_predictor.joblib'):
    with open(model_path, 'wb') as model_file:
        joblib.dump(model, model_file)

## Prediction Function

In [10]:
# Example request payload: one value per feature name.
input_json = dict(
    length=0.41,
    diameter=0.33,
    height=0.10,
    whole_weight=0.36,
)

In [11]:
def return_prediction(model, input_json,
                      features=('length', 'diameter', 'height', 'whole_weight')):
    """Predict a single value from a dict of feature values.

    Values are picked from ``input_json`` by name, in the order given by
    ``features``, so the result does not depend on the key order of the
    incoming dict, and keys not listed in ``features`` are ignored.

    Parameters
    ----------
    model : object
        Any object exposing ``predict(rows)`` that accepts a list containing
        one row of feature values and returns an indexable result.
    input_json : dict
        Mapping of feature name to value.
    features : sequence of str, optional
        Feature names in the order the model expects them. The default is
        the feature list used to fit the model saved in this notebook.

    Returns
    -------
    The first (only) element of ``model.predict``'s result.

    Raises
    ------
    ValueError
        If ``input_json`` lacks one or more of the names in ``features``.
    """
    missing = [name for name in features if name not in input_json]
    if missing:
        raise ValueError(f"input_json is missing required features: {missing}")

    # Build the single row in the model's feature order, not the dict's order.
    input_data = [[input_json[name] for name in features]]
    prediction = model.predict(input_data)[0]

    return prediction

In [12]:
# Try the helper on the sample payload; the bare expression displays the result.
return_prediction(model=model, input_json=input_json)



9.24