In [1]:
# This is a sample abalone age predictor

### Imports

In [4]:
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error 

### Load data

In [7]:
# Column labels applied to the abalone CSV files when they are read:
# the target ("age") comes first, followed by the predictor columns.
names = [
    "age",
    "length",
    "diameter",
    "height",
    "whole_weight",
    "shucked_weight",
    "viscera_weight",
    "shell_weight",
    "sex_I",
    "sex_M",
]

In [8]:
# Read the training and validation splits, labelling the columns with `names`.
# Note: despite its name, `test_df` holds the validation file.
train_df = pd.read_csv(
    "../data/abalone_train.csv",
    names=names,
)
test_df = pd.read_csv(
    "../data/abalone_validation.csv",
    names=names,
)

### Building model

In [13]:
# Fit a random forest on the training split: every column except "age" is a
# predictor and "age" is the target. random_state pins the forest so the
# score printed below is reproducible.
abalone_model = RandomForestRegressor(n_estimators=100, random_state=123).fit(
    train_df.drop(columns="age"), train_df["age"]
)

# Predict ages for the validation split (held in `test_df`).
predicted_age = abalone_model.predict(test_df.drop(columns="age"))

# Mean absolute error on the validation split. scikit-learn's signature is
# (y_true, y_pred), so the observed ages go first; MAE is symmetric, so the
# value is the same as before, but the call now matches the documented order.
mae = mean_absolute_error(test_df["age"], predicted_age)
print(f"MAE = {mae:.2f} years")



MAE = 1.52 years


### Save model

In [15]:
# Re-fit model on full dataset to get ready for deployment and save it using joblib

In [None]:
# Note: you could reduce the number of features the model considers.

In [21]:
# Predictor columns kept for the deployed model.
features = ["length", "diameter", "height", "whole_weight"]
# Stack the training and validation rows so the final model is fit on all
# of the available data.
full_X = pd.concat((train_df[features], test_df[features]))
full_y = pd.concat((train_df["age"], test_df["age"]))
# Refit the final model. random_state is fixed so that re-running the notebook
# produces the same saved model and the same predictions.
final_model = RandomForestRegressor(n_estimators=100, random_state=123).fit(full_X, full_y)

In [22]:
# Persist the final model with joblib into both deployment folders,
# web_api and web_application.
# NOTE(review): presumably one copy serves the API and the other the app
# backend -- confirm.
for model_path in (
    "web_api/abalone_predictor.joblib",          # copy for the API
    "web_application/abalone_predictor.joblib",  # copy for the app
):
    with open(model_path, "wb") as f:
        joblib.dump(final_model, f)

### Prediction function

In [23]:
# Define a function that accepts input data and returns a prediction computed
# by our model; that prediction is what gets returned to the user.

# Inputs should be in JSON format (i.e. a dictionary). This is a sample payload.
input_json = dict(
    length=0.41,
    diameter=0.33,
    height=0.10,
    whole_weight=0.36,
)

In [34]:
def return_prediction(model, input_json):
    """Return the model's prediction for a single observation.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``
        If the estimator has a ``feature_names_in_`` attribute, it defines
        which keys are read from ``input_json`` and in what order.
    input_json : dict
        Mapping of feature name to value for one observation.

    Returns
    -------
    The first (and only) element of ``model.predict(...)``.

    Raises
    ------
    KeyError
        If the model declares feature names and ``input_json`` lacks one.
    """
    feature_names = getattr(model, "feature_names_in_", None)

    if feature_names is None:
        # The model does not declare its features, so fall back to the
        # payload's own key order and pass a plain 2-D list (one row).
        input_data = [[input_json[key] for key in input_json.keys()]]
    else:
        # Order the values by the model's declared features, not by the
        # payload's key order: JSON key order is not guaranteed to match,
        # and a mismatch would silently produce a wrong prediction.
        feature_names = list(feature_names)
        missing = [name for name in feature_names if name not in input_json]
        if missing:
            raise KeyError(f"input_json is missing required feature(s): {missing}")
        # A labelled single-row frame keeps the feature names attached to the
        # values that are handed to predict().
        input_data = pd.DataFrame(
            [[input_json[name] for name in feature_names]],
            columns=feature_names,
        )

    # predict() returns one value per row; there is exactly one row here.
    prediction = model.predict(input_data)[0]
    return prediction

In [35]:
# Score the sample payload with the final model. The bare name on the last
# line makes the notebook display the predicted value.
inp = return_prediction(model=final_model, input_json=input_json)
inp



9.28