In [1]:
import uuid
from datetime import date

from lib.experience_data import pull_explanatory_variables, create_model, run_predictions
from lib.experience_data import process_data_for_analysis, get_latest_joblib
from lib.config import app, db
from lib.models import Hive, ModelHistory

Functions

Clean Hive Data for Study

In [2]:
# Run the query inside an application context and keep only the dicts
# produced by to_dict(), one per Hive record returned by the query.
with app.app_context():
    hives = [
        hive_row.to_dict()
        for hive_row in Hive.query.all()
    ]

In [3]:
# process_data_for_analysis returns two objects for the hive dicts; the cells
# shown in this notebook only go on to use df_aggregated.
(df_normalized, df_aggregated) = process_data_for_analysis(hives)

# Preview the first five aggregated rows.
df_aggregated.head(n=5)

Unnamed: 0,hive_id,date_added,city,state,honey_pull_id,date_reset,weight,date_pulled,count,temp,...,wasps_hornets_present,mice_present,robber_bees_present,has_chalkbrood,has_twisted_larvae,material_Polystyrene,material_Wood,days,avg_daily_weight,avg_30_day_weight
0,579,2023-05-11,Lake Kevinchester,South Carolina,1806.0,2023-05-11,17.964672,2023-10-01,20,26.02,...,2,4,3,0,1,0,20,143,0.125627,3.768812
1,579,2023-05-11,Lake Kevinchester,South Carolina,1807.0,2023-10-12,7.700732,2024-02-12,17,7.094118,...,0,4,1,4,2,0,17,123,0.062608,1.878227
2,579,2023-05-11,Lake Kevinchester,South Carolina,1808.0,2024-02-23,14.922702,2024-07-22,21,19.028571,...,3,6,3,1,2,0,21,150,0.099485,2.98454
3,579,2023-05-11,Lake Kevinchester,South Carolina,1809.0,2024-07-27,10.144742,2025-01-15,24,16.9125,...,2,11,4,3,1,0,24,172,0.058981,1.769432
4,580,2023-11-18,Oconnorburgh,Kentucky,1811.0,2023-11-18,4.17792,2024-03-04,15,1.613333,...,0,6,3,1,2,0,15,107,0.039046,1.171379


Run Study and Save to Joblib

In [4]:
# Choose the explanatory variables from the aggregated frame and give this
# run's model file a unique name built from a random UUID hex string.
explanatory_variables = pull_explanatory_variables(df_aggregated)
joblib_loc = f'exp_study{uuid.uuid4().hex}.joblib'

# create_model receives joblib_loc; presumably it writes the fitted model
# there -- confirm in lib.experience_data.
model, test_results, importance_df = create_model(
    df_aggregated,
    explanatory_variables,
    joblib_loc,
)

# Keep only the actual-vs-predicted columns for review; .copy() makes the
# result independent of test_results.
results = test_results.loc[
    :, ['weight', 'avg_daily_weight', 'days', 'avg_predicted', 'predicted']
].copy()

results

Unnamed: 0,weight,avg_daily_weight,days,avg_predicted,predicted
8,7.276544,0.057296,127,0.005216,0.662471
16,12.980198,0.120187,108,0.013679,1.477378
0,17.964672,0.125627,143,0.108824,15.561894
24,13.508352,0.143706,94,0.0,0.0
11,6.509722,0.065097,100,0.01959,1.959049
9,7.052282,0.046397,152,0.077211,11.736077
13,7.068864,0.060938,116,0.0,0.0
1,7.700732,0.062608,123,0.029897,3.677359


Save Joblib location to Database

In [5]:
with app.app_context():
    # Record where this run's joblib file lives. Only joblib_loc is passed,
    # so every other field is left to whatever ModelHistory defines.
    new_study = ModelHistory(joblib_loc=joblib_loc)

    # Stage the new row and persist it with a single commit.
    db.session.add(new_study)
    db.session.commit()

Update active model in Database

In [6]:
# Promote the model saved under this run's joblib_loc to "active" and close
# out the previously active one, if there is one.
#
# In this cell an "active" row is one with start_date set and end_date NULL.
with app.app_context():
    # Use one date for both rows so they cannot disagree if the cell happens
    # to run across midnight.
    today = date.today()

    # Look up the row for this run first, so nothing is modified when it is
    # missing (e.g. the cell that saves joblib_loc was skipped).
    current_model = ModelHistory.query.filter_by(
        joblib_loc=joblib_loc
    ).first()
    if current_model is None:
        raise LookupError(
            f'No ModelHistory row found for joblib_loc={joblib_loc!r}; '
            'save the model location to the database before activating it.'
        )

    # .is_(None) / .isnot(None) render IS NULL / IS NOT NULL explicitly.
    prior_model = ModelHistory.query.filter(
        ModelHistory.end_date.is_(None),
        ModelHistory.start_date.isnot(None),
    ).first()

    # On the very first run there is no active model, so there is nothing to
    # close. On a re-run the active model is the current one and must stay open.
    if prior_model is not None and prior_model is not current_model:
        prior_model.end_date = today

    # Mark the new model as active from today with no end date.
    current_model.start_date = today
    current_model.end_date = None

    # Persist both updates in a single commit.
    db.session.commit()