In [1]:
import pandas as pd
import numpy as np
import uuid
import joblib
from datetime import date

from lib.experience_data import pull_explanatory_variables, create_model, run_predictions
from lib.experience_data import process_data_for_analysis, get_latest_joblib
from lib.config import app, db
from lib.models import Hive, ModelHistory

Functions

Clean Hive Data for Study

In [2]:
# Hive.query needs an active Flask application context, so the rows are
# fetched and serialised (via each row's own to_dict()) inside the block.
with app.app_context():
    hives = [
        hive_row.to_dict()
        for hive_row in Hive.query.all()
    ]

In [3]:
# process_data_for_analysis returns two frames; df_aggregated is the one
# the rest of this notebook works with.
(df_normalized, df_aggregated) = process_data_for_analysis(hives)

# Preview the first five rows of the aggregated frame.
df_aggregated.head(5)

Unnamed: 0,hive_id,date_added,city,state,honey_pull_id,date_reset,weight,date_pulled,count,temp,...,wasps_hornets_present,mice_present,robber_bees_present,has_chalkbrood,has_twisted_larvae,material_Polystyrene,material_Wood,days,avg_daily_weight,avg_30_day_weight
0,542,2024-08-13,North Markfurt,West Virginia,1665.0,2024-08-13,8.042778,2024-12-26,19,11.689474,...,0,6,2,1,3,19,0,135,0.059576,1.787284
1,544,2023-06-29,Lake Michaelburgh,Georgia,1668.0,2023-06-29,8.163584,2023-11-13,19,22.047368,...,1,7,4,3,0,19,0,137,0.059588,1.787646
2,544,2023-06-29,Lake Michaelburgh,Georgia,1669.0,2023-11-18,15.543398,2024-05-14,25,8.356,...,0,11,2,1,1,25,0,178,0.087322,2.619674
3,544,2023-06-29,Lake Michaelburgh,Georgia,1670.0,2024-05-25,23.70784,2024-09-15,16,28.05,...,2,4,0,0,4,16,0,113,0.209804,6.294117
4,544,2023-06-29,Lake Michaelburgh,Georgia,1671.0,2024-09-19,11.002376,2025-02-08,20,8.725,...,0,6,4,3,2,20,0,142,0.077482,2.324446


Run Study and Save to Joblib

In [4]:
# Derive the explanatory variables from the aggregated frame.
explanatory_variables = pull_explanatory_variables(df_aggregated)

# A fresh UUID in the file name keeps each run's artifact path unique.
joblib_loc = f'exp_study{uuid.uuid4().hex}.joblib'

# joblib_loc is handed to create_model and recorded in the database by the
# cells below.
model, test_results, importance_df = create_model(
    df_aggregated,
    explanatory_variables,
    joblib_loc,
)

# Keep only the actual-vs-predicted columns; .copy() detaches the slice
# from test_results.
results = test_results.loc[
    :, ['weight', 'avg_daily_weight', 'days', 'avg_predicted', 'predicted']
].copy()

results

Unnamed: 0,weight,avg_daily_weight,days,avg_predicted,predicted
43,13.55728,0.085806,158,0.165185,26.099229
40,9.490434,0.056829,167,0.182571,30.489426
46,22.848384,0.18883,121,0.181033,21.905027
12,11.438874,0.115544,99,0.163797,16.215871
24,24.129024,0.159795,151,0.186378,28.143019
31,11.945725,0.071531,167,0.104225,17.405548
17,15.704128,0.110592,142,0.143214,20.33634
32,11.656691,0.074246,157,0.161176,25.304614
3,23.70784,0.209804,113,0.124412,14.058502
30,10.350954,0.077827,133,0.126474,16.821104


Save Joblib location to Database

In [5]:
with app.app_context():
    # Only the artifact path is supplied here; start_date / end_date are
    # assigned by the activation cell that follows.
    new_study = ModelHistory(joblib_loc=joblib_loc)

    # Stage the new row and persist it.
    db.session.add(new_study)
    db.session.commit()

Update active model in Database

In [6]:
with app.app_context():
    # Evaluate the date once so the retired and the newly activated rows
    # always carry the same day, even if the cell runs across midnight.
    today = date.today()

    # Look up the row for the model trained in this session first, so that a
    # missing row aborts the cell before any other row is modified.
    current_model = ModelHistory.query.filter_by(
        joblib_loc=joblib_loc
    ).first()
    if current_model is None:
        raise LookupError(
            f"No ModelHistory row found for joblib_loc={joblib_loc!r}; "
            "run the 'Save Joblib location to Database' cell first."
        )

    # The active model is the row that has been started but not yet ended.
    prior_model = ModelHistory.query.filter(
        ModelHistory.end_date.is_(None),
        ModelHistory.start_date.isnot(None),
    ).first()

    # On the very first run there is no active model yet, so there is
    # nothing to retire.
    if prior_model is not None:
        prior_model.end_date = today

    # Activate the new model. This runs after the retirement step so that
    # re-running the cell (where prior_model is current_model) still leaves
    # the current model open-ended.
    current_model.start_date = today
    current_model.end_date = None

    # Commit both changes to the database together.
    db.session.commit()