In [1]:
import data_processing as dclean
from config import app, db
from models import Hive, ModelHistory
import pandas as pd
import numpy as np
import experience_study as exps
import uuid
import joblib
from datetime import date

Functions

In [2]:
def get_latest_joblib():
    """Return the ``joblib_loc`` of the model that predictions should use.

    Lookup order:

    1. A ``ModelHistory`` row with ``end_date`` unset and ``start_date`` set
       (the active model).
    2. Otherwise the first row with ``end_date`` unset. That row is activated
       on the spot: its ``start_date`` is set to today and the change is
       committed.

    NOTE(review): neither query has an ``order_by``, so when several rows
    match, which one ``.first()`` returns is up to the database. The
    "most recent model" wording in the log message is therefore not
    guaranteed -- confirm whether an explicit ordering is wanted.

    Returns:
        The ``joblib_loc`` attribute of the selected ``ModelHistory`` row.

    Raises:
        ValueError: ``'No models found'`` when no row qualifies or when the
            lookup/commit fails. The underlying error is attached as
            ``__cause__`` so the real reason is visible in the traceback.
    """
    try:
        with app.app_context():
            # `== None` / `!= None` are intentional: SQLAlchemy overloads
            # them on columns to build IS NULL / IS NOT NULL.
            model_record = ModelHistory.query.filter(
                ModelHistory.end_date == None,
                ModelHistory.start_date != None
            ).first()

            if model_record is None:
                print('No active model found, saving most recent model...')

                # Fall back to any model that has not been retired yet.
                model_record = ModelHistory.query.filter(
                    ModelHistory.end_date == None,
                ).first()

                if model_record is None:
                    # Say so explicitly instead of relying on an
                    # AttributeError from touching `None` below.
                    raise LookupError('ModelHistory has no row with end_date unset')

                # Activate the fallback model and persist that.
                model_record.start_date = date.today()
                db.session.commit()

            joblib_loc = model_record.joblib_loc
    except Exception as exc:
        # `Exception`, not a bare `except`, so KeyboardInterrupt / SystemExit
        # still propagate. Callers keep seeing the same ValueError as before.
        raise ValueError('No models found') from exc

    return joblib_loc

Clean Hive Data for Study

In [3]:
# Read every Hive row and convert each one with its own `to_dict()` while the
# Flask app context is open; later cells work on the resulting list.
with app.app_context():
    hives = [record.to_dict() for record in Hive.query.all()]

In [4]:
# Three-stage cleaning pipeline; each stage feeds the next, and every
# intermediate keeps its own name so it can be inspected separately.
# What each `dclean` helper does internally is not visible in this notebook.
hives_mod = dclean.rename_ids(hives)
df_normalized = dclean.normalize_data(hives_mod)
# `df_aggregated` is the input to the experience study below.
df_aggregated = dclean.aggregate_data(df_normalized)

Run Study and Save to Joblib

In [5]:
# Ask the study module which columns to use as explanatory variables.
explanatory_variables=exps.pull_explanatory_variables(df_aggregated)
# Unique artifact file name (random UUID hex), so a new run gets its own name.
joblib_loc = f'exp_study{uuid.uuid4().hex}.joblib'

# Fit the model. `joblib_loc` is passed in and the next cell loads
# 'joblib/<joblib_loc>', so presumably create_model writes the artifact
# there -- TODO confirm against experience_study.create_model.
model, test_results = exps.create_model(df_aggregated, explanatory_variables, joblib_loc)

# Show the 'weight' and 'predicted' columns of the test results.
test_results[['weight', 'predicted']]



Unnamed: 0,weight,predicted
13,21.043748,4.551651
45,33.759202,-15.084636
47,160.907706,7.59273
44,195.91951,25.938818
17,26.898822,-7.562116
27,66.974855,8.304719
26,11.253755,-14.646378
25,159.759734,33.53765
31,49.134799,-5.276948
19,164.044244,7.057786


In [6]:
# Reload the artifact under the name generated above and unpack its three
# entries. This rebinds `model` and `explanatory_variables` to the persisted
# copies.
# NOTE(review): joblib.load unpickles the file, so only load artifacts this
# project produced itself.
joblib_data = joblib.load(fr'joblib/{joblib_loc}')
model, scaler, explanatory_variables = (
    joblib_data['model'],
    joblib_data['scaler'],
    joblib_data['explanatory_variables'],
)

Save Joblib location to Database

In [7]:
# Register the freshly trained model: only its artifact file name is supplied
# here; the start/end dates are filled in by the "Update active model" cell.
with app.app_context():
    new_study = ModelHistory(joblib_loc=joblib_loc)

    # Stage the row and persist it in one transaction.
    db.session.add(new_study)
    db.session.commit()

Update active model in Database

In [14]:
# Read-only inspection before promoting: look up the active model and the row
# for the new artifact, then print the latter. Nothing is written.
with app.app_context():
    # Active model: end_date unset and start_date set (`== None` / `!= None`
    # are the SQLAlchemy spellings of IS NULL / IS NOT NULL).
    prior_model = (
        ModelHistory.query
        .filter(ModelHistory.end_date == None, ModelHistory.start_date != None)
        .first()
    )

    # Row registered for the model trained in this notebook run.
    current_model = ModelHistory.query.filter_by(joblib_loc=joblib_loc).first()

    print(current_model)


<ModelHistory 12>


In [15]:
# Promote the newly trained model: retire the active one (if any) and mark
# the new one active as of today.
with app.app_context():
    # Row registered for the model trained in this notebook run.
    current_model = ModelHistory.query.filter_by(
        joblib_loc=joblib_loc
    ).first()

    if current_model is None:
        # `.first()` returns None when nothing matches; fail with a clear
        # message instead of an AttributeError further down.
        raise ValueError(
            f'No ModelHistory row found for {joblib_loc!r}; '
            'save the model to the database before promoting it'
        )

    # Active model: end_date unset and start_date set (`== None` / `!= None`
    # are the SQLAlchemy spellings of IS NULL / IS NOT NULL).
    prior_model = ModelHistory.query.filter(
        ModelHistory.end_date == None,
        ModelHistory.start_date != None
    ).first()

    today = date.today()

    # There may be no active model yet (first promotion), and on a re-run of
    # this cell the "prior" model is the current one -- skip retiring then.
    if prior_model is not None and prior_model is not current_model:
        prior_model.end_date = today

    # Set the start and end dates
    current_model.start_date = today
    current_model.end_date = None

    # Commit both changes in a single transaction.
    db.session.commit()

In [16]:
# Resolve the active model's artifact name from the database.
# NOTE(review): this rebinds `joblib_loc`, which earlier cells set to the
# freshly generated file name; re-running those cells afterwards will use
# whatever this call returned.
joblib_loc = get_latest_joblib()
# Prepare prediction inputs from the raw hive dicts with `actuals=False`.
# NOTE(review): this also rebinds `df_normalized` from the cleaning cell.
df_normalized, df_prediction_input = dclean.process_data_for_analysis(hives, actuals=False)

# Score the prepared rows with the model stored under `joblib_loc`.
predicted_values = exps.run_predictions(df_prediction_input, joblib_loc)

# Preview the first rows of the result.
predicted_values.head()

Unnamed: 0,hive_id,date_added,city,state,honey_pull_id,date_reset,count,temp,bias,num_pollen_patties,...,wax_moths_present,wasps_hornets_present,mice_present,robber_bees_present,has_chalkbrood,has_twisted_larvae,material_Polystyrene,material_Wood,days,predicted
0,182,2023-03-10,Macdonaldside,Maryland,533.0,2025-01-06,5,-2.38,2.4,0.4,...,0,0,0,1,1,0,0,0,30,53.037198
1,183,2023-10-15,East Matthew,West Virginia,537.0,2024-10-22,16,6.9875,1.3125,1.125,...,1,3,1,1,1,2,0,16,106,4.56963
2,184,2024-02-23,Moyerstad,New Mexico,540.0,2024-11-30,10,3.24,1.3,0.9,...,0,1,0,5,0,0,0,10,67,41.831715
3,185,2024-07-29,West Aliciaport,Illinois,542.0,2025-01-27,2,6.3,1.5,1.0,...,0,0,0,0,0,0,0,0,9,67.766906
4,186,2025-01-22,East Donald,Delaware,543.0,2025-01-22,2,-3.8,2.0,1.0,...,0,0,0,1,0,0,0,0,14,59.722947


Testing

In [65]:
# Smoke test of the cleaning pipeline: repeats the three steps of the
# "Clean Hive Data for Study" cell, printing a marker after each stage so a
# failure can be pinned to the step that raised.
hives_mod = dclean.rename_ids(hives)
print('renamed ids...')

df_normalized = dclean.normalize_data(hives_mod)
print('df normalized...')

df_aggregated = dclean.aggregate_data(df_normalized)
print('df aggregated...')

# Check that both frames convert with `to_dict(orient='list')`
# (column -> list of values, assuming these are pandas DataFrames).
json_normalized = df_normalized.to_dict(orient='list')
print('json normalized...')

json_aggregated = df_aggregated.to_dict(orient='list')
print('json aggregated...')

renamed ids...
df normalized...
df aggregated...
json normalized...
json aggregated...
