In [1]:
import data_processing as dclean
from config import app, db
from models import Hive, ModelHistory
import pandas as pd
import numpy as np
import experience_study as exps
import uuid
import joblib
from datetime import date

Functions

Clean Hive Data for Study

In [3]:
# Database access needs an active Flask application context.
with app.app_context():
    # Fetch every Hive row and convert each one to a plain dict via its to_dict() method.
    hives = [record.to_dict() for record in Hive.query.all()]

In [4]:
# Turn the raw hive dicts into the two frames used by the study:
# the first returned frame is kept as df_normalized, the second as df_aggregated.
(df_normalized, df_aggregated) = dclean.process_data_for_analysis(hives)

# Preview the first five aggregated rows.
df_aggregated.head(5)

Unnamed: 0,hive_id,date_added,city,state,honey_pull_id,date_reset,weight,date_pulled,count,temp,...,hive_beetles_present,wax_moths_present,wasps_hornets_present,mice_present,robber_bees_present,has_chalkbrood,has_twisted_larvae,material_Polystyrene,material_Wood,days
0,182,2023-03-10,Macdonaldside,Maryland,528.0,2023-03-10,40.510924,2023-08-30,25,22.552,...,20,0,17,0,10,6,1,0,0,173
1,182,2023-03-10,Macdonaldside,Maryland,529.0,2023-09-13,150.918416,2023-12-21,15,14.806667,...,5,2,2,0,0,0,3,0,0,99
2,182,2023-03-10,Macdonaldside,Maryland,530.0,2023-12-28,101.099227,2024-06-09,24,7.3,...,5,1,1,3,2,0,2,0,0,164
3,182,2023-03-10,Macdonaldside,Maryland,531.0,2024-06-10,61.505208,2024-09-10,14,29.707143,...,5,3,6,0,4,0,0,0,0,92
4,182,2023-03-10,Macdonaldside,Maryland,532.0,2024-09-20,158.11645,2025-01-01,15,11.78,...,2,2,1,0,2,2,1,0,0,103


Run Study and Save to Joblib

In [5]:
# Give this run's artifact a unique file name so earlier studies are never overwritten.
joblib_loc = f'exp_study{uuid.uuid4().hex}.joblib'

# Derive the explanatory variables from the aggregated frame.
explanatory_variables = exps.pull_explanatory_variables(df_aggregated)

# Fit the model; joblib_loc is handed to create_model
# (presumably where the fitted artifacts are persisted -- confirm in experience_study).
model, test_results, importance_df = exps.create_model(
    df_aggregated,
    explanatory_variables,
    joblib_loc,
)

# Show actual vs. predicted weight for the returned test results.
test_results.loc[:, ['weight', 'predicted']]



Unnamed: 0,weight,predicted
13,21.043748,3.360514
45,33.759202,-12.858297
47,160.907706,7.739705
44,195.91951,23.389971
17,26.898822,-5.822864
27,66.974855,6.82906
26,11.253755,-13.776974
25,159.759734,29.668048
31,49.134799,-6.525052
19,164.044244,6.248504


In [6]:
# Reload the saved bundle from disk.
# NOTE(review): the 'joblib/' directory prefix is assumed to match where
# exps.create_model writes the file -- confirm in experience_study.
# joblib.load unpickles the file, so only load artifacts produced by this project.
joblib_data = joblib.load(fr'joblib/{joblib_loc}')

# Pull the three stored entries out of the bundle by key.
model, scaler, explanatory_variables = (
    joblib_data[key] for key in ('model', 'scaler', 'explanatory_variables')
)

Save Joblib location to Database

In [7]:
# Record the new model artifact in the ModelHistory table.
with app.app_context():
    # Only the artifact location is set here; start/end dates are left unset.
    new_study = ModelHistory(joblib_loc=joblib_loc)

    # Stage the row and persist it.
    db.session.add(new_study)
    db.session.commit()

Update active model in Database

In [9]:
# Promote the newly saved model to "active" and retire the previously active one.
# A model is treated as active when it has a start_date but no end_date.
with app.app_context():
    # Use one timestamp so the retired and the promoted rows carry the same date.
    today = date.today()

    # Look up the new model first so a missing row aborts before anything is modified.
    current_model = ModelHistory.query.filter_by(
        joblib_loc=joblib_loc
    ).first()
    if current_model is None:
        raise LookupError(
            f'No ModelHistory row found for joblib_loc={joblib_loc!r}; '
            'save the model to the database before activating it.'
        )

    # Find the currently active model (started but not yet ended), if any.
    prior_model = ModelHistory.query.filter(
        ModelHistory.end_date.is_(None),
        ModelHistory.start_date.isnot(None),
    ).first()

    # On the very first activation there is no prior model to retire;
    # .first() returns None in that case, so guard before assigning.
    if prior_model is not None:
        prior_model.end_date = today

    # Set the start and end dates of the new active model.
    # (If this cell is re-run and prior_model is the same row, these
    # assignments restore it to the active state.)
    current_model.start_date = today
    current_model.end_date = None

    # Commit both changes to the database together.
    db.session.commit()

In [None]:
# Re-point joblib_loc at whatever exps.get_latest_joblib() reports
# (presumably the most recently activated/saved model -- confirm in experience_study).
joblib_loc = exps.get_latest_joblib()

# Re-process the hive dicts with actuals=False to build the prediction input frame.
df_normalized, df_prediction_input = dclean.process_data_for_analysis(
    hives,
    actuals=False,
)
predicted_values = exps.run_predictions(df_prediction_input, joblib_loc)

# Keep only the prediction column, indexed by hive_id.
# NOTE(review): the saved output under this cell and the training cell both show a
# column named `predicted`, not `predictions` -- confirm the column name returned by
# exps.run_predictions, otherwise this selection raises KeyError.
predictions_only = predicted_values.set_index('hive_id')[['predictions']]

# Nested dict of the form {column_name: {hive_id: value}}.
prediction_dict = predictions_only.to_dict()

Unnamed: 0,hive_id,date_added,city,state,honey_pull_id,date_reset,count,temp,bias,num_pollen_patties,...,wax_moths_present,wasps_hornets_present,mice_present,robber_bees_present,has_chalkbrood,has_twisted_larvae,material_Polystyrene,material_Wood,days,predicted
0,182,2023-03-10,Macdonaldside,Maryland,533.0,2025-01-06,5,-2.38,2.4,0.4,...,0,0,0,1,1,0,0,0,30,48.498778
1,183,2023-10-15,East Matthew,West Virginia,537.0,2024-10-22,16,6.9875,1.3125,1.125,...,1,3,1,1,1,2,0,16,106,6.450024
2,184,2024-02-23,Moyerstad,New Mexico,540.0,2024-11-30,10,3.24,1.3,0.9,...,0,1,0,5,0,0,0,10,67,38.562674
3,185,2024-07-29,West Aliciaport,Illinois,542.0,2025-01-27,2,6.3,1.5,1.0,...,0,0,0,0,0,0,0,0,9,63.170861
4,186,2025-01-22,East Donald,Delaware,543.0,2025-01-22,2,-3.8,2.0,1.0,...,0,0,0,1,0,0,0,0,14,55.814161


Testing

In [65]:
# Step-by-step run of the individual data_processing stages, printing a marker
# after each one so a failing stage is easy to spot.
# Note: this rebinds df_normalized and df_aggregated used by earlier cells.

# Stage 1: rename ids on the raw hive dicts.
hives_mod = dclean.rename_ids(hives)
print("renamed ids...")

# Stage 2: normalize the renamed records.
df_normalized = dclean.normalize_data(hives_mod)
print("df normalized...")

# Stage 3: aggregate the normalized frame.
df_aggregated = dclean.aggregate_data(df_normalized)
print("df aggregated...")

# Stage 4: convert both frames to {column: [values, ...]} dicts.
json_normalized = df_normalized.to_dict(orient="list")
print("json normalized...")

json_aggregated = df_aggregated.to_dict(orient="list")
print("json aggregated...")

renamed ids...
df normalized...
df aggregated...
json normalized...
json aggregated...
