In [1]:
import data_processing as dclean
from config import app, db
from models import Hive, ModelHistory
import pandas as pd
import numpy as np
import experience_study as exps
import uuid
import joblib
from datetime import date

Functions

Clean Hive Data for Study

In [2]:
# Query every Hive row inside the Flask application context and convert each
# ORM object to a plain dict via its to_dict() method, so later cells do not
# need an active app context to use the data.
with app.app_context():
    hives = list(map(lambda hive_row: hive_row.to_dict(), Hive.query.all()))

In [3]:
# process_data_for_analysis returns a pair of frames; the second one
# (df_aggregated) is what the modelling cells below consume.
(df_normalized, df_aggregated) = dclean.process_data_for_analysis(hives)

# Preview the first five aggregated rows as the cell output.
df_aggregated.head(n=5)

Unnamed: 0,hive_id,date_added,city,state,honey_pull_id,date_reset,weight,date_pulled,count,temp,...,hive_beetles_present,wax_moths_present,wasps_hornets_present,mice_present,robber_bees_present,has_chalkbrood,has_twisted_larvae,material_Polystyrene,material_Wood,days
0,182,2023-03-10,Macdonaldside,Maryland,528.0,2023-03-10,40.510924,2023-08-30,25,22.552,...,20,0,17,0,10,6,1,0,0,173
1,182,2023-03-10,Macdonaldside,Maryland,529.0,2023-09-13,150.918416,2023-12-21,15,14.806667,...,5,2,2,0,0,0,3,0,0,99
2,182,2023-03-10,Macdonaldside,Maryland,530.0,2023-12-28,101.099227,2024-06-09,24,7.3,...,5,1,1,3,2,0,2,0,0,164
3,182,2023-03-10,Macdonaldside,Maryland,531.0,2024-06-10,61.505208,2024-09-10,14,29.707143,...,5,3,6,0,4,0,0,0,0,92
4,182,2023-03-10,Macdonaldside,Maryland,532.0,2024-09-20,158.11645,2025-01-01,15,11.78,...,2,2,1,0,2,2,1,0,0,103


Run Study and Save to Joblib

In [4]:
# Column selection for the model is delegated to the experience-study module.
explanatory_variables = exps.pull_explanatory_variables(df_aggregated)

# A random hex suffix gives every run its own artifact file name.
joblib_loc = 'exp_study' + uuid.uuid4().hex + '.joblib'

# joblib_loc is handed to create_model; presumably the fitted model is
# persisted there -- TODO confirm in experience_study.create_model.
model, test_results, importance_df = exps.create_model(
    df_aggregated,
    explanatory_variables,
    joblib_loc,
)

# Show actual vs. predicted weight for the held-out rows.
test_results.loc[:, ['weight', 'predicted']]



Unnamed: 0,weight,predicted
13,21.043748,3.726869
45,33.759202,-17.025353
47,160.907706,6.120065
44,195.91951,21.503808
17,26.898822,-8.95681
27,66.974855,6.429337
26,11.253755,-17.851267
25,159.759734,32.083246
31,49.134799,-4.32306
19,164.044244,7.303572


Save Joblib Location to Database

In [5]:
with app.app_context():
    # Create a ModelHistory row that records where the new model file lives.
    # Only joblib_loc is set here; start_date / end_date are assigned by the
    # "Update active model" cell.
    new_study = ModelHistory(joblib_loc=joblib_loc)

    # Stage the row and persist it in one transaction.
    db.session.add(new_study)
    db.session.commit()

Update Active Model in Database

In [6]:
with app.app_context():
    # Use one timestamp so the retiring and the new model share the same date
    # even if the cell happens to run across midnight.
    today = date.today()

    # The currently active model is the row that has been started
    # (start_date set) but not yet ended (end_date still NULL).
    # `== None` / `!= None` are intentional: SQLAlchemy overloads them to
    # emit IS NULL / IS NOT NULL.
    prior_model = ModelHistory.query.filter(
        ModelHistory.end_date == None,  # noqa: E711
        ModelHistory.start_date != None,  # noqa: E711
    ).first()

    # The row for the model trained in this notebook run.
    current_model = ModelHistory.query.filter_by(
        joblib_loc=joblib_loc
    ).first()

    # .first() returns None when nothing matches. Fail with a clear message
    # *before* mutating anything if the new model was never saved.
    if current_model is None:
        raise LookupError(
            f'No ModelHistory row found for joblib_loc={joblib_loc!r}; '
            'run the "Save Joblib location to Database" cell first.'
        )

    # On the very first run there is no active model to retire, so only
    # close the prior model when one actually exists.
    if prior_model is not None:
        prior_model.end_date = today

    # Activate the new model: started today, open-ended.
    current_model.start_date = today
    current_model.end_date = None

    # Commit both changes in a single transaction.
    db.session.commit()

In [8]:
# NOTE(review): this rebinds joblib_loc and df_normalized from earlier cells;
# re-running the cells above after this one will see the new values.
joblib_loc = exps.get_latest_joblib()

# Same processing entry point as before, this time with actuals=False.
df_normalized, df_prediction_input = dclean.process_data_for_analysis(
    hives,
    actuals=False,
)

# Score the prediction input with the model stored at joblib_loc.
predicted_values = exps.run_predictions(df_prediction_input, joblib_loc)

# Keep only the prediction column, keyed by hive id.
hive_and_prediction = predicted_values.loc[:, ['hive_id', 'predicted']]
predictions_only = hive_and_prediction.set_index(keys='hive_id')

# Default orientation: {column name -> {index value -> cell value}}.
prediction_dict = predictions_only.to_dict(orient='dict')
prediction_dict

{'predicted': {182: 50.005228046695045,
  183: 4.943931974158998,
  184: 38.256120753617076,
  185: 61.49723331360177,
  186: 54.18399634849368,
  187: -4.744911414404383,
  188: -6.988642964134576,
  189: 30.859511321367147,
  190: 44.43342737883674,
  191: 42.35350114273257,
  192: -8.17860355284741,
  193: 52.19300025921084,
  194: 40.923353996053095,
  195: 46.96648204741701,
  196: 4.081078738738901,
  197: 60.14447182338115,
  199: 28.913359824238288}}

Testing

In [65]:
# Manual walk through the data-processing steps one at a time, printing a
# marker after each so a failure can be pinned to a specific step.
# NOTE(review): this rebinds df_normalized and df_aggregated from earlier cells.

# Step 1: rename ids on the raw hive dicts.
hives_mod = dclean.rename_ids(hives)
print("renamed ids...")

# Step 2: normalize the renamed records.
df_normalized = dclean.normalize_data(hives_mod)
print("df normalized...")

# Step 3: aggregate the normalized frame.
df_aggregated = dclean.aggregate_data(df_normalized)
print("df aggregated...")

# Steps 4-5: convert each frame to a {column -> list of values} dict.
LIST_ORIENT = dict(orient="list")
json_normalized = df_normalized.to_dict(**LIST_ORIENT)
print("json normalized...")

json_aggregated = df_aggregated.to_dict(**LIST_ORIENT)
print("json aggregated...")

renamed ids...
df normalized...
df aggregated...
json normalized...
json aggregated...
