In [10]:
import data_processing as dclean
from config import app, db
from models import Hive, ModelHistory
import pandas as pd
import numpy as np
import experience_study as exps
import uuid
import joblib

Clean Hive Data for Study

In [11]:
# Load every Hive row and convert each one to a plain dict via its to_dict()
# method. The query is issued inside the Flask application context.
with app.app_context():
    hives = [
        record.to_dict()
        for record in Hive.query.all()
    ]

KeyboardInterrupt: 

In [3]:
# Cleaning pipeline: each stage consumes the previous stage's result.
# All three intermediates stay as notebook-level names; df_aggregated is the
# input to the study cell further down.
hives_mod = dclean.rename_ids(hives)
df_normalized = dclean.normalize_data(hives_mod)
df_aggregated = dclean.aggregate_data(df_normalized)

Run Study and Save to Joblib

In [12]:
# Ask the experience-study module which explanatory variables to use for the
# aggregated frame.
explanatory_variables = exps.pull_explanatory_variables(df_aggregated)

# A random hex suffix gives every run its own artifact filename.
joblib_loc = f'exp_study{uuid.uuid4().hex}.joblib'

# create_model receives the artifact path and hands back the fitted model plus
# a results table (presumably it also writes the joblib file -- confirm in
# experience_study).
model, test_results = exps.create_model(
    df_aggregated,
    explanatory_variables,
    joblib_loc,
)

Save Joblib location to Database

In [13]:
with app.app_context():
    # Register this run's artifact path; joblib_loc is the only field set here.
    # NOTE(review): run_predictions() only accepts rows whose start_date is
    # non-null, and the recorded run raised "No active model found" after this
    # cell -- confirm whether start_date should be populated here or is set by
    # a separate activation step.
    new_study = ModelHistory(joblib_loc=joblib_loc)

    # Stage the new row, then persist it.
    db.session.add(new_study)
    db.session.commit()

In [14]:
# Show observed weight next to the model's prediction for the rows in the
# results table returned by exps.create_model.
# NOTE(review): in the recorded output every prediction is a large negative
# number while every weight is positive -- confirm the predictions are on the
# same scale as weight.
test_results.loc[:, ['weight', 'predictions']]

Unnamed: 0,weight,predictions
13,21.043748,-2789.90379
45,33.759202,-3107.818483
47,160.907706,-1076.207228
44,195.91951,-1439.835803
17,26.898822,-3321.199253
27,66.974855,-1549.455214
26,11.253755,-3209.759299
25,159.759734,-1365.516249
31,49.134799,-2638.242879
19,164.044244,-1318.973863


In [20]:
def run_predictions(hives):
    """Score hive records with the currently active experience-study model.

    Args:
        hives: Hive records, passed unchanged to
            ``dclean.process_data_for_analysis`` with ``actuals=False``.
            (Presumably the list of ``Hive.to_dict()`` results built earlier
            in this notebook -- confirm against ``data_processing``.)

    Returns:
        Whatever ``exps.add_predicted_values`` returns for the prepared input.

    Raises:
        ValueError: If no ``ModelHistory`` row has ``start_date`` set and
            ``end_date`` unset.
    """
    # Only the prediction-input frame is used below; the normalized frame that
    # comes back alongside it is discarded.
    _, df_prediction_input = dclean.process_data_for_analysis(hives, actuals=False)

    with app.app_context():
        # "Active" means started and not yet ended. Ordering by start_date makes
        # the most recently started model win deterministically when more than
        # one row qualifies (a bare .first() leaves the choice to the database).
        model_record = (
            ModelHistory.query
            .filter(
                ModelHistory.end_date == None,  # noqa: E711 -- builds SQL "IS NULL"
                ModelHistory.start_date != None,  # noqa: E711 -- builds SQL "IS NOT NULL"
            )
            .order_by(ModelHistory.start_date.desc())
            .first()
        )

        if not model_record:
            raise ValueError('No active model found')

        # Read the path while the application context is still open so the
        # value does not depend on the ORM object after the context is torn down.
        artifact_path = model_record.joblib_loc

    # Load the model, scaler, and explanatory variables from the joblib file.
    # SECURITY: joblib.load unpickles the file and can execute arbitrary code;
    # only load artifacts that this project wrote itself.
    joblib_data = joblib.load(artifact_path)
    model = joblib_data['model']
    scaler = joblib_data['scaler']
    explanatory_variables = joblib_data['explanatory_variables']

    # Make predictions using the active model.
    return exps.add_predicted_values(explanatory_variables, df_prediction_input, model, scaler)

In [21]:
# Exercise the prediction path with the hives loaded earlier in the notebook.
# NOTE(review): the recorded run raised "ValueError: No active model found",
# i.e. no ModelHistory row matched the active-model filter at that time.
run_predictions(hives)

ValueError: No active model found

In [None]:


# NOTE(review): unexecuted scratch cell. In the visible cells neither
# `df_prediction_input` nor `scalar` is defined at notebook scope, and the
# argument order and count differ from the call inside run_predictions
# (explanatory_variables, df_prediction_input, model, scaler). Confirm the
# intended call or delete this cell.
exps.add_predicted_values(df_prediction_input, explanatory_variables, scalar)

In [85]:
# Show observed weight next to the model's prediction (same view as the
# earlier results cell).
# NOTE(review): in the recorded output every prediction is a large negative
# number while every weight is positive -- confirm the predictions are on the
# same scale as weight.
test_results.loc[:, ['weight', 'predictions']]

Unnamed: 0,weight,predictions
13,21.043748,-2789.90379
45,33.759202,-3107.818483
47,160.907706,-1076.207228
44,195.91951,-1439.835803
17,26.898822,-3321.199253
27,66.974855,-1549.455214
26,11.253755,-3209.759299
25,159.759734,-1365.516249
31,49.134799,-2638.242879
19,164.044244,-1318.973863


In [65]:
# Re-runs the same three cleaning stages as the earlier cleaning cell, printing
# a progress marker after each one, then exports both frames as dicts.
# NOTE(review): this duplicates the earlier pipeline cell and rebinds the same
# notebook-level names -- consider keeping only one copy.
hives_mod = dclean.rename_ids(hives)
print('renamed ids...')

df_normalized = dclean.normalize_data(hives_mod)
print('df normalized...')

df_aggregated = dclean.aggregate_data(df_normalized)
print('df aggregated...')

# orient='list' yields {column name: [values, ...]} for each frame.
json_normalized = df_normalized.to_dict(orient='list')
print('json normalized...')

json_aggregated = df_aggregated.to_dict(orient='list')
print('json aggregated...')

renamed ids...
df normalized...
df aggregated...
json normalized...
json aggregated...
