# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# --- CONFIGURATION ---
# Name of the Azure storage account and of the container that holds the
# Gold-layer data.
STORAGE_ACCOUNT = "beehavenstorage"
CONTAINER_GOLD = "gold"

# ABFSS location of the daily hive-health dataset. In a real Azure
# environment the data would be read directly from this URI; this script
# assumes the Gold data is otherwise available or mounted.
GOLD_DATA_PATH = (
    "abfss://"
    + CONTAINER_GOLD
    + "@"
    + STORAGE_ACCOUNT
    + ".dfs.core.windows.net/hive_health_daily/"
)

def train_model() -> None:
    """Train, evaluate, and save a linear regression for daily hive weight change.

    Builds a small in-memory sample of Gold-layer features (the production
    read from ``GOLD_DATA_PATH`` is left as a comment), holds out 20% of the
    rows, fits a ``LinearRegression`` on the remainder, prints RMSE and R2
    for the held-out rows, and writes the fitted model to
    ``honey_production_model.joblib`` in the current working directory.
    """
    print("Loading Gold Data for Training...")

    # 1. Load Data (Mocking the dataframe for the portfolio script structure)
    # In production: df = pd.read_parquet(GOLD_DATA_PATH)
    # Here we create a dummy sample to ensure the script logic is valid for reviewers
    data = {
        'avg_temp_c': [12, 15, 22, 25, 10],
        'avg_humidity_percent': [50, 60, 45, 40, 70],
        'total_flow_count': [1000, 1500, 3000, 3500, 800],
        'daily_weight_change_kg': [0.1, 0.2, 0.5, 0.6, -0.1],  # Target
    }
    df = pd.DataFrame(data)

    # 2. Feature Selection
    features = ['avg_temp_c', 'avg_humidity_percent', 'total_flow_count']
    target = 'daily_weight_change_kg'

    X = df[features]
    y = df[target]

    # 3. Train/Test Split
    print("Splitting data...")
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # 4. Model Training
    print("Training Linear Regression Model...")
    model = LinearRegression()
    model.fit(X_train, y_train)

    # 5. Evaluation
    predictions = model.predict(X_test)
    # Take the square root of the MSE ourselves: the `squared=False` keyword
    # was deprecated in scikit-learn 1.4 and removed in 1.6, so passing it
    # raises TypeError on current releases. This form works on every version.
    rmse = mean_squared_error(y_test, predictions) ** 0.5

    # R2 is not defined for fewer than two samples (scikit-learn warns and
    # returns NaN). The 5-row sample with test_size=0.2 yields a single test
    # row, so handle that case explicitly instead of triggering the warning.
    r2_defined = len(y_test) >= 2
    r2 = r2_score(y_test, predictions) if r2_defined else float("nan")

    print("Model Performance:")
    print(f"RMSE: {rmse:.4f} kg")
    print(f"R2 Score: {r2:.4f}")
    if not r2_defined:
        print("Note: R2 is undefined with fewer than two test samples.")

    # 6. Save Model
    # We save the model artifact so it can be deployed to an Azure Managed Endpoint later
    model_filename = "honey_production_model.joblib"
    joblib.dump(model, model_filename)
    print(f"Model saved locally as {model_filename}")

# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    train_model()