## Precursor to krill extraction yield calculations
The script below integrates (i.e. calculates the area under the curve of) the time series of a few tags that are relevant for yield estimation.<br>
It is inspired by Lars' Grafana dashboard: https://akerbiomarine.grafana.net/d/cdw2jrph7gj5sf/1-houston-weekly-production-dashboard-ll?orgId=1&from=2025-06-02T05:00:00.000Z&to=2025-06-09T04:59:59.000Z&timezone=America%2FChicago<br>
Only the trapezoidal rule is used for integration and no baseline correction is performed. <br>
**This script does not account for the lag between extraction tank filling and decanter feeding.** Possibly this cancels out over longer time intervals, or is at least less of a problem there.<br>
Following the integrations in the script below, one only needs to do the following to arrive at an estimate of krill oil extraction yield:

* `FI323113_EvaporatorThreeDischarge / (FI2120.PV - FI6320.PV) * 100 = yield (%)`

One needs to split the dataframe obtained via this Python script by tag and perform the above calculation row by row on the individual integration results.<br><br>
**One also needs to make sure that all data are from a representative production period. If there are no extractions, the tag readings are not meaningful.**

In [None]:
import pyodbc
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from typing import List, Tuple

# user defined variables
# NOTE: both datetimes are naive (no tzinfo); they are formatted as plain
# strings and pasted into the SQL query by fetch_1min_series.
START_TIME = datetime(2025, 5, 11, 11, 0, 0) # SQL-query start AND first integration interval start, (yyyy, m, d, hr, min, sec)
END_TIME = datetime(2025, 5, 15, 0, 0, 0) # SQL-query end (and integration cutoff)
INTERVAL_HOURS = 24  # length of one integration interval in hours; the last interval is cut off at END_TIME if the range is not a whole multiple
# remember, time in data warehouse is UTC time

# (fully qualified table, tag name) pairs. Each pair is queried once and then
# integrated separately over every interval.
tags_tables = [
    ("akbm-houston-prod.houston_data.sensor_data_scada", "FI2120.PV"),  # decanter feed flow rate. must be the sum of ethanol and krill meal
    ("akbm-houston-prod.houston_data.sensor_data_scada", "FI323113_EvaporatorThreeDischarge"), # finished krill oil product discharged from evap 3 in units of kg/hr
    ("akbm-houston-prod.houston_data.sensor_data_scada", "FI6320.PV"), # ethanol used for extraction
    ("akbm-houston-prod.houston_data.sensor_data_sulzer2", "FIT_323113"),  # Added lbs/hr signal
]

# ODBC data source name used to open the connection below.
# NOTE(review): presumably a BigQuery ODBC DSN configured on this machine
# (the query uses backtick-quoted tables and TIMESTAMP_TRUNC) - confirm.
dsn = 'bq64_system'
# Opened once with autocommit enabled and shared by all queries; nothing in
# this cell closes it explicitly.
conn = pyodbc.connect(f"DSN={dsn}", autocommit=True)

def fetch_1min_series(conn, dataset_table, tagname, start_time, end_time):
    """Fetch per-minute averages of one tag as a time-indexed DataFrame.

    The query buckets rows of ``dataset_table`` by ``time`` truncated to the
    minute, keeps only rows whose ``tagname`` matches, and averages ``value``
    within each bucket. Both time bounds are inclusive (SQL ``BETWEEN``).

    Args:
        conn: Open database connection handed to ``pandas.read_sql``.
        dataset_table: Fully qualified table name; placed between backticks.
        tagname: Tag to select.
        start_time: Lower time bound (``datetime``).
        end_time: Upper time bound (``datetime``).

    Returns:
        DataFrame with a single ``value`` column, indexed by ``timestamp``
        and sorted in ascending time order.

    Note:
        All arguments are interpolated directly into the SQL text, so they
        must come from trusted code, never from user input.
    """
    stamp_format = "%Y-%m-%d %H:%M:%S"
    window_from, window_to = (
        moment.strftime(stamp_format) for moment in (start_time, end_time)
    )
    query = f"""
    SELECT TIMESTAMP_TRUNC(time, MINUTE) AS ts_min, AVG(value) AS avg_value
    FROM `{dataset_table}`
    WHERE tagname = '{tagname}'
    AND time BETWEEN TIMESTAMP('{window_from}') AND TIMESTAMP('{window_to}')
    GROUP BY ts_min
    ORDER BY ts_min
    """
    frame = pd.read_sql(query, conn).rename(
        columns={"ts_min": "timestamp", "avg_value": "value"}
    )
    frame["timestamp"] = pd.to_datetime(frame["timestamp"])
    return frame.set_index("timestamp").sort_index()

def build_intervals(
    start: datetime, end: datetime, interval_hours: float
) -> List[Tuple[datetime, datetime]]:
    """Split ``[start, end)`` into consecutive fixed-length intervals.

    Args:
        start: Start of the first interval.
        end: Overall cutoff; no interval extends past it.
        interval_hours: Length of each interval in hours. Must be positive.

    Returns:
        List of ``(interval_start, interval_end)`` tuples in chronological
        order. The last interval is shortened to finish at ``end`` when the
        range is not a whole multiple of ``interval_hours``. The list is
        empty when ``start >= end``.

    Raises:
        ValueError: If ``interval_hours`` does not yield a positive step.
    """
    step = timedelta(hours=interval_hours)
    # A zero or negative step would never advance the cursor past `end`,
    # turning the loop below into an infinite one.
    if step <= timedelta(0):
        raise ValueError(
            f"interval_hours must be positive, got {interval_hours!r}"
        )

    intervals = []
    cursor = start
    while cursor < end:
        next_cursor = cursor + step
        intervals.append((cursor, min(next_cursor, end)))
        cursor = next_cursor
    return intervals

def integrate_trapz(df, start, end):
    """Integrate the ``value`` column over a time window (trapezoidal rule).

    Args:
        df: DataFrame with a sorted datetime index and a ``value`` column.
        start: Inclusive start of the window.
        end: End of the window. The slice stops one minute before ``end``,
            so a sample stamped exactly at ``end`` is left out.

    Returns:
        The integral as a float, in units of ``value`` multiplied by hours.
        ``0.0`` when the window holds no non-NaN samples; a single sample
        also integrates to zero.
    """
    window = df.loc[start:end - timedelta(minutes=1), "value"].dropna()
    if window.empty:
        return 0.0

    # Derive the time axis from elapsed time instead of reinterpreting the
    # index as int64 nanoseconds: the raw-integer approach silently
    # mis-scales the axis when the index is stored in s/ms/us resolution.
    # The integral only depends on time differences, so shifting the origin
    # to the first sample does not change the result.
    elapsed = window.index - window.index[0]
    t_hours = np.asarray(elapsed.total_seconds(), dtype=float) / 3600.0

    # NumPy 2.x renames trapz to trapezoid and deprecates the old name;
    # prefer the new one and fall back for older NumPy versions.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz
    return float(trapezoid(window.to_numpy(dtype=float), x=t_hours))

# Integrate every tag over every interval: one record per (tag, interval).
intervals = build_intervals(START_TIME, END_TIME, INTERVAL_HOURS)
all_records = []

for table, tag in tags_tables:
    # One query per tag covering the whole range; the individual interval
    # windows are then sliced out of the fetched frame locally.
    df = fetch_1min_series(conn, table, tag, START_TIME, END_TIME)
    all_records.extend(
        {
            "tagname": tag,
            "interval_start": window_start,
            "interval_end": window_end,
            "integrated_value": integrate_trapz(df, window_start, window_end),
        }
        for window_start, window_end in intervals
    )

# Collect the per-interval integrals into one table, sorted by tag and then
# by interval start. The columns are named explicitly so the frame keeps its
# schema when no records were produced (e.g. START_TIME >= END_TIME or an
# empty tag list); without that, the column lookups below raise KeyError.
summary_columns = ["tagname", "interval_start", "interval_end", "integrated_value"]
summary_df = pd.DataFrame(all_records, columns=summary_columns)
for boundary in ("interval_start", "interval_end"):
    summary_df[boundary] = pd.to_datetime(summary_df[boundary])
summary_df = summary_df.sort_values(["tagname", "interval_start"]).reset_index(drop=True)

# Written to the current working directory; an existing file is overwritten.
summary_df.to_csv("integration_summary.csv", index=False)
print(f"Integration summary written to 'integration_summary.csv' with {len(summary_df)} rows.")

# Last expression of the cell, so the notebook shows a preview of the table.
summary_df.head()
