In [None]:
import warnings
from time import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from accure_io import PostgresInterface, S3Interface, SnowflakeInterface
from accure_io.s3_battery_data_reader import DataNotFoundError, S3BatteryDataReader
from accure_io._meta_data import MetaData
from accure_io.s3 import list_bucket

In [None]:
# Silence every warning for the remainder of the session.
warnings.filterwarnings("ignore")

# --- Scope of the search -------------------------------------------------
level = "pack"
customer = "senec"

# --- Data-access clients -------------------------------------------------
pi = PostgresInterface()
dc = S3Interface.get_latest_data_context(customer=customer)
s3i = S3Interface(dc)

battery_reader = S3BatteryDataReader(tenant=customer, data_version="latest")
sfi = SnowflakeInterface(customer=customer)

# --- Input / output locations --------------------------------------------
# S3 prefix holding the FCC_monthly result artifacts (run context submit-20221204;
# the run context used before that was submit-20220904).
health_path = "s3://accure-production-artifacts/senec/product=reivolution/data_version=2/run_context=submit-20221204/artifact_type=result/group=FCC_monthly/"

# Tag embedded in the output file name, and the parquet file the selected ids are written to.
version = "221205"
save_path = f"s3://accure-sandbox-data/kyung/{customer}/id-list/aging_accure_ids_{version}.parquet"


### Reference code: id search using the Snowflake FCC table

In [None]:
required_ids = 50
DIO = 365.24 * 2  # filter days in operation
# Minimum number of FCC rows an id must have.
# NOTE(review): presumably one row per month, i.e. 3 years, which is longer than DIO - confirm.
MIN_FCC_ROWS = 36

# Find ids: walk over all ids known to Postgres and keep those that have been in
# operation long enough, have enough FCC rows in the Snowflake table and whose SOH
# profile is not flagged as strange.
fcc_monthly = sfi.read_table(
    database="staging_senec", schema="reivolution_v2", table_name="reivolution_fcc_monthly"
)
fcc_monthly.columns = fcc_monthly.columns.str.lower()
accure_ids = pi.get_ids(level=level, customer=customer)
t0 = time()
keep_ids = []
operation_days = []

for id_index, accure_id in enumerate(accure_ids, 1):
    try:
        meta = battery_reader.read_meta_data(level=level, accure_id=accure_id)
    except DataNotFoundError:
        # No metadata stored for this id: nothing to evaluate.
        continue

    # check days in operation
    days_in_operation = (meta.last_timestamp - meta.first_timestamp).days
    if days_in_operation < DIO:
        continue

    # check availability of health data
    id_fcc_df = fcc_monthly[fcc_monthly["accure_id"] == accure_id]
    if id_fcc_df.shape[0] < MIN_FCC_ROWS:
        continue
    nom_cap = meta.configurations.iloc[-1]["customer_datasheet"]["agg_capacity_design"]
    soh = id_fcc_df.sort_values("time")["fcc_points"] / nom_cap
    soh_step = soh.diff()

    # A profile is skipped only when ALL four criteria hold; this is what the previous
    # chain of `*=` on booleans computed, now spelled out with `and`.
    # NOTE(review): each criterion reads like an independent reason to reject a profile
    # (i.e. `or`). Switching to `or` needs the thresholds revisited first: with SOH as a
    # fraction of nominal capacity, "|step| < 0.01 more than 5 times" would match most
    # profiles - confirm intent before changing.
    mask_strange_behaviour = (
        # unexplainably large soh
        (soh > 1.15).any()
        # unexplainably large jumps in soh
        and (soh_step.abs() > 1.0).any()
        # repeated increases in soh
        and (soh_step > 0.05).sum() > 5
        # too many approximately zero changes in soh
        and (soh_step.abs() < 0.01).sum() > 5
    )
    if mask_strange_behaviour:
        continue

    # store id
    keep_ids.append(accure_id)
    operation_days.append(days_in_operation)

    # Progress report after every accepted id.
    t1 = time()
    print(
        f" . {len(keep_ids)} ids found. {(t1-t0) / id_index:.2f} per id searched. {(t1-t0) / len(keep_ids):.2f} per id found"
    )

    if len(keep_ids) >= required_ids:
        break

df = pd.DataFrame({"accure_id": keep_ids, "age": operation_days})
df.to_parquet(f"s3://accure-sandbox-data/kyung/{customer}/aging_accure_ids_220926.parquet")

### Find ids: search using the FCC result files on S3

In [None]:
# List the objects stored under `health_path`; the id search further down reads the
# "filename" column of this result.
files = list_bucket(health_path,full_key=True)
files

In [None]:
# Persist the "filename" column of the listing as a one-column parquet file.
id_list_path = f"s3://accure-sandbox-data/kyung/{customer}/id-list/s3_id_list.parquet"
filename_frame = files["filename"].to_frame()
filename_frame.to_parquet(id_list_path)

In [None]:
# Display the listing held in `files` again.
files

In [None]:
required_ids = 200
YIO = 3  # required years in operation
DIO = 365 * YIO  # filter days in operation

# Number of leading entries of files["filename"] to skip.
#   0   -> fresh search: the result lists are re-created below.
#   n>0 -> continue an interrupted run of THIS cell after n files were already searched;
#          `keep_ids` / `operation_days` from that run are then extended.
# This replaces the former implicit resume via whatever `id_index`, `keep_ids` and
# `operation_days` happened to be left in the kernel (possibly by the reference search,
# which counts a different sequence and applies different criteria).
RESUME_FROM = 0

if RESUME_FROM == 0:
    keep_ids = []
    operation_days = []

# `files` is the bucket listing created further up; the stored copy at
# s3://accure-sandbox-data/kyung/<customer>/id-list/s3_id_list.parquet can be loaded instead.
# (Named `candidate_files` so the builtin `list` is not rebound.)
candidate_files = files["filename"].iloc[RESUME_FROM:]

t0 = time()

for id_index, filename in enumerate(candidate_files, 1):

    # The id is the text between the first "=" and the first "." of the file name
    # (the files are read elsewhere as "FCC_accure_id=<id>.parquet").
    accure_id = filename[filename.index('=')+1:filename.index('.')]

    try:
        meta = battery_reader.read_meta_data(level=level, accure_id=accure_id)
    except DataNotFoundError:
        # No metadata stored for this id: nothing to evaluate.
        continue

    # check days in operation
    days_in_operation = (meta.last_timestamp - meta.first_timestamp).days
    if days_in_operation < DIO:
        continue

    # check availability of health data
    id_fcc_df = pd.read_parquet(f'{health_path}{filename}')
    if id_fcc_df.shape[0] < YIO*12:
        continue
    nom_cap = meta.configurations.iloc[-1]["customer_datasheet"]['agg_capacity_design']
    # SOH in percent of the design capacity.
    # NOTE(review): rows are used in file order (no sort by time) - confirm the files are stored sorted.
    soh = id_fcc_df['FCC_POINTS']/nom_cap*100
    soh_step = soh.diff()

    # A profile is rejected as soon as ANY of the criteria holds.
    mask_strange_behaviour = (
        # unexplainably large soh
        (soh > 115).any()
        # unexplainably large jumps in soh
        or (soh_step.abs() > 5).any()
        # repeated increases in soh
        or (soh_step > 0.1).sum() > 1*DIO/365
        # too many approximately zero changes in soh
        or (soh_step.abs() < 0.05).sum() > 2*DIO/365
    )
    if mask_strange_behaviour:
        continue

    # TODO: check if timeseries data have gaps, e.g. via
    # s3i.get_timeseries_s3(level=level, accure_id=accure_id,
    #                       time_start=meta.first_timestamp, time_end=meta.last_timestamp)

    # store id
    keep_ids.append(accure_id)
    operation_days.append(days_in_operation)

    t1 = time()
    print(f" . {len(keep_ids)} ids found. {(t1-t0) / id_index:.2f} per id searched. {(t1-t0) / len(keep_ids):.2f} per id found")

    # `>=` so the loop also stops when a resumed run already holds enough ids.
    if len(keep_ids) >= required_ids:
        break

df = pd.DataFrame({"accure_id": keep_ids, "age": operation_days})
df.to_parquet(save_path)

## Visual check of the selected SOH profiles

In [None]:
# Load the id table (columns written by the search: "accure_id", "age") back from `save_path`,
# so this section can start from the stored list.
df = pd.read_parquet(save_path)
# df = df.sort_values(by="age",ascending=False)

In [None]:
%matplotlib qt
# ids = pd.read_parquet(save_path)
# Ids ordered from the longest to the shortest time in operation. The Series keeps the
# row labels of `df`, so it must be walked by position (`.iloc`): the former `ids[j]` was a
# label lookup and therefore plotted the ids in stored order, ignoring the sort.
ids = df.sort_values(by="age",ascending=False)["accure_id"]
group = 6  # number of SOH curves per figure
end = len(ids)
for i in range(0, end, group):
    fig, ax = plt.subplots(figsize=(9,7))
    # Slicing past the end is safe, so the last (shorter) group needs no special case.
    for accure_id in ids.iloc[i:i+group]:
        meta = battery_reader.read_meta_data(level=level, accure_id=accure_id)
        nom_cap = meta.configurations.iloc[-1]["customer_datasheet"]['agg_capacity_design']
        fcc = pd.read_parquet(f'{health_path}FCC_accure_id={accure_id}.parquet')
        # SOH as a fraction of the design capacity, plotted against the row number.
        soh = fcc["FCC_POINTS"].reset_index(drop=True)/nom_cap
        soh.plot(ax=ax, xlabel='Time (Month)',ylabel='SOH',label=accure_id)
    ax.legend()

In [None]:
# Pick one id and load its metadata for inspection.
# NOTE(review): with an integer index `ids[0]` is a label lookup, so this is the row that
# was labelled 0 before sorting - not necessarily the first entry of the age-sorted
# Series. Confirm which one is meant.
# NOTE(review): `id` shadows the builtin of the same name; the timeseries cell reads it.
id = ids[0]
meta = battery_reader.read_meta_data(level=level, accure_id=id)
meta

In [None]:
# Show the "customer_datasheet" value stored under label 0 of the configurations table.
# NOTE(review): the other cells read the last row via `.iloc[-1]` - confirm which
# configuration is wanted here.
meta.configurations["customer_datasheet"][0]

In [None]:
# Load the full timeseries of the id selected above, from its first to its last timestamp.
time_start = meta.first_timestamp
time_end = meta.last_timestamp
# Bound to its own name: `df` holds the id table ("accure_id", "age") that the plotting
# cell sorts, and rebinding it to a timeseries frame made that cell fail when re-run.
ts_df = s3i.get_timeseries_s3(level=level, accure_id=id, time_start=time_start, time_end=time_end)
ts_df

In [None]:
# Compare the SOH curve from the S3 result file with the one from the Snowflake table
# (`fcc_monthly`, loaded in the reference-code cell) for a single id.

# `ids` is a Series of accure ids, so it is indexed directly (`ids['accure_id']` only
# worked while `ids` was the full table). `.loc[49]` keeps the label lookup of the former `[49]`.
accure_id = ids.loc[49]
meta = battery_reader.read_meta_data(level=level, accure_id=accure_id)
# `health_path` already ends with "/", so no extra separator is added.
fcc_df = pd.read_parquet(f'{health_path}FCC_accure_id={accure_id}.parquet')
nom_cap = meta.configurations.iloc[-1]["customer_datasheet"]['agg_capacity_design']

fig, ax = plt.subplots()
(fcc_df['FCC_POINTS']/nom_cap).plot(ax=ax, xlabel='Time (Month)',ylabel='SOH',label='S3 result file')
# Sorted by "time" as in the reference search, so the curve is drawn in chronological order.
snowflake_fcc = fcc_monthly[fcc_monthly["accure_id"] == accure_id].sort_values("time").reset_index()
(snowflake_fcc['fcc_points']/nom_cap).plot(ax=ax, xlabel='Time (Month)',ylabel='SOH',label='Snowflake table')
ax.legend()

In [None]:
# Plot the SOH curves of the ids at positions 0-9 of `ids` into one figure.
# `ids` is a Series of accure ids, so it is sliced directly and by position.
fig, ax = plt.subplots()
for accure_id in ids.iloc[0:10]:
    print(accure_id)
    meta = battery_reader.read_meta_data(level=level, accure_id=accure_id)
    # `health_path` already ends with "/", so no extra separator is added.
    fcc_df = pd.read_parquet(f'{health_path}FCC_accure_id={accure_id}.parquet')
    nom_cap = meta.configurations.iloc[-1]["customer_datasheet"]['agg_capacity_design']
    (fcc_df['FCC_POINTS']/nom_cap).plot(ax=ax, xlabel='Time (Month)',ylabel='SOH',label=accure_id)
# The curves carry the id as label; show them.
ax.legend()

In [None]:
# Plot the SOH curves of the ids at positions 10-19 of `ids` into one figure.
# The slice starts at 10 (it was 11) so that, together with the 0:10 batch, no id is skipped.
# `ids` is a Series of accure ids, so it is sliced directly and by position.
fig, ax = plt.subplots()
for accure_id in ids.iloc[10:20]:
    print(accure_id)
    meta = battery_reader.read_meta_data(level=level, accure_id=accure_id)
    # `health_path` already ends with "/", so no extra separator is added.
    fcc_df = pd.read_parquet(f'{health_path}FCC_accure_id={accure_id}.parquet')
    nom_cap = meta.configurations.iloc[-1]["customer_datasheet"]['agg_capacity_design']
    (fcc_df['FCC_POINTS']/nom_cap).plot(ax=ax, xlabel='Time (Month)',ylabel='SOH',label=accure_id)
# The curves carry the id as label; show them.
ax.legend()