In [None]:
import json
import os
from typing import Callable

import jwt
import numpy as np
import pandas as pd
import pandera as pa
import pandera.extensions as extensions
import requests
from pandera.typing import DataFrame, Series, Index, DateTime

### API Processes

In [None]:
# Credentials are read from the environment so that secrets never have to be
# pasted into (and accidentally committed with) the notebook. Unset variables
# fall back to the previous empty-string placeholders.
iss = os.environ.get("ERP_PROCESSES_JWT_ISS", "")
aud = "erp-core.brusnika.ru/processes"
key = os.environ.get("ERP_PROCESSES_JWT_KEY", "")

payload = {
    "iss": iss,
    "aud": aud,
}

# Token sent as the Bearer credential in the process-statistics requests.
token = jwt.encode(payload, key)

In [None]:
# Maps task ids (used as the "tasks" filter of the process-statistics request)
# to the column name given to that task's end date in the combined frame.
tasks = {
    "88359fe3-4d1a-4bac-8fee-9ddd32eb9dc1": "permission_date", # "РС" - presumably the construction permit; TODO confirm
    "8b1663d9-a69d-40fd-8b3e-9f51b39d66a8": "construction_start_date", # "НС" - presumably the start of construction; TODO confirm
    "733fb7d0-0ec6-4223-846e-dcc0bebaaca9": "commition_date", # "РВ" - presumably the commissioning permit; TODO confirm
}

In [None]:
# Process-statistics endpoint of the "processes" service for one fixed version id.
url = 'https://erp-core.brusnika.ru/processes/v2/versions/1ce7fe21-956f-4627-b1aa-cbfc2d6358ba/process-statistics'

# Request template with a single object filter limited to the CONSTRUCTION_OBJECT
# type and results grouped by task. The "tasks" entry is overwritten per task by
# the request loop below.
# NOTE(review): None / empty values presumably mean "no restriction" for the
# API - confirm against the service documentation.
request_body ={
    "objectFilter": [
        {
            "projections": [],
            "stages": None,
            "processes": None,
            "tasks": None,
            "contours": None,
            "projects": None,
            "sites": None,
            "parcels": None,
            "constructionObjects": [],
            "objectTypes": ["CONSTRUCTION_OBJECT"]
        }
    ],
    "periodFilter": {
        "since": None,
        "until": None
    },
    "groupBy": "TASK"
}

Запрашиваем каждый процесс/веху отдельно (пока оставила, как изначально было)

In [None]:
# Fetch the statistics of every tracked task with a separate request.
REQUEST_TIMEOUT_SECONDS = 60  # fail instead of hanging forever on a stalled connection

resp_data_json = {}

for task_id in tasks:
    # The shared request template is narrowed to the current task before each call.
    request_body['objectFilter'][0]['tasks'] = [task_id]
    resp = requests.post(
        url,
        headers={"authorization": f"Bearer {token}"},
        json=request_body,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Surface HTTP errors here instead of letting an error payload reach the
    # DataFrame construction further down and fail there with an unrelated message.
    resp.raise_for_status()
    resp_data_json[task_id] = resp.json()

In [None]:
# Fields kept from each process-statistics response; 'overallStart' is
# currently switched off.
columns = [
    # 'overallStart', 
    'overallEnd', 
    'constructionObject', 
]

Предобработка данных

In [None]:
# Turn every raw task response into a frame indexed by construction-object id.
resp_data_sets_json = {}

for task_id, task_rows in resp_data_json.items():
    df = (
        pd.DataFrame(task_rows)[columns]
        # `constructionObject` holds a mapping per row; only its 'id' is kept.
        # `.assign` builds a new frame, so nothing is written into the
        # column-selected intermediate (avoids SettingWithCopyWarning).
        .assign(constructionObject=lambda frame: frame['constructionObject'].map(lambda obj: obj['id']))
        .set_index('constructionObject')
    )
    resp_data_sets_json[task_id] = df

Первая схема — проверяет только наполнение данных

In [None]:
class Schema_API_process(pa.DataFrameModel):
    """Schema for one per-task frame built from the process-statistics response.

    Requires a non-null ``overallEnd`` column coerced to ``pd.Timestamp`` and a
    unique string index named ``constructionObject`` whose values all belong to
    ``construction_objects_id_set``.
    """
    # NOTE(review): `construction_objects_id_set` is not defined anywhere above
    # this cell in the notebook as shown; on a fresh kernel this class body would
    # raise NameError. Confirm where the set is built and run that cell first.
    # overallStart: Series[pd.Timestamp] = pa.Field(coerce=True, nullable=False)
    overallEnd: Series[pd.Timestamp] = pa.Field(coerce=True, nullable=False)
    constructionObject: Index[str] = pa.Field(isin=construction_objects_id_set, check_name=True, unique=True) # is the index really guaranteed to be unique?

    # class Config:
    # #   this option lets pandera drop every row that does not fit the schema, but then no error is reported for those rows
    #     drop_invalid_rows=True 

In [None]:
# for task_id in resp_data_sets_json.keys():
#     df = resp_data_sets_json[task_id]
#     # and run the NaN / valid-value check for every task, see the example below

In [None]:
# Validate the frame of the first task only; `error` keeps the collected
# schema errors (or stays None when validation passes).
error = None
try:
    # inside the loop sketched in the previous cell this argument would simply be `df`
    Schema_API_process.validate(resp_data_sets_json[list(tasks)[0]], lazy=True)
except pa.errors.SchemaErrors as schema_errors:
    error = schema_errors
    display(error.failure_cases)

In [None]:
# Preview of the frame built for the first task in `tasks`.
resp_data_sets_json[list(tasks.keys())[0]].head()

Вывели ошибки по пропускам и некорректным значениям

**TODO: нужно ли как-то преобразовывать вывод ошибок?**

Собираем общий сет: некорректные даты приводятся к NaT (пропуски при этом не удаляются)

In [None]:
# One datetime series per task, named after the task's target column.
# Values that cannot be parsed become NaT.
date_cols = []

for task_id, df in resp_data_sets_json.items():
    date_col = pd.to_datetime(df.overallEnd, errors='coerce').rename(tasks[task_id])
    date_cols.append(date_col)

In [None]:
# Put the per-task date series side by side, aligned on their shared index.
combine_df = pd.concat(date_cols, axis='columns')
combine_df

In [None]:
# Column names of the combined frame (one per entry of `tasks`).
combine_df.columns

Общий метод сравнения для двух дат

In [None]:
def compare_two_dates(first_date: pd.Series, second_date: pd.Series, compare_func: Callable=pd.Series.gt, threshold: int=0, exclude_na: bool=True) -> pd.Series:
    """Compare the gap between two date series against a threshold.

    The gap is ``second_date - first_date``; it is compared with
    ``threshold`` days using ``compare_func``.

    first_date, second_date: series of dates to compare element-wise
    compare_func: pd.Series.gt, pd.Series.ge, pd.Series.lt or pd.Series.le
    threshold: threshold value in days
    exclude_na: when True, rows where either date is missing are reported as
        True (i.e. they never count as a failed comparison)

    Returns a boolean series with the comparison outcome per row.
    """
    limit = pd.Timedelta(days=threshold)
    outcome = compare_func(second_date - first_date, limit)
    if not exclude_na:
        return outcome
    # Rows with a missing date are forced to True so they pass the comparison.
    missing = first_date.isna() | second_date.isna()
    return outcome | missing

In [None]:
# compare_two_dates(combine_df['permission_date'], combine_df['construction_start_date'], pd.Series.ge)

In [None]:
@extensions.register_check_method()
def check_permission_date_less_construction_start_date(df):
    """Row-wise check: construction start is not earlier than the permission date.

    Rows where either date is missing are reported as passing.
    """
    earlier = df['permission_date']
    later = df['construction_start_date']
    return compare_two_dates(earlier, later, compare_func=pd.Series.ge, threshold=0, exclude_na=True)

@extensions.register_check_method()
def check_construction_start_date_less_commition_date(df):
    """Row-wise check: the commission date is not earlier than the construction start.

    Rows where either date is missing are reported as passing.
    """
    earlier = df['construction_start_date']
    later = df['commition_date']
    return compare_two_dates(earlier, later, compare_func=pd.Series.ge, threshold=0, exclude_na=True)

@extensions.register_check_method()
def check_diff_construction_start_date_commition_date_ge_year(df):
    """Row-wise check: at least 365 days pass between construction start and commission.

    Rows whose dates are in the wrong order pass here on purpose: they are
    already reported by the ordering check, so they are not reported twice.
    Rows where either date is missing pass as well.
    """
    start = df['construction_start_date']
    commission = df['commition_date']
    # The ordering result is recomputed through the shared helper instead of
    # calling the sibling check by its module-level name. That name is rebound
    # to whatever `register_check_method` returns.
    # NOTE(review): in the pandera versions I am aware of that return value is
    # None, which would make the direct call raise TypeError - confirm.
    in_order = compare_two_dates(start, commission, pd.Series.ge, threshold=0, exclude_na=True)
    at_least_a_year = compare_two_dates(start, commission, pd.Series.ge, threshold=365, exclude_na=True)
    return ~in_order | at_least_a_year

@extensions.register_check_method()
def check_diff_permission_date_commition_date_ge_year(df):
    """Row-wise check: at least 365 days pass between permission and commission dates.

    Rows where either date is missing are reported as passing.
    """
    earlier = df['permission_date']
    later = df['commition_date']
    return compare_two_dates(earlier, later, compare_func=pd.Series.ge, threshold=365, exclude_na=True)

@extensions.register_check_method()
def check_diff_permission_date_commition_date_lt_3_5_year(df):
    """Row-wise check: fewer than 1279 days pass between permission and commission dates.

    Rows where either date is missing are reported as passing.
    """
    # Original note: 3.5 years is min 1277.5 / max 1278.5 days (presumably
    # depending on leap years), hence the strict bound of 1279 days.
    max_days_exclusive = 1279
    earlier = df['permission_date']
    later = df['commition_date']
    return compare_two_dates(earlier, later, compare_func=pd.Series.lt, threshold=max_days_exclusive, exclude_na=True)

Вторая схема объединяет проверку на пустые значения и корректные типы со сравнением дат

In [None]:
class Schema_process_PC_HC_PB(pa.DataFrameModel):
    """Schema for the combined frame holding the three milestone dates per object.

    Each date column must be non-null and is coerced to a datetime type. The
    index must be a unique string index named ``constructionObject`` whose
    values all belong to ``construction_objects_id_set``. ``Config`` additionally
    enables the registered frame-level date-comparison checks.
    """
    # NOTE(review): `construction_objects_id_set` is not defined anywhere above
    # this cell in the notebook as shown; on a fresh kernel this class body would
    # raise NameError. Confirm where the set is built and run that cell first.
    permission_date: Series[DateTime] = pa.Field(coerce=True, nullable=False)
    construction_start_date: Series[DateTime] = pa.Field(coerce=True, nullable=False)
    commition_date: Series[DateTime] = pa.Field(coerce=True, nullable=False)
    constructionObject: Index[str] = pa.Field(isin=construction_objects_id_set, check_name=True, unique=True)

    class Config:
        # Each attribute names a check registered via `register_check_method`;
        # the empty tuple means the check is applied without extra arguments.
        check_permission_date_less_construction_start_date = ()
        check_construction_start_date_less_commition_date = ()
        check_diff_construction_start_date_commition_date_ge_year = ()
        check_diff_permission_date_commition_date_ge_year = ()
        check_diff_permission_date_commition_date_lt_3_5_year = ()

In [None]:
# Preview of the first four rows of the combined frame.
combine_df.iloc[:4]

In [None]:
# Validate the combined frame with lazy collection of all failures; `error`
# keeps the collected schema errors (or stays None when validation passes).
error = None
try:
    Schema_process_PC_HC_PB.validate(combine_df, lazy=True)
except pa.errors.SchemaErrors as schema_errors:
    error = schema_errors
    display(error.failure_cases)

Тут можно посмотреть данные по конкретной ошибке, если заменить название проверки

In [None]:
# Rows of the combined frame that failed one particular check; change
# `check_name` to inspect another check.
check_name = 'check_diff_permission_date_commition_date_lt_3_5_year'

if error is None:
    # Validation passed (no SchemaErrors was captured), so there is nothing to
    # show; an empty slice keeps the cell output a DataFrame.
    failed_rows = combine_df.iloc[0:0]
else:
    failure_cases = error.failure_cases
    failed_index = failure_cases.loc[failure_cases.check == check_name, 'index'].unique()
    failed_rows = combine_df.loc[failed_index]

failed_rows

In [None]:
# Plain-text form of the captured validation error (prints "None" when validation passed).
print(error)

### API Indicators (показательная система)

In [None]:
# Credentials are read from the environment so that secrets never have to be
# pasted into (and accidentally committed with) the notebook. Unset variables
# fall back to the previous empty-string placeholders.
iss = os.environ.get("ERP_INDICATORS_JWT_ISS", "")
aud = "erp-core.brusnika.ru/indicators"
key = os.environ.get("ERP_INDICATORS_JWT_KEY", "")

payload = {
    "iss": iss,
    "aud": aud,
}

# Token sent as the Bearer credential in the indicator-values request.
token_indicators = jwt.encode(payload, key)

In [None]:
# Version id interpolated into the indicator-values URL.
process_version = "4713f87f-598f-43ec-866b-3a52111b0d92"

In [None]:
# Request monthly values of the "po.production.total" indicator, grouped by
# construction object and item-tree node, for a fixed list of item-tree ids.
INDICATORS_TIMEOUT_SECONDS = 60  # fail instead of hanging forever on a stalled connection

url = f'https://erp-core.brusnika.ru/indicators/api/v2/versions/{process_version}/indicator-values'
request_body = [{
    # "requestId": "string",
    "requestId": "",
    "indicators": [
        "po.production.total"
    ],
    "timeSlice": {
        "groupBy": "MONTH",
        "filter": {}
    },
    "objectSlice": {
        "groupBy": [
            "constructionObjectId",
            "itemTreeId"
        ],
        "filter": [
        {
            "constructionObjectId": [],
            "siteId": [],
            "projectId": [],
            "contourId": [],
            "premisesType": [],
            "fundsSourceType": [],
            "grade": [],
            "itemTreeId": [
                "76630097-e1b1-4787-b509-36e4a13c1490",
                "9f90fee4-22b0-4c75-b298-03b9af46a679",

                "4734c36c-9c27-44bc-82a1-1265267b0737",
                "05cacdf3-9de7-4bae-8916-59be9ef6c3c2",
                "ce719b53-3bf5-4c30-b8e5-7a1c8feaa4cb",
                "17d417b9-f6f1-44c2-b554-830e7a903274",
                "d3034170-b66f-4914-a2fd-e2cbc986038b",
                "e3bbb51d-07a1-44a0-a628-a1504f87ffe9",
            ]
        }
      ]
    }
    }
]

resp_result = requests.post(
    url,
    headers={"authorization": f"Bearer {token_indicators}"},
    json=request_body,
    timeout=INDICATORS_TIMEOUT_SECONDS,
)
# Surface HTTP errors here instead of failing later while indexing the parsed payload.
resp_result.raise_for_status()
resp_result

In [None]:
# Parsed JSON body of the indicator-values response.
resp_cost = resp_result.json()

In [None]:
# Slices of the first response entry as a frame, with `yearMonth` parsed to
# datetimes and rows ordered by construction object, then by month.
resp_cost_df = (
    pd.DataFrame(resp_cost[0]['slices'])
    .assign(yearMonth=lambda frame: pd.to_datetime(frame['yearMonth']))
    .sort_values(by=['constructionObjectId', 'yearMonth'])
)

In [None]:
# Collapse to one row per construction object, taking the first entry of each
# column within the (month-sorted) group.
# NOTE(review): `GroupBy.first()` skips nulls per column by default, so a result
# row can combine values from different months - confirm this is intended.
resp_cost_df = resp_cost_df.groupby('constructionObjectId').first()

In [None]:
# Earliest and latest `yearMonth` among the per-object rows.
resp_cost_df.yearMonth.min(), resp_cost_df.yearMonth.max()

In [None]:
# Number of construction objects (rows) and columns in the indicator frame.
resp_cost_df.shape

In [None]:
# Number of construction objects in the combined process frame, for comparison
# with the indicator frame's row count.
len(combine_df.index)

### API Parameters

In [None]:
# Credentials are read from the environment so that secrets never have to be
# pasted into (and accidentally committed with) the notebook. Unset variables
# fall back to the previous empty-string placeholders.
iss = os.environ.get("ERP_PARAMETERS_JWT_ISS", "")
aud = "erp-core.brusnika.ru/parameters"
key = os.environ.get("ERP_PARAMETERS_JWT_KEY", "")

payload = {
    "iss": iss,
    "aud": aud,
}

# Token sent as the Bearer credential in the construction-objects request.
token_params = jwt.encode(payload, key)

In [None]:
# Fetch the list of construction objects from the "parameters" service.
PARAMETERS_TIMEOUT_SECONDS = 60  # fail instead of hanging forever on a stalled connection

url = 'https://erp-core.brusnika.ru/parameters/v1/versions/1ce7fe21-956f-4627-b1aa-cbfc2d6358ba/construction-objects'
resp_co = requests.get(
    url,
    headers={"authorization": f"Bearer {token_params}"},
    timeout=PARAMETERS_TIMEOUT_SECONDS,
)
# Surface HTTP errors here instead of failing later while building the frame.
resp_co.raise_for_status()
resp_co

In [None]:
# Construction objects returned by the parameters service, indexed by their `id` field.
co_df = pd.DataFrame(resp_co.json()).set_index('id')