# Modelos datathon

## Acceso a datos externos a fases de producción

In [34]:
import pickle
import pandas as pd

In [None]:
from pathlib import Path
# Load the structured `info` dictionary from its pickle cache, or build the
# cache from the raw dictionary of DataFrames when it does not exist yet.
#
# NOTE(review): `dir` shadows the built-in dir(); the name is kept only
# because other cells may reference it.
dir = Path('.')
ruta_info = dir / 'info.pkl'

# Path.exists is a method and must be called: the bare attribute is a bound
# method object, which is always truthy, so the "create" branch never ran.
if ruta_info.exists():
    # SECURITY: unpickling can execute arbitrary code; only load trusted files.
    info = pd.read_pickle(ruta_info)
    print('Leyendo datos de info.pkl')
else:
    print('Creando info.pkl')
    # SECURITY: same caveat as above for the raw input pickle.
    datos = pd.read_pickle("dict_dataframes_short.pkl")
    biorreactores = {}
    centrifugas = {}
    info = {'biorreactores': biorreactores, 'centrifugas': centrifugas}

    for k, tabla in datos.items():
        # Keep the first path component and drop everything after the first dot.
        nombre = k.split('/')[0].split('.')[0]
        if 'Bio' in k:
            # Keyed by the last space-separated token of the name.
            biorreactores[nombre.split(' ')[-1]] = tabla
        elif 'Cen' in k:
            centrifugas[nombre.split(' ')[-1]] = tabla
        else:
            info[nombre] = tabla

    with open(ruta_info, 'wb') as file:
        pickle.dump(info, file)

Ahora los datos tienen una forma más estructurada y fácil de acceder.

In [39]:
from datetime import datetime
def encontrar_subdataset_entre_fechas(df: pd.DataFrame, fecha_inicio: str, fecha_fin: str) -> pd.DataFrame:
    """Return the rows of ``df`` whose ``DateTime`` lies within a closed interval.

    Args:
        df: Frame with a ``DateTime`` column holding timestamps, either as
            strings (e.g. ``'2023-03-17 23:00:00.000'``, with or without a
            fractional-second part) or as datetime values.
        fecha_inicio: Lower bound, e.g. ``'2023-03-17 23:00:00'`` (inclusive).
        fecha_fin: Upper bound in the same format (inclusive).

    Returns:
        The subset of ``df`` (original columns and index labels) whose
        timestamp, truncated to whole seconds, falls between the two bounds.
        ``df`` itself is not modified.
    """
    # Parse with pandas rather than slicing off the last four characters, so
    # the column need not carry a '.mmm' suffix; flooring to seconds
    # keeps the sub-second truncation the slicing used to perform.
    marcas = pd.to_datetime(df['DateTime']).dt.floor('s')
    inicio = pd.Timestamp(fecha_inicio)
    fin = pd.Timestamp(fecha_fin)
    # Series.between is inclusive on both ends by default.
    return df[marcas.between(inicio, fin)]

# Example: filter the '13169' entry of info['biorreactores'] to the rows whose
# DateTime lies between the two given timestamps; the result is the cell output.
encontrar_subdataset_entre_fechas(info['biorreactores']['13169'], '2023-03-17 23:00:00', '2023-03-18 02:00:00')

Unnamed: 0,DateTime,13169_FERM0101.Agitation_PV,13169_FERM0101.Air_Sparge_PV,13169_FERM0101.Biocontainer_Pressure_PV,13169_FERM0101.DO_1_PV,13169_FERM0101.DO_2_PV,13169_FERM0101.Gas_Overlay_PV,13169_FERM0101.Load_Cell_Net_PV,13169_FERM0101.pH_1_PV,13169_FERM0101.pH_2_PV,13169_FERM0101.PUMP_1_PV,13169_FERM0101.PUMP_1_TOTAL,13169_FERM0101.PUMP_2_PV,13169_FERM0101.PUMP_2_TOTAL,13169_FERM0101.Single_Use_DO_PV,13169_FERM0101.Single_Use_pH_PV,13169_FERM0101.Temperatura_PV
284,2023-03-17 23:00:00.000,0.0,0.0,480.0,0.0,,0.0,-19.599628,1.56356,-0.011332,0.0,27.280002,0.0,1486.330176,655.892432,799.623975,16.146649
285,2023-03-17 23:15:00.000,0.0,0.0,480.0,0.0,,0.0,-19.496784,1.56356,-0.011332,0.0,27.280002,0.0,1486.330176,655.892432,799.623975,16.788062
286,2023-03-17 23:30:00.000,0.0,0.0,480.0,0.0,0.0,0.0,-19.504235,1.56356,-0.011332,0.0,27.280002,0.0,1486.330176,655.892432,799.623975,17.027268
287,2023-03-17 23:45:00.000,0.0,0.0,480.0,0.0,,0.0,-19.27078,1.56356,-0.011332,0.0,27.280002,0.0,1486.330176,655.892432,799.623975,17.838418
288,2023-03-18 00:00:00.000,0.0,0.0,480.0,0.0,,0.0,-19.2,1.56356,-0.011332,0.0,27.280002,0.0,1486.330176,655.892432,799.623975,18.057414
289,2023-03-18 00:15:00.000,0.0,0.0,480.0,0.0,,0.0,-19.2,1.56356,-0.011332,0.0,27.280002,0.0,1486.330176,655.892432,799.623975,18.07987
290,2023-03-18 00:30:00.000,0.0,0.0,480.0,0.0,,0.0,-19.2,1.56356,-0.011332,0.0,8.894705,0.0,1486.330176,655.892432,799.623975,18.098775
291,2023-03-18 00:45:00.000,0.0,0.0,480.0,0.0,,0.0,-19.2,1.56356,-0.011332,0.0,27.280002,0.0,1486.330176,655.892432,799.623975,18.094757
292,2023-03-18 01:00:00.000,0.0,0.0,480.0,0.0,,0.0,-19.2,1.56356,-0.011332,0.0,27.280002,0.0,249.033884,655.892432,799.623975,18.012873
293,2023-03-18 01:15:00.000,0.0,0.0,480.0,0.0,,0.0,-19.2,1.56356,-0.011332,0.0,1.429191,0.0,1486.330176,655.892432,799.623975,18.07546


## Obtención de datos

En algunos datasets el ID del lote es `Lote` y en otros es `Orden`

In [42]:
# Compare the number of distinct `Lote` values with the number of distinct
# `Orden` values in the 'OF 123456 v02' table.
# NOTE(review): equal counts alone do not prove a one-to-one Lote<->Orden
# mapping — confirm if later cells rely on that.
len(info['OF 123456 v02']['Lote'].unique()) == len(info['OF 123456 v02']['Orden'].unique())

True

Creamos un traductor de IDs

In [49]:
from utils import limpiar_string_lote

# Two-way lookup tables between the `Lote` and `Orden` identifiers of the
# 'OF 123456 v02' table.
lote_a_orden = {}
orden_a_lote = {}

tabla_of = info['OF 123456 v02']
# Walk the two columns directly instead of DataFrame.iterrows(): it avoids
# building a Series per row (with the per-row dtype coercion that implies) and
# the unused row index.
for orden, lote_bruto in zip(tabla_of['Orden'], tabla_of['Lote']):
    # presumably normalizes the raw `Lote` string to a clean ID — verify in utils
    lote = limpiar_string_lote(lote_bruto)
    # If a key repeats, the value from the later row overwrites the earlier one.
    lote_a_orden[lote] = orden
    orden_a_lote[orden] = lote
orden_a_lote

{200178572: 23019,
 200179217: 23020,
 200181620: 23021,
 200182428: 23022,
 200182429: 23023,
 200182430: 23024,
 200182431: 23025,
 200182432: 23026,
 200182433: 23027,
 200182434: 23028,
 200182435: 23029,
 200182436: 23030,
 200182437: 23031,
 200182440: 23032,
 200182441: 23033,
 200182442: 23034,
 200182443: 23035,
 200182444: 23036,
 200185569: 23038,
 200185570: 23039,
 200182445: 23040,
 200184533: 23041,
 200184534: 23042,
 200184603: 23043,
 200184604: 23044,
 200184605: 23045,
 200184606: 23046,
 200184607: 23047,
 200184609: 23048,
 200184610: 23049,
 200184611: 23050,
 200184612: 23051,
 200187023: 23053,
 200187024: 23054,
 200187025: 23055,
 200182448: 23057,
 200187026: 23056,
 200182446: 23061,
 200187027: 23060,
 200187028: 23063,
 200187029: 23064,
 200187030: 23065,
 10005176: 23273,
 200187032: 23067,
 200187033: 23068,
 200188840: 23069,
 200188841: 23070,
 200188842: 23071,
 200188843: 23072,
 200188844: 23073,
 200188846: 23075,
 200188847: 23076,
 200188848: 2