In [1]:
import pandas as pd
import glob
import re
import numpy as np

import matplotlib.pyplot as plt
import plotly.express as px

from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import cohen_kappa_score

anomalias = ['CurvaCola']
metricas = ['roc_auc','accuracy','precision','kappa','sensibilidad','especificidad']

In [2]:
archivos = glob.glob("../Datasets/Dataset_0/*.csv")

lista = []

for archivo in archivos:
    df = pd.read_csv(archivo, sep=";", header=0, usecols=['fecha','FormacionNIRCurva','ColaTipo'])
    lista.append(df)

df = pd.concat(lista, axis=0, ignore_index=True)

df.head(2)

Unnamed: 0,fecha,FormacionNIRCurva,ColaTipo
0,2021-02-01 00:00:54.884 UTC,FR-2130+BU-3009,fr 2130
1,2021-02-01 00:00:54.884 UTC,FR-2130+BU-3009,fr 2130


In [3]:
y = pd.read_csv('../Metodos/PyOD/Resultados/MAD_completo.csv')
y['fecha'] = y['fecha'].map(lambda x: x[0:16])
y['fecha']=pd.to_datetime(y['fecha'])
y = y.set_index('fecha')

In [4]:
#df = pd.read_csv(r'../Datasets\Dataset_0\000000000000.csv', sep=";", header=0, usecols=['fecha','FormacionNIRCurva','ColaTipo'])
df['fecha'] = df['fecha'].map(lambda x: x[0:16])
df['fecha']=pd.to_datetime(df['fecha'])

df['ColaTipo'] = df['ColaTipo'].astype(str).map(lambda x: re.findall(r'\d+', x) if any(i.isdigit() for i in x)==True else x)
df['ColaTipo'] = df['ColaTipo'].map(lambda x: x[0] if any(i.isdigit() for i in x)==True else x)

df['CurvaCola_text'] = df['FormacionNIRCurva'] +'--'+ df['ColaTipo']
df = df.drop(['FormacionNIRCurva','ColaTipo'], axis=1)

df = df.groupby(pd.Grouper(key='fecha', freq='1min')).agg(pd.Series.mode)
df

Unnamed: 0_level_0,CurvaCola_text
fecha,Unnamed: 1_level_1
2021-02-01 00:00:00,FR-2130+BU-3009--2130
2021-02-01 00:01:00,FR-2130+BU-3009--2130
2021-02-01 00:02:00,FR-2130+BU-3009--2130
2021-02-01 00:03:00,FR-2130+BU-3009--2130
2021-02-01 00:04:00,FR-2130+BU-3009--2130
...,...
2022-02-28 23:55:00,FR1350+BU3009--1350
2022-02-28 23:56:00,FR1350+BU3009--1350
2022-02-28 23:57:00,FR1350+BU3009--1350
2022-02-28 23:58:00,FR1350+BU3009--1350


In [5]:
df1 = df['CurvaCola_text'].astype(str).map(lambda x: 0 if x == '[]' else x)
indices = df1[df1!=0].index
y['CurvaCola_text'] = df1[indices]

In [6]:
y = y.sort_index().dropna()

In [7]:
curvas = y['CurvaCola_text'].unique()

In [8]:
curvas_buenas = pd.read_csv('../Datos/CombinacionesCurvaColaOK.csv')
curvas_buenas['ColaTipo'] = curvas_buenas['ColaTipo'].astype(str).map(lambda x: re.findall(r'\d+', x) if any(i.isdigit() for i in x)==True else x)
curvas_buenas['ColaTipo'] = curvas_buenas['ColaTipo'].map(lambda x: x[0] if any(i.isdigit() for i in x)==True else x)
curvas_buenas['curvas'] = curvas_buenas['FormacionNIRCurva'] +'--' + curvas_buenas['ColaTipo']

In [9]:
curvas_buenas['curvas'].unique()

array(['FR-2116+BU-300--2116', 'FR-2130+BU-3009--2130',
       'FR-2130+BU-3009--2290', 'FR-2130+BU-3009--2061',
       'FR-2116+BU3009--2116', 'FR1350+BU300--1350',
       'FR1350+BU3009--1350', 'FR2130+BU300--2130', 'FR2130+BU300--2061',
       'FR2116+BU300--2116', 'IGN OREMBE--2265', 'IGN OREMBE--2260',
       'IGN OREMBER--2265', 'IGN OREMBER--2260', 'IGNIFUGO(FR2260--2265',
       'IGNIFUGO(FR2260--2260', 'MD--Mdi', 'MD--mdi', 'MDI--Mdi',
       'MDI--MDI', 'MDI--mdi', 'MDI ACETILADO--MDI', 'MDI ACETILAD--Mdi',
       'FR2116+BU3009--2116', 'FR2130+BU3009--2130',
       'FR2130+BU3009--2061', 'IGN OREMBERFR21--2260',
       'MDI ACETILADA--Mdi'], dtype=object)

In [10]:
curvas

array(['FR2130+BU300--2130', 'FR2130+BU300--2260', 'FR2130+BU300--1350',
       'FR1350+BU300--1350', 'IGNIFUGO(FR2260--2260', 'IGN OREMBE--2260',
       'IGN OREMBE--1350', 'FR1350+BU300--2130', 'FR1350+BU300--2116',
       'FR2116+BU300--2116', 'FR2116+BU300--1350', 'FR1350+BU300--2260',
       'FR2116+BU300--2130', 'MD--mdi', 'FR2116+BU300--2260',
       'IGNIFUGO(FR2260--1350', 'FR1350+BU300--2061', 'FR2116+BU300--mdi',
       'MDI ACETILAD--Mdi', 'IGN OREMBE--2265', 'MD--1350',
       'FR1350+BU300--2265', 'IGNIFUGO(FR2260--2265',
       'FR2130+BU300--2290', 'FR2130+BU300--2061', 'FR1350+BU300--mdi',
       'FR2116+BU3009--1350', 'FR1350+BU3009--1350',
       'FR1350+BU3009--2116', 'FR2116+BU3009--2116',
       'FR1350+BU3009--2130', 'FR2130+BU3009--2130',
       'FR2130+BU3009--2061', 'FR2130+BU3009--1350',
       'FR2130+BU3009--2260', 'MDI--mdi', 'FR1350+BU3009--2265',
       'IGN OREMBER--2265', 'IGN OREMBER--1350', 'FR2130+BU3009--2265',
       'FR2116+BU3009--2130', 'FR2116

In [11]:
curvas = np.setdiff1d(curvas,curvas_buenas['curvas'].unique())

In [12]:
pd.DataFrame(curvas)[0].map(lambda x: re.sub('--','/',x))

0        FR1350+BU300/2061
1        FR1350+BU300/2116
2        FR1350+BU300/2130
3        FR1350+BU300/2260
4        FR1350+BU300/2265
5         FR1350+BU300/mdi
6       FR1350+BU3009/2116
7       FR1350+BU3009/2130
8       FR1350+BU3009/2265
9        FR1350+BU3009/mdi
10       FR2116+BU300/1350
11       FR2116+BU300/2130
12       FR2116+BU300/2260
13        FR2116+BU300/mdi
14      FR2116+BU3009/1350
15      FR2116+BU3009/2061
16      FR2116+BU3009/2130
17      FR2116+BU3009/2265
18       FR2130+BU300/1350
19       FR2130+BU300/2260
20       FR2130+BU300/2290
21      FR2130+BU3009/1350
22      FR2130+BU3009/2260
23      FR2130+BU3009/2265
24         IGN OREMBE/1350
25        IGN OREMBER/1350
26        IGN OREMBER/2130
27    IGNIFUGO(FR2260/1350
28                 MD/1350
29                MDI/1350
Name: 0, dtype: object

In [13]:
df_metricas = pd.DataFrame(index=curvas,columns=metricas)
metricas = ['roc_auc','accuracy','precision','kappa','sensibilidad','especificidad']
for curva in curvas:
        #df_metricas.loc[curva,metricas[0]] = roc_auc_score(y[y['CurvaCola_text']==curva]['CurvaCola'], y[y['CurvaCola_text']==curva]['Y_pred'])
        df_metricas.loc[curva,metricas[1]] = accuracy_score(y[y['CurvaCola_text']==curva]['CurvaCola'], y[y['CurvaCola_text']==curva]['Y_pred'])
        df_metricas.loc[curva,metricas[2]] = precision_score(y[y['CurvaCola_text']==curva]['CurvaCola'], y[y['CurvaCola_text']==curva]['Y_pred'])
        df_metricas.loc[curva,metricas[3]] = cohen_kappa_score(y[y['CurvaCola_text']==curva]['CurvaCola'], y[y['CurvaCola_text']==curva]['Y_pred'])
        df_metricas.loc[curva,metricas[4]] = recall_score(y[y['CurvaCola_text']==curva]['CurvaCola'], y[y['CurvaCola_text']==curva]['Y_pred'])
        df_metricas.loc[curva,metricas[5]] = recall_score(y[y['CurvaCola_text']==curva]['CurvaCola'], y[y['CurvaCola_text']==curva]['Y_pred'], pos_label=0)

df_metricas

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Unnamed: 0,roc_auc,accuracy,precision,kappa,sensibilidad,especificidad
FR1350+BU300--2061,,0.026316,1.0,0.0,0.026316,0.0
FR1350+BU300--2116,,0.0,0.0,0.0,0.0,0.0
FR1350+BU300--2130,,0.283465,1.0,0.0,0.283465,0.0
FR1350+BU300--2260,,0.0,0.0,0.0,0.0,0.0
FR1350+BU300--2265,,0.0,0.0,0.0,0.0,0.0
FR1350+BU300--mdi,,0.0,0.0,0.0,0.0,0.0
FR1350+BU3009--2116,,0.017341,1.0,0.0,0.017341,0.0
FR1350+BU3009--2130,,0.015152,1.0,0.0,0.015152,0.0
FR1350+BU3009--2265,,0.047619,1.0,0.0,0.047619,0.0
FR1350+BU3009--mdi,,0.0,0.0,0.0,0.0,0.0


In [14]:
df_metricas.to_csv('InformeCurvas.csv')