In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import math

from picking.utils import *

Se obtiene un archivo .txt con:
- Tiempo real del sismo
- Magnitud
- Tiempo en que se debería observar el sismo en cada estación

Todos estos separados en columnas

In [6]:
# Station coordinates
stations_coord = {
    'CO10': (-29.24, -71.46),
    'AC04': (-28.20, -71.07),
    'AC05': (-28.84, -70.27),
    'CO05': (-29.92, -71.24)
    
}

# P-wave propagation speed
v_P = 8.064

# Read Excel file
df = pd.read_excel('Eventos_24hrs.xlsx')

# Calculate detection times and format DataFrame
formatted_df = calculate_detection_times(df, stations_coord, v_P, magnitude_range = (2,3.9))

# Save DataFrame to a CSV file
formatted_df.to_csv('times_events_under_four.txt', sep=',', index=False)

### Se obtiene un archito txt de catalogs_events_new.xlsx

In [3]:
# Lee el archivo Excel
df = pd.read_excel('catalog_events_new.xlsx', sheet_name="Catalogo_csn")

# Selecciona las columnas que deseas eliminar
columnas = ['n', 'Latitud', 'Longitud', 'Profundidad', 'Esta', 'Primera_obs', 'segunda_Obs', 'tercera_obs']
columnas += [col for col in df.columns if 'Fin' in col]

# Elimina las columnas seleccionadas
df = df.drop(columns=columnas)

# Elimina las filas donde la magnitud sea inferior a 4
df = df[df['Magnitud'] >= 4]

# Ajusta las fechas de las estaciones para que sean iguales en formato a la de la primera columna
columnas_inicio = [col for col in df.columns if 'Inicio' in col]
for col in columnas_inicio:
    df[col] = pd.to_datetime(df[col], format='%H:%M:%S:%f').dt.time
    df[col] = df.apply(lambda row: datetime.combine(row['Fecha UTC ORIGEN'], row[col]), axis=1)
    df[col] = df[col].apply(lambda x: x.strftime('%Y-%m-%dT%H:%M:%S.%fZ'))

# Formatea la columna 'Fecha UTC ORIGEN'
df['Fecha UTC ORIGEN'] = df['Fecha UTC ORIGEN'].apply(lambda x: x.strftime('%Y-%m-%dT%H:%M:%S.%fZ'))

# Guarda el DataFrame en un archivo de texto
df.to_csv('catalog_new_events.txt', sep=',', index=False)


In [4]:
def read_times(filename, time_format, skip_header=True):
    with open(filename, 'r') as f:
        reader = csv.reader(f)
        if skip_header:
            next(reader)  # Saltar la cabecera
        return [datetime.strptime(row[0], time_format) for row in reader]
    
read_times('times_events_24hrs_sup40.txt', time_format=f'%Y-%m-%dT%H:%M:%S')

[datetime.datetime(2021, 7, 4, 1, 1, 37),
 datetime.datetime(2021, 7, 4, 1, 29, 15),
 datetime.datetime(2021, 7, 4, 1, 34, 42),
 datetime.datetime(2021, 7, 4, 1, 36, 16),
 datetime.datetime(2021, 7, 4, 2, 57, 48),
 datetime.datetime(2021, 7, 4, 4, 3, 22),
 datetime.datetime(2021, 7, 4, 4, 8, 9),
 datetime.datetime(2021, 7, 4, 15, 5, 28),
 datetime.datetime(2021, 7, 4, 15, 57, 11),
 datetime.datetime(2021, 7, 4, 19, 39, 24),
 datetime.datetime(2021, 7, 4, 23, 40, 25)]

In [23]:
def dentro_rango(t1, t2, rango):
    return abs(t1 - t2) <= rango

def read_times_new(file_path, station_name, time_format):
    # Leer el archivo
    df = pd.read_csv(file_path, sep="\t")
    
    # Extraer la columna de la estación
    station_time = df[f"Inicio_{station_name}"]
    
    # Transformar el formato de tiempo
    transformed_time = pd.to_datetime(station_time, format= time_format).dt.to_pydatetime()
    
    return transformed_time.tolist()
    

def calcular_metricas_new(path_reales, path_predichos, rango):
    resultados_totales = []
    f_positivos = {}
    f_negativos = {}
    v_positivos = {}
    
    rango = timedelta(seconds=rango)

    for path_predicho in path_predichos:
        identificador = os.path.splitext(os.path.basename(path_predicho))[0].split('_')[-1]
        ic(identificador)
        ic(path_new_catalog)
        ic(path_predicho)

        tiempos_reales = read_times_new(path_reales,  station_name=identificador, time_format = f"%Y-%m-%dT%H:%M:%S.%fZ")
        tiempos_predichos = read_times_new(path_predicho, station_name=identificador, time_format=f"%Y-%m-%dT%H:%M:%S.%fZ")

        verdaderos_positivos = []
        falsos_positivos = []
        falsos_negativos = []

        for tiempo_real in tiempos_reales:
            for tiempo_predicho in tiempos_predichos:
                if dentro_rango(tiempo_real, tiempo_predicho, rango):
                    verdaderos_positivos.append(tiempo_predicho)
                    tiempos_predichos.remove(tiempo_predicho)
                    break
            else:
                falsos_negativos.append(tiempo_real)

        falsos_positivos = tiempos_predichos

        resultados = {'Verdaderos Positivos': len(verdaderos_positivos),
                      'Falsos Positivos': len(falsos_positivos),
                      'Falsos Negativos': len(falsos_negativos)}

        presicion = len(verdaderos_positivos) / (len(verdaderos_positivos) + len(falsos_positivos))
        recall = len(verdaderos_positivos) / (len(verdaderos_positivos) + len(falsos_negativos))
        f1_score = 2 * (presicion * recall) / (presicion + recall)

        presicion = round(presicion, 2)
        recall = round(recall, 2)
        f1_score = round(f1_score, 2)

        resultados_totales.append({
            'identificador': identificador,
            'resultados': resultados,
            'presicion': presicion,
            'recall': recall,
            'f1_score': f1_score
        })

        f_positivos[identificador] = list(falsos_positivos)
        f_negativos[identificador] = list(falsos_negativos)
        v_positivos[identificador] = list(verdaderos_positivos)

    return resultados_totales, f_negativos, f_positivos, v_positivos

path_new_catalog = "catalog_new_events.txt"
path_predichos = ['time_trigger_AC04.txt', 'time_trigger_AC05.txt', 'time_trigger_CO05.txt', 'time_trigger_CO10.txt']

results_no_val, f_neg_no_val, f_pos_no_val, t_pos_no_val = calcular_metricas_new(path_new_catalog, path_predichos, rango = 10)

ic| identificador: 'AC04'


ic| path_new_catalog: 'catalog_new_events.txt'
ic| path_predicho: 'time_trigger_AC04.txt'
  transformed_time = pd.to_datetime(station_time, format= time_format).dt.to_pydatetime()


KeyError: 'Inicio_AC04'

In [24]:
def read_times_new(file_path, station_name, time_format):
    # Leer el archivo
    df = pd.read_csv(file_path, sep="\t")
    
    # Extraer la columna de la estación
    station_time = df[f"Inicio_{station_name}"]
    
    # Transformar el formato de tiempo
    transformed_time = pd.to_datetime(station_time, format= time_format).dt.to_pydatetime()
    
    return transformed_time.tolist()
    
read_times_new(path_new_catalog, 'AC04', time_format = f"%Y-%m-%dT%H:%M:%S.%fZ")

  transformed_time = pd.to_datetime(station_time, format= time_format).dt.to_pydatetime()


[datetime.datetime(2021, 7, 4, 1, 1, 50, 423000),
 datetime.datetime(2021, 7, 4, 1, 29, 28, 187000),
 datetime.datetime(2021, 7, 4, 1, 34, 53, 428000),
 datetime.datetime(2021, 7, 4, 1, 36, 30, 489000),
 datetime.datetime(2021, 7, 4, 2, 58, 1, 510000),
 datetime.datetime(2021, 7, 4, 4, 3, 35, 638000),
 datetime.datetime(2021, 7, 4, 4, 8, 22, 566000),
 datetime.datetime(2021, 7, 4, 15, 5, 41, 913000),
 datetime.datetime(2021, 7, 4, 15, 57, 23, 828000),
 datetime.datetime(2021, 7, 4, 19, 39, 38, 212000),
 datetime.datetime(2021, 7, 4, 23, 40, 37, 244000)]

# Esto es solo para el archivo que dura una hora, nada más


In [32]:
# Read Excel file
df = pd.read_excel('Eventos_24hrs.xlsx')
# Tomar solo los eventos entre las horas 00:30 y 01:30
df = df[(df['Fecha UTC'] >= datetime(2021, 7, 4, 1, 00)) & (df['Fecha UTC'] <= datetime(2021, 7, 4, 4, 30))]

# Calculate detection times and format DataFrame
formatted_df_chiquito = calculate_detection_times(df, stations_coord, v_P, magnitude_thr=3.5)

# Save DataFrame to a CSV file
formatted_df_chiquito.to_csv('times_events_24hrs_chiquito.txt', sep=',', index=False)

In [33]:
formatted_df_chiquito

Unnamed: 0,Fecha UTC,Magnitud,Hora detección estación CO10,Hora detección estación AC04,Hora detección estación AC05,Hora detección estación CO05
0,2021-07-04T01:01:37,5.9,2021-07-04T01:01:43,2021-07-04T01:01:47,2021-07-04T01:01:52,2021-07-04T01:01:52
1,2021-07-04T01:07:22,3.6,2021-07-04T01:07:29,2021-07-04T01:07:31,2021-07-04T01:07:38,2021-07-04T01:07:38
7,2021-07-04T01:29:15,5.8,2021-07-04T01:29:21,2021-07-04T01:29:25,2021-07-04T01:29:30,2021-07-04T01:29:31
8,2021-07-04T01:34:42,4.0,2021-07-04T01:34:48,2021-07-04T01:34:51,2021-07-04T01:34:56,2021-07-04T01:34:58
9,2021-07-04T01:36:16,4.1,2021-07-04T01:36:22,2021-07-04T01:36:24,2021-07-04T01:36:30,2021-07-04T01:36:32
14,2021-07-04T01:50:43,3.6,2021-07-04T01:50:50,2021-07-04T01:50:51,2021-07-04T01:50:57,2021-07-04T01:50:59
16,2021-07-04T01:57:43,3.7,2021-07-04T01:57:48,2021-07-04T01:57:53,2021-07-04T01:57:58,2021-07-04T01:57:58
17,2021-07-04T02:05:56,3.7,2021-07-04T02:06:02,2021-07-04T02:06:05,2021-07-04T02:06:10,2021-07-04T02:06:12
18,2021-07-04T02:07:44,3.5,2021-07-04T02:07:50,2021-07-04T02:07:53,2021-07-04T02:07:59,2021-07-04T02:08:00
19,2021-07-04T02:08:29,3.6,2021-07-04T02:08:34,2021-07-04T02:08:39,2021-07-04T02:08:44,2021-07-04T02:08:44
