In [53]:
from datetime import datetime
import pandas as pd
from libcomcat.search import search
import matplotlib.pyplot as plt 
import os
import numpy as np
import pickle

In [3]:
# Load the grid-cell ids that the rest of the notebook works with.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only point this at a file you produced yourself or otherwise trust.
with open('data/use_grid_id.pkl', 'rb') as grid_id_file:
    # The context manager closes the handle; the original left it open.
    use_grid_id = pickle.load(grid_id_file)

In [8]:
# Order the ids in place, then map each id to its position in that ordering.
use_grid_id.sort()
use_grid_id_dict = {
    grid_id: position for position, grid_id in enumerate(use_grid_id)
}

In [13]:

# Latitude / longitude bounds of the gridded area and the cell size per axis.
lat_min, lat_max = 32, 36
lon_min, lon_max = -120, -114
lat_step, lon_step = 0.1, 0.1

# Number of bin edges per axis, derived from the bounds and step so that
# changing a step or a bound cannot silently disagree with a hard-coded count.
# round() absorbs floating-point noise such as 4 / 0.1 == 40.00000000000001.
n_lat_edges = int(round((lat_max - lat_min) / lat_step)) + 1
n_lon_edges = int(round((lon_max - lon_min) / lon_step)) + 1

# Bin edges, rounded to 2 decimals so they are stable when used as dict keys.
lat_bins = np.around(np.linspace(lat_min, lat_max, n_lat_edges), 2)
lon_bins = np.around(np.linspace(lon_min, lon_max, n_lon_edges), 2)

In [17]:
# Build three lookup tables in a single pass over the bin edges (the last edge
# on each axis is skipped):
#   grid      : (lat, lon) -> empty list
#   area2grid : (lat, lon) -> running integer id, assigned in iteration order
#   grid2area : integer id -> (lat, lon), the inverse of area2grid
grid = {}
area2grid = {}
count = 0
for lat in lat_bins[:-1]:
    for lon in lon_bins[:-1]:
        cell_key = (lat, lon)
        grid[cell_key] = []
        area2grid[cell_key] = count
        count += 1
grid2area = dict(zip(area2grid.values(), area2grid.keys()))


In [84]:
def construct_earthquake_csv(directory, start_date, end_date, output_file,
                             location_ids=None):
    """Build a daily maximum-magnitude table, one column per location, and save it.

    Every regular, non-empty file in ``directory`` is read as a CSV that must
    provide ``Location_id``, ``Time`` and ``Magnitude`` columns. The location of
    a file is taken from its first ``Location_id`` value; files whose location
    is not in ``location_ids`` are ignored. For each kept file the largest
    magnitude per calendar day is computed, aligned to the daily range
    ``start_date`` .. ``end_date`` and days without events are filled with 0.

    Parameters
    ----------
    directory : str
        Folder containing the per-location CSV files.
    start_date, end_date :
        Bounds of the daily index (passed to ``pd.date_range``).
    output_file : str
        Path the combined table is written to with ``DataFrame.to_csv``.
    location_ids : container, optional
        Any object supporting ``in``. Defaults to the notebook-level
        ``use_grid_id_dict`` when omitted.

    Returns
    -------
    pandas.DataFrame
        Rows are days, columns are location ids sorted ascending.

    Raises
    ------
    ValueError
        If no file in ``directory`` contributes a column.
    """
    if location_ids is None:
        # Looked up lazily so an explicit argument never needs the global.
        location_ids = use_grid_id_dict

    # Loop-invariant: the same daily index is used for every location.
    date_range = pd.date_range(start=start_date, end=end_date, freq='D')

    daily_columns = []
    for station_file in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, station_file)
        # isfile() skips sub-directories (e.g. .ipynb_checkpoints) that
        # read_csv cannot open; zero-byte files are skipped as before.
        if not os.path.isfile(file_path) or os.path.getsize(file_path) == 0:
            continue
        try:
            events = pd.read_csv(file_path)
        except pd.errors.EmptyDataError:
            continue
        if events.empty:
            # Header-only file: there is no first row to read the id from.
            continue

        location_id = events["Location_id"].iloc[0]
        if location_id not in location_ids:
            continue

        event_days = pd.to_datetime(events['Time'], format='ISO8601').dt.date
        daily_max = events['Magnitude'].groupby(event_days).max()
        # Convert the datetime.date labels to datetime64 explicitly so that
        # alignment with date_range does not depend on implicit coercion.
        daily_max.index = pd.to_datetime(daily_max.index)
        daily_columns.append(
            daily_max.reindex(date_range).fillna(0).rename(location_id)
        )

    if not daily_columns:
        raise ValueError(
            f"No usable earthquake files found in {directory!r}"
        )

    df = pd.concat(daily_columns, axis=1)
    df = df.sort_index(axis=1)
    df.to_csv(output_file)
    return df

# Build and save the daily magnitude table from the per-location files.
directory = 'data/grid_data'
start_date, end_date = pd.to_datetime('2001-01-01'), pd.to_datetime('2024-08-31')
df = construct_earthquake_csv(
    directory=directory,
    start_date=start_date,
    end_date=end_date,
    output_file='data/earthquake_2001_onwards.csv',
)

(8644, 500)


In [81]:
def construct_energy_csv(directory, output_file, location_ids=None):
    """Combine per-location ``Energy`` series into one table and save it.

    Every regular, non-empty file in ``directory`` is read as a CSV whose first
    column becomes the row index and which must provide ``Location_id`` and
    ``Energy`` columns. The location of a file is taken from its first
    ``Location_id`` value; files whose location is not in ``location_ids`` are
    ignored. The kept ``Energy`` columns are placed side by side, aligned on
    their indexes.

    Parameters
    ----------
    directory : str
        Folder containing the per-location CSV files.
    output_file : str
        Path the combined table is written to with ``DataFrame.to_csv``.
    location_ids : container, optional
        Any object supporting ``in``. Defaults to the notebook-level
        ``use_grid_id_dict`` when omitted.

    Returns
    -------
    pandas.DataFrame
        One column per location id, columns sorted ascending.

    Raises
    ------
    ValueError
        If no file in ``directory`` contributes a column.
    """
    if location_ids is None:
        # Looked up lazily so an explicit argument never needs the global.
        location_ids = use_grid_id_dict

    energy_columns = []
    for station_file in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, station_file)
        # isfile() skips sub-directories (e.g. .ipynb_checkpoints) that
        # read_csv cannot open; zero-byte files are skipped as before.
        if not os.path.isfile(file_path) or os.path.getsize(file_path) == 0:
            continue
        try:
            records = pd.read_csv(file_path, index_col=0)
        except pd.errors.EmptyDataError:
            continue
        if records.empty:
            # Header-only file: there is no first row to read the id from.
            continue

        location_id = records["Location_id"].iloc[0]
        if location_id not in location_ids:
            continue

        # A Series named after the location becomes that column in concat.
        energy_columns.append(records['Energy'].rename(location_id))

    if not energy_columns:
        raise ValueError(
            f"No usable energy files found in {directory!r}"
        )

    df = pd.concat(energy_columns, axis=1)
    df = df.sort_index(axis=1)
    df.to_csv(output_file)
    return df
# Build and save the combined energy table from the per-location files.
directory = 'data/log_energy_data'
df = construct_energy_csv(
    directory=directory,
    output_file='data/energy_2001_onwards.csv',
)