In [None]:
import os
import glob
import numpy as np
import pandas as pd
# import proplot as pplt
import matplotlib.pyplot as plt

from apertools import plotting, utils, lowess
import seaborn as sns

# import xarray as xr

# import hvplot.xarray
# import panel as pn

# pn.extension()
# pplt.rc.update({"subplots.share": False, "subplots.span": False})


# sns.set_style(style="white")
# Apply the apertools plotting style; size=16 is presumably the base font size (confirm in
# apertools.plotting.set_style).
plotting.set_style(size=16)


# NOTE(review): the generator is created without a seed, so anything drawn from RNG will differ
# between kernel restarts.
RNG = np.random.default_rng()

# Static inline figures; the commented-out widget backend is the interactive alternative.
%matplotlib inline
# %matplotlib widget

# autoreload mode 2: re-import edited modules before each cell executes.
%load_ext autoreload
%autoreload 2

In [None]:
# Directory containing the input data files read by later cells.
# Set the FRACKING_DATA_PATH environment variable to point somewhere else without editing the
# notebook; when it is unset, the original location is used unchanged.
data_path = os.path.join(
    os.environ.get(
        "FRACKING_DATA_PATH",
        "/Users/scott/Documents/Learning/masters-thesis/fracking-qgis-data/",
    ),
    "",  # guarantees a trailing separator: later cells build paths as `data_path + filename`
)

# Texas earthquakes by year

In [None]:
# Execute the USGS helper script inside this notebook's namespace. The -n flag runs it with
# __name__ set to the file name rather than "__main__", so any `if __name__ == "__main__"` block
# is skipped.
# Presumably this is what defines get_eqs_by_state_year used by the loading cells — confirm.
%run -n ~/repos/apertools/helpers/earthquakes_usgs.py

In [None]:
# Parse every "query_*m25*csv" file found in data_path (file names in descending order) with
# get_eqs_by_state_year and stack the per-file frames under a fresh 0..N-1 index.
with utils.chdir_then_revert(data_path):
    m25_csvs = sorted(glob.glob("query_*m25*csv"), reverse=True)
    m25_frames = [get_eqs_by_state_year(csv_name) for csv_name in m25_csvs]
    df_m25 = pd.concat(m25_frames).reset_index(drop=True)

# Rows whose state is Texas
df_tx_m25 = df_m25.loc[df_m25["state"] == "Texas"]

In [None]:
# Parse every "query_*m3*csv" file found in data_path (file names in descending order) with
# get_eqs_by_state_year and stack the per-file frames under a fresh 0..N-1 index.
with utils.chdir_then_revert(data_path):
    m3_csvs = sorted(glob.glob("query_*m3*csv"), reverse=True)
    m3_frames = [get_eqs_by_state_year(csv_name) for csv_name in m3_csvs]
    df_m3 = pd.concat(m3_frames).reset_index(drop=True)

# Rows whose state is Texas
df_tx_m3 = df_m3.loc[df_m3["state"] == "Texas"]

In [None]:
# States named in the first seven rows of df_m3.
# NOTE(review): these are the "top 7" only if df_m3's leading rows are ordered by count — confirm
# against get_eqs_by_state_year.
top7_states = df_m3["state"].iloc[:7]

# Every row (any year) belonging to one of those states
is_top7 = df_m3["state"].isin(top7_states)
df_top7 = df_m3.loc[is_top7]
df_top7

# Load oil production summary

In [None]:
# Read the Permian production table, then keep only years before 2021
# (2021 hasn't fully filled in).
oil_all_years = pd.read_csv(data_path + "PermianAnnualProductionData.csv")
df_oil = oil_all_years.loc[oil_all_years["year"] < 2021]

In [None]:
# Re-apply the apertools plot style for the figures that follow, now also passing
# weight="normal" and minor_ticks=False (exact effect of each argument is defined in
# apertools.plotting.set_style — confirm there).
plotting.set_style(size=16, weight="normal", minor_ticks=False)

# Figure: chapter 1 plot of earthquakes and oil production

In [None]:
def tick_over_lim(ax):
    """Return the y-limit span divided by the span between the first and last y ticks.

    Parameters
    ----------
    ax : object exposing ``get_yticks()`` and ``get_ylim()`` (e.g. a matplotlib Axes)

    Returns
    -------
    float
        ``(ylim[-1] - ylim[0]) / (yticks[-1] - yticks[0])``. Two axes with the same ratio
        (and the same tick count) have their ticks at matching heights.
    """
    tick_positions = ax.get_yticks()
    y_limits = ax.get_ylim()
    tick_span = tick_positions[-1] - tick_positions[0]
    limit_span = y_limits[-1] - y_limits[0]
    return limit_span / tick_span
# NOTE(review): `ax` and `ax2` are not defined in any earlier cell shown here, so on a fresh
# kernel (Restart & Run All) this line raises NameError; it only works after a later figure cell
# has been executed. Consider moving this check below the twin-axis figure.
tick_over_lim(ax), tick_over_lim(ax2)

In [None]:
def calculate_ticks(ax, ticks, round_to=0.1, center=False):
    """Compute ``ticks`` evenly spaced y tick values that cover the axis' current y bounds.

    The bounds are first widened outward to whole multiples of ``round_to``. The tick spacing is
    then chosen as a whole number of ``round_to`` steps, strictly larger than the exact fit, so
    the last tick always lies beyond the upper bound.

    Parameters
    ----------
    ax : object exposing ``get_ybound()`` (e.g. a matplotlib Axes)
    ticks : int
        Number of tick values to return. ``ticks - 1`` is used as a divisor, so pass at least 2.
    round_to : float, default 0.1
        Granularity of the tick values; every returned value is a multiple of it.
    center : bool, default False
        If True, shift the first tick down by half of the surplus range (rounded down to a whole
        step) so the overshoot is split between both ends instead of sitting entirely at the top.

    Returns
    -------
    numpy.ndarray
        ``ticks`` equally spaced values, in data units.
    """
    ybound_low, ybound_high = ax.get_ybound()

    # Work in integer multiples of `round_to` until the final rescale
    upperbound = np.ceil(ybound_high / round_to)
    lowerbound = np.floor(ybound_low / round_to)

    dy = upperbound - lowerbound
    fit = np.floor(dy / (ticks - 1)) + 1  # steps per tick interval; always exceeds the exact fit
    dy_new = (ticks - 1) * fit
    if center:
        offset = np.floor((dy_new - dy) / 2)
        lowerbound = lowerbound - offset
    values = np.linspace(lowerbound, lowerbound + dy_new, ticks)
    return values * round_to

In [None]:
ntick = 5  # both y axes get the same number of ticks so one grid serves both scales

fig, ax1 = plt.subplots(figsize=(5, 5))

# --- Left axis: Texas M3+ earthquake count per year ---
eq_ymax = 200
l1 = ax1.plot(
    df_tx_m3["year"],
    df_tx_m3["count"],
    label="EQs",
    marker="d",
    lw=4,
)
ax1.set_ylim(0, eq_ymax * 1.05)  # 5% headroom above the top tick
ax1.set_yticks(np.linspace(0, eq_ymax, ntick))
print(ax1.get_ybound(), ax1.get_ylim())
ax1.set_ylabel("M3+ Earthquake Count")

# --- Right axis: oil production ---
ax2 = ax1.twinx()
oil_ymax = 3
# production / 365.25 / 1e6: presumably annual totals converted to millions per day — confirm
# the units of the `production` column.
# (A disabled alternative plotted df_oil.injection the same way, with ticks spanning 0-20.)
oil_daily = df_oil["production"] / 365.25 / 1e6
l2 = ax2.plot(df_oil["year"], oil_daily, label="oil", color="C3", marker=".", lw=4)
ax2.set_ylim(0, oil_ymax * 1.05)
ax2.set_yticks(np.linspace(0, oil_ymax, ntick))
ax2.set_ylabel("Daily oil production [MBBl / day]")

# Grid is drawn from the left axis only; the matching tick counts make it line up with the right
ax1.grid()

# Single legend holding the first line from each axis
ax1.legend([l1[0], l2[0]], ["EQs", "Oil"])

fig.savefig("../scratch/earthquakes_texas_m3.pdf")

In [None]:
# # sns.lineplot(data=df_m3[df_m3.state == "Texas"], x="year", y="count", hue="state")

# fig, ax = plt.subplots(figsize=(5, 5))
# ax.plot(df_tx_m25['year'], df_tx_m25['count'], lw=4, marker='d')
# # df_tx_m3.plot(ax=ax, x='year', y='count', lw=4, marker='d')
# # ax.plot(df_tx_m25['year'].values, df_tx_m25['count'].values)
# ax.grid()
# ax.set_ylabel("M2.5+ Earthquake Count")

# fig.savefig("../scratch/earthquakes_texas_m3_onlyeq.pdf")

## Cumulative graph

In [None]:
# Every calendar year from the first to the last year present in the Texas M3+ table (inclusive)
years = df_tx_m3["year"]
first_year, last_year = years.min(), years.max()
years_all = np.arange(first_year, last_year + 1)

# .sort_values("year")

In [None]:
fig, ax1 = plt.subplots(figsize=(5, 5))

# Running total of Texas M3+ counts, accumulated in chronological order
tx_m3_chrono = df_tx_m3.sort_values("year")
l1 = ax1.plot(
    tx_m3_chrono["year"],
    tx_m3_chrono["count"].cumsum(),
    label="EQs",
    marker="d",
    lw=4,
)


In [None]:
df_filled

In [None]:
# df_tx_m3.sort_values('year')
all_years = np.arange(df_tx_m3.year.min(), df_tx_m3.year.max() + 1)
df_filled = df_tx_m3[['year', 'count']].sort_values('year').set_index('year').reindex(all_years, fill_value=0)
df_filled.rolling(4).mean()

In [None]:
# NOTE(review): this cell redraws the same earthquake/oil twin-axis figure as the chapter 1 cell,
# without saving it.
ntick = 5  # identical tick count on both y axes keeps the grid aligned with both scales

fig, ax1 = plt.subplots(figsize=(5, 5))

# Left axis: Texas M3+ earthquake count per year
eq_top = 200
eq_ticks = np.linspace(0, eq_top, ntick)
l1 = ax1.plot(df_tx_m3["year"], df_tx_m3["count"], marker="d", label="EQs", lw=4)
ax1.set_ylim(0, eq_top * 1.05)  # 5% headroom above the top tick
ax1.set_yticks(eq_ticks)
print(ax1.get_ybound(), ax1.get_ylim())
ax1.set_ylabel("M3+ Earthquake Count")

# Right axis: oil production, scaled by 1 / 365.25 / 1e6 (presumably annual totals converted to
# millions per day — confirm the units of the `production` column).
# (A disabled alternative plotted df_oil.injection the same way, with ticks spanning 0-20.)
ax2 = ax1.twinx()
oil_top = 3
oil_ticks = np.linspace(0, oil_top, ntick)
l2 = ax2.plot(
    df_oil["year"],
    df_oil["production"] / 365.25 / 1e6,
    marker=".",
    label="oil",
    color="C3",
    lw=4,
)
ax2.set_ylim(0, oil_top * 1.05)
ax2.set_yticks(oil_ticks)
ax2.set_ylabel("Daily oil production [MBBl / day]")

ax1.grid()

# One legend with the first line of each axis
ax1.legend([l1[0], l2[0]], ["EQs", "Oil"])

# fig.savefig("../scratch/earthquakes_texas_m3.pdf")

## Alternative oil source: Texas RRC total dump

In [None]:
# Texas RRC district-level production dump; the file layout is described in
# https://www.rrc.texas.gov/media/50ypu2cg/pdq-dump-user-manual.pdf
df = pd.read_csv(data_path + "OG_DISTRICT_CYCLE_DATA_TABLE.dsv", sep="}")
df.columns = df.columns.str.lower()

# Districts: https://www.rrc.texas.gov/oil-and-gas/major-oil-and-gas-formations/permian-basin/
permian_districts = ["7C", "08", "8A"]

# Build df_permian as a chain that returns new frames. The previous version called
# rename(inplace=True) and assigned a column on a slice of `df`, which triggers
# SettingWithCopyWarning and is not guaranteed to behave the same across pandas versions.
df_permian = (
    df.loc[df["district_name"].isin(permian_districts)]
    .rename(columns={"cycle_year": "year", "cycle_month": "month"})
    # to_datetime assembles timestamps from year/month/day columns; day=1 -> first of each month
    .assign(date=lambda d: pd.to_datetime(d[["year", "month"]].assign(day=1)))
)

# Sum oil production over the selected districts for each month
df_totals = df_permian[["date", "dist_oil_prod_vol"]].groupby("date").sum()
df_totals.head()

In [None]:
fig, ax = plt.subplots(figsize=(7, 7))

# Monthly totals -> approximate daily average (flat 30 days per month), scaled by 1e6
daily_avg = df_totals / 30 / 1e6
daily_avg.plot(ax=ax, marker=".")

ax.set_title("Daily average production")
ax.set_ylabel("MBBl / day")

# Example GPS station 

In [None]:
from apertools import gps, gps_plots
import proplot as pplt

In [None]:
# ENU (presumably east/north/up — confirm in apertools.gps_plots) time series for station TXMC
gps_request = dict(station="TXMC", start_date="2015-01-01", end_date="2019-01-01")
fig, axes = gps_plots.plot_gps_enu(**gps_request)

# Treat the x axis of the third panel as dates
ax = axes[2]
ax.xaxis_date()

In [None]:
# NOTE(review): interactive help lookup (IPython `?` syntax) left over from development; it
# produces nothing the rest of the notebook uses and can be removed from the final version.
fig.savefig?

In [None]:
# NOTE(review): import placed mid-notebook; consider moving it to the import cell at the top.
from matplotlib.dates import YearLocator
# YearLocator with default arguments: one major tick per year.
locator = YearLocator()

In [None]:
# GPS time series for station TXMC with a common y range of (-2, 2) on every panel
fig, axes = gps_plots.plot_gps_enu(
    station="TXMC",
    start_date="2015-01-01",
    end_date="2019-01-01",
    ylim=(-2, 2),
)

for ax in axes:
    # A matplotlib Locator instance must not be attached to more than one axis, so each panel
    # gets its own YearLocator (one major tick per year) instead of sharing a single instance.
    ax.xaxis.set_major_locator(YearLocator())

fig.savefig("../figures/chapter3-permian/gps-txmc.pdf")

In [None]:
# Shell escape: print the kernel's current working directory. The relative paths passed to
# fig.savefig in this notebook resolve against it.
# NOTE(review): looks like a leftover debugging check; remove before sharing if not needed.
!pwd