# ENGSCI 700 Geothermal Reservoir Optimisation

This workbook is for extracting Contact well data and recreating the plots.

On Unix, launch with `cd src` and then `jupyter notebook`.

File structure: 
```
(root)
├── src
│    └── Python Test.ipynb
└── wairakei_data
     └── Liquid wells (version 1).xlsx
```

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Reuse the workbook handle when this cell is re-run in the same kernel
# session, so the Excel file is only opened once.
try:
    xl
except NameError:
    # `xl` is not defined yet (first run): open the workbook now.
    # Only NameError is caught so that real failures (e.g. a missing file or
    # an interrupted kernel) are not silently swallowed.
    xl = pd.ExcelFile('../wairakei_data/Liquid wells (version 1).xlsx')
print(xl.sheet_names[:5], '...')

## Prepare data

In [None]:
# Load the 'wk255' sheet and clean it in one pass:
#   1. drop columns whose name starts with 'Unnamed' or contains 'SUMMARY'
#   2. coerce 'mf' to numbers, turning non-numeric ('dummy') entries into NaN
#   3. drop rows that are missing a date, whp or mf value
df = xl.parse('wk255')
df = (
    df.loc[:, ~df.columns.str.contains('^Unnamed|SUMMARY')]
      .assign(mf=lambda frame: pd.to_numeric(frame['mf'], errors='coerce'))
      .dropna(subset=['date', 'whp', 'mf'])
)
print(df.head())

## Create exploratory plots

In [None]:
# Exploratory figure: 'whp' against date (left) and 'mf' against 'whp' with
# one colour per row, running through the colormap in row order (right).
cmap = plt.get_cmap('viridis')

# Sample the colormap with floats in [0, 1] rather than integer indices up to
# cmap.N: index cmap.N lies one past the lookup table and only works because
# it falls back to the colormap's "over" colour.
my_colors = cmap(np.linspace(0, 1, len(df)))

fig, (ax1, ax2) = plt.subplots(1, 2)

df.plot('date', 'whp', style='x', ax=ax1)
ax1.set_xlabel('date')
ax1.set_ylabel('whp')

# One marker per observation so that each point can carry its own colour.
for whp_value, mf_value, colour in zip(df['whp'], df['mf'], my_colors):
    ax2.plot(whp_value, mf_value, 'o', color=colour)
ax2.set_xlabel('whp')
ax2.set_ylabel('mf')
plt.show()

## Set up regression data and create prediction frame for plotting

In [None]:
from itertools import product

# Express every observation date as the number of days elapsed since the
# earliest date in the data, so it can be used as a numeric regressor.
first_date = df['date'].min()
one_day = np.timedelta64(1,'D')
df['date_numeric'] = (df['date'] - first_date) / one_day

# Prediction frame for plotting: 6 evenly spaced dates spanning the observed
# range, each paired with 1000 evenly spaced whp values between 0 and 16.
day_lo = df['date_numeric'].min()
day_hi = df['date_numeric'].max()
date_pred = np.linspace(day_lo, day_hi, 6)
whp_pred = np.linspace(0, 16, 1000)
grid = [(day, whp) for day in date_pred for whp in whp_pred]
pred = pd.DataFrame(grid, columns=['date_numeric', 'whp'])

## Perform regression and prediction

In [None]:
from statsmodels.formula.api import ols

# Fit three ordinary-least-squares models to `df` and store each model's
# predictions on the plotting grid `pred` (columns 'mf1', 'mf2', 'mf3').

# Not conditioned on date
model1 = ols("mf ~ whp", data=df)
results1 = model1.fit()
pred['mf1'] = results1.predict(pred)

# Linear fit dependent on date
model2 = ols("mf ~ whp + date_numeric", data=df)
results2 = model2.fit()
pred['mf2'] = results2.predict(pred)

# Elliptic fit dependent on date: the model is fitted on mf^2, so the
# prediction has to be square-rooted to get back to mf.
model3 = ols("np.power(mf,2) ~ np.power(whp,2) + date_numeric", data=df)
results3 = model3.fit()
pred['mf^2'] = results3.predict(pred)
# Negative mf^2 has no real square root; blank those rows out first.
# A single .loc assignment is used instead of chained indexing
# (pred['mf^2'][mask] = ...), which may write to a temporary copy and leave
# `pred` unchanged.
pred.loc[pred['mf^2'] < 0, 'mf^2'] = np.nan
pred['mf3'] = np.sqrt(pred['mf^2'])

## Create plots

In [None]:
# ===============================================================
# Set up axes
# ===============================================================

from matplotlib.colors import Normalize
from matplotlib.colorbar import ColorbarBase

# subplots: one panel per model plus a narrow axis for the colorbar
fig, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=[9,4], gridspec_kw={'width_ratios': [9,9,9,1]})

# Share the y-axis between the three model panels (the colorbar axis must not
# take part, so sharey= cannot be passed to plt.subplots directly).
for ax in [ax2, ax3]:
    if hasattr(ax, 'sharey'):
        ax.sharey(ax1)
    else:
        # older matplotlib without Axes.sharey
        ax1.get_shared_y_axes().join(ax1, ax)
ax1.set_ylim([0, 1000])
# Raw strings: '\s' is not a valid Python escape sequence.
ax1.set_title(r'$mf \sim whp$')
ax1.set_ylabel('Mass flow')
ax1.set_xlabel("Well head pressure")
ax1.set_xlim(5, 16)
ax2.set_title(r'$mf \sim whp + date$')
ax3.set_title(r'$mf^2 \sim whp^2 + date$')
for ax in [ax2, ax3]:
    # Hide the tick labels on this panel only; replacing the tick label list
    # on an axis that shares its ticks would also blank the labels on ax1.
    ax.tick_params(labelleft=False)
    ax.set_xlim(5, 16)

# create date colorbar
# One line colour per prediction date, evenly spaced through the colormap.
# Floats in [0, 1] are used because integer index cmap.N is out of range.
my_colors = cmap(np.linspace(0, 1, len(date_pred)))
norm = Normalize(df['date'].min().year, df['date'].max().year)
cb = ColorbarBase(ax4, cmap=cmap, norm=norm, orientation='vertical')
cb.set_label('Year')

# ===============================================================
# Plot data
# ===============================================================

# plot data points
# Colour by days-since-first-observation: it is a linear function of the
# date, so the colours match a date-based mapping without depending on
# matplotlib converting datetime values to numbers.
for ax in [ax1, ax2, ax3]:
    ax.scatter(df['whp'], df['mf'], cmap=cmap, c=df['date_numeric'])

# model 1
# The prediction does not depend on date, so one date slice of the grid
# is enough to draw the curve.
pred_first = pred[pred['date_numeric'] == pred['date_numeric'].min()]
ax1.plot(pred_first['whp'], pred_first['mf1'])

# model 2 & 3: one curve per prediction date
for date, colour in zip(date_pred, my_colors):
    pred_at_date = pred[pred['date_numeric'] == date]
    ax2.plot(pred_at_date['whp'], pred_at_date['mf2'], c=colour)
    ax3.plot(pred_at_date['whp'], pred_at_date['mf3'], c=colour)

# show model selection criteria
# The legend is used as a text box: the two labels carry the statistics and
# the first handle is hidden.
for ax, result in zip([ax1, ax2, ax3], [results1, results2, results3]):
    legend = ax.legend(['Adj $R^2$: %.2f' % result.rsquared_adj,
                        'AIC: %.2f' % result.aic],
                       handlelength=0, handletextpad=0, loc=1)
    # `legendHandles` was renamed to `legend_handles` in newer matplotlib.
    if hasattr(legend, 'legend_handles'):
        handles = legend.legend_handles
    else:
        handles = legend.legendHandles
    handles[0].set_visible(False)

In [None]:
# Plot the 'h' column against date as semi-transparent markers.
df.plot(x='date', y='h', style='o', alpha=0.5)
plt.show()