This notebook extracts the WK255 data from the Excel workbook and recreates the plots.

File structure: 
```
(root)
├── src
│    └── Python Test.ipynb
└── wairakei_data
     └── Liquid wells (version 1).xlsx
```

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Open the Excel workbook only once: when this cell is re-run, `xl` already
# exists and the file is not opened again.
try:
    xl
except NameError:
    # Only a missing name means "not loaded yet"; any other exception
    # (including KeyboardInterrupt) should propagate instead of being hidden.
    xl = pd.ExcelFile('../wairakei_data/Liquid wells (version 1).xlsx')
print(xl.sheet_names[:5], '...')

In [None]:
# Load the 'wk255' sheet, drop unwanted columns, and keep only rows that have
# a date, a whp value and a numeric mf value.
# NOTE(review): the original comment said "remove rows at end" - presumably
# the dropped rows are trailing summary rows; confirm against the sheet.
df = xl.parse('wk255')

# Drop columns whose name starts with "Unnamed" or contains "SUMMARY".
# na=False keeps columns whose label is not a string (str.contains would
# otherwise yield NaN for them and break the boolean negation).
# .copy() makes the result an independent frame so the column assignments
# below (and in later cells) do not act on a slice of the parsed sheet.
keep_columns = ~df.columns.str.contains('^Unnamed|SUMMARY', na=False)
df = df.loc[:, keep_columns].copy()

# Non-numeric mf entries become NaN and are removed by the dropna below.
df['mf'] = pd.to_numeric(df['mf'], errors='coerce')
df = df.dropna(subset=['date', 'whp', 'mf'])
print(df.head())

In [None]:
# create exploratory plots:
#   left  - whp against date
#   right - mf against whp, each point coloured by its row position in df

cmap = plt.get_cmap('viridis')
# Valid integer colormap indices are 0 .. cmap.N - 1, so the last row must map
# to cmap.N - 1 (cmap.N itself is out of range).
indices = np.linspace(0, cmap.N - 1, len(df))
my_colors = [cmap(int(i)) for i in indices]

fig, (ax1, ax2) = plt.subplots(1, 2)

df.plot('date', 'whp', style='x', ax=ax1)
ax1.set_xlabel('date')
ax1.set_ylabel('whp')

# One plot call per point so each point can carry its own colour; iterating
# the columns directly avoids a label lookup per row.
for whp, mf, color in zip(df['whp'], df['mf'], my_colors):
    ax2.plot(whp, mf, 'o', color=color)
ax2.set_xlabel('whp')
ax2.set_ylabel('mf')
plt.show()

In [None]:
# fit regression model

from sklearn import linear_model
from statsmodels.formula.api import ols
from sklearn.metrics import mean_squared_error, r2_score
from itertools import product

# convert date to a numeric value (days since first)
df['date_numeric'] = (df['date'] - df['date'].min()) / np.timedelta64(1, 'D')

# Prediction grid for plotting: 6 evenly spaced dates crossed with the two
# extreme whp values, i.e. two end points per date.
date_pred = np.linspace(df['date_numeric'].min(), df['date_numeric'].max(), 6)
whp_pred = np.array([df['whp'].min(), df['whp'].max()])
pred = pd.DataFrame(list(product(date_pred, whp_pred)), columns=['date_numeric', 'whp'])

print(pred)

# Not conditioned on date
model = ols("mf ~ whp", data=df)
results = model.fit()

pred['mf'] = results.predict(pred)

# Observations on both axes, one colour per row in row order.
# Valid integer colormap indices are 0 .. cmap.N - 1.
indices = np.linspace(0, cmap.N - 1, len(df))
my_colors = [cmap(int(i)) for i in indices]
fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
for whp, mf, color in zip(df['whp'], df['mf'], my_colors):
    ax1.plot(whp, mf, 'o', color=color)
    ax2.plot(whp, mf, 'o', color=color)

ax1.plot(pred['whp'], pred['mf'])
ax1.set_title('mf ~ whp')

# Condition on date; `whp * date_numeric` fits both main effects plus their
# interaction.
model = ols("mf ~ whp * date_numeric", data=df)
results = model.fit()
# print(results.summary())

pred['mf'] = results.predict(pred)
# Title matches the fitted formula (interaction included).
ax2.set_title('mf ~ whp * date')

# One fitted line per prediction date. Colours are sampled over the number of
# dates (not the number of prediction rows) so the lines span the whole
# colormap.
indices = np.linspace(0, cmap.N - 1, len(date_pred))
my_colors = [cmap(int(i)) for i in indices]
for i, date in enumerate(date_pred):
    # Exact float comparison is safe: pred['date_numeric'] was built from
    # these same date_pred values.
    on_date = pred['date_numeric'] == date
    ax2.plot(pred['whp'][on_date], pred['mf'][on_date], color=my_colors[i])


plt.show()

In [None]:
# Plot the 'h' column against 'date' as half-transparent circle markers.
df.plot(x='date', y='h', style='o', alpha=0.5)
plt.show()