In [None]:
from __future__ import print_function
import os

import numpy as np
import pandas as pd
from tqdm import tqdm, tqdm_notebook
import matplotlib.pyplot as plt

# Summarize results

In [None]:
import re
import glob

# Namespace fields that may be missing from a log; each one is looked up
# independently so that one absent field does not hide the others.
_OPTIONAL_PATTERNS = {
    'optimizer': r"optimizer='(.{,10})',",
    'input': r"input=\[(.+)\],",
    'model': r"model='(.{,3})',",
}


def parse_run_log(lines):
    """Extract the run settings and the MAE from the lines of one log file.

    Parameters
    ----------
    lines : iterable of str
        Lines of a run's ``log`` file.

    Returns
    -------
    dict
        ``batchsize``, ``debug``, ``epochs``, ``DVIth`` and ``GYth`` taken
        from the line containing ``Namespace``; ``optimizer``, ``input`` and
        ``model`` when that line contains them; ``MAE`` (rounded to 4
        decimals) from the last parseable line containing ``acc``.

    Raises
    ------
    AttributeError
        If a ``Namespace`` line lacks one of the five mandatory fields.
    """
    parsed = {}
    for line in lines:
        if 'Namespace' in line:
            # Mandatory fields: a missing one is a malformed log and should fail loudly.
            parsed['batchsize'] = int(re.search(r'batchsize=([0-9]{,3}),', line).group(1))
            parsed['debug'] = str(re.search(r'debug=(.{,5}),', line).group(1))
            parsed['epochs'] = int(re.search(r'epochs=([0-9]{,4}),', line).group(1))
            parsed['DVIth'] = float(re.search(r'DVIth=([0-2]\.[0-9]{,3})', line).group(1))
            parsed['GYth'] = int(re.search(r'GYth=([0-9]{,3})', line).group(1))
            for name, pattern in _OPTIONAL_PATTERNS.items():
                found = re.search(pattern, line)
                if found is not None:
                    parsed[name] = str(found.group(1))
        elif 'acc' in line:
            found = re.search(r'>\s.+:\s(.*)', line)
            if found is None:
                # Mentions 'acc' but is not a "> name: value" result line.
                continue
            try:
                mae = float(found.group(1))
            except ValueError:
                continue
            parsed['MAE'] = round(mae, 4)
    return parsed


results = {}

# sorted(): glob returns entries in arbitrary order.
for run_dir in sorted(glob.glob('output/*')):

    # skipped folders
    if os.path.splitext(run_dir)[-1] == '.py':
        continue
    elif os.path.basename(run_dir) == 'old':
        continue

    # log exists?
    logpath = os.path.join(run_dir, 'log')
    if not os.path.exists(logpath):
        continue

    # one entry per run, keyed by the folder name
    key = os.path.basename(run_dir)
    with open(logpath, 'r') as f:
        results[key] = parse_run_log(f)

results = pd.DataFrame(results).T
# Folder names are parsed as month-day-hour-minute-second timestamps.
results.index = pd.to_datetime(results.index, format='%m%d-%H%M%S')
results

# Load a result

In [None]:
#easydict
import easydict
args = easydict.EasyDict({
            'path': './output/0415-200054//',
})

#init
outdir = args.path

#accuracies: echo every log line that mentions 'Test'
with open(os.path.join(args.path, 'log')) as f:
    for line in f:
        if 'Test' in line:
            # Strip only the line terminator; slicing with [:-1] would cut a
            # real character from a final line that has no trailing newline.
            print(line.rstrip('\n'))

#ml.png
from PIL import Image
image = Image.open(os.path.join(args.path, 'ml.png'))
# Last expression of the cell: the resized copy is what gets displayed.
image.resize((500, 500))

In [None]:
#sort by MAE and MSE
from IPython.display import display_html
def display_side_by_side(*args):
    """Render several tables next to each other in a single cell output.

    Parameters
    ----------
    *args
        Objects exposing ``to_html()`` (e.g. pandas DataFrames). Their HTML
        is concatenated in the given order.
    """
    html_str = ''.join(df.to_html() for df in args)
    # Only the opening tag gets the inline style. Replacing the bare word
    # 'table' would also rewrite '</table>' and any cell text containing it.
    inline_html = html_str.replace('<table', '<table style="display:inline"')
    display_html(inline_html, raw=True)
    
# Training history of the selected run; the first CSV column becomes the index.
history_csv = os.path.join(outdir, 'history.csv')
history = pd.read_csv(history_csv, index_col=0)
history.index.name = 'epoch'

# Five lowest rows for each validation metric, shown next to each other.
best_by_loss = history.sort_values('val_loss').head()
best_by_mae = history.sort_values('val_mean_absolute_error').head()
display_side_by_side(best_by_loss, best_by_mae)

In [None]:
#load model
# from vis.utils import utils
from keras import activations
from keras.models import load_model
# from vis.visualization import visualize_activation
# from vis.input_modifiers import Jitter

# best_epoch = history.sort_values('val_loss').index[0]
# Build the path the same way as every other cell (os.path.join) instead of
# gluing strings together with a hand-written separator.
model_path = os.path.join(args.path, 'best.h5')
print('Loading', model_path)
model = load_model(model_path)
model.summary()

In [None]:
import sys
# Re-running this cell must not keep appending duplicate entries to sys.path.
if 'convnet-drawer' not in sys.path:
    sys.path.append('convnet-drawer')
from keras_util import convert_drawer_model
from matplotlib_util import save_model_to_file

# Convert the loaded Keras model into a drawer model and write it as SVG
# next to the other artefacts of the run.
cmodel = convert_drawer_model(model)
cmodel.save_fig(os.path.join(args.path, 'model.svg'))
# save_model_to_file(cmodel, os.path.join(args.path, 'model.eps'))
print('saved')

In [None]:
#plot prediction results
from utils import xyline
from sklearn.metrics import mean_squared_error

# Panel title for each result file suffix.
panel_titles = {'train': 'training', 'valid': 'validation'}

fig, axes = plt.subplots(1, 2, figsize=(10, 4.5))

for ax, mode in zip(axes, ('train', 'valid')):
    result_csv = os.path.join(args.path, f'result_{mode}.csv')
    result = pd.read_csv(result_csv, index_col=0)

    # Actual vs. predicted values, with identical limits on both axes.
    result.plot.scatter(x='actual', y='predict', ax=ax, alpha=.05, s=1)
    xyline(ax)
    ax.set_xlim(300, 1200)
    ax.set_ylim(300, 1200)
    ax.set_title(panel_titles[mode])

    mse = mean_squared_error(result.actual, result.predict)
    print(f'({mode}) MSE: {mse:.1f}')

fig.tight_layout()
# NOTE(review): EPS cannot store transparency, so the alpha=.05 markers are
# written opaque in this file - consider a PDF/PNG export if that matters.
fig.savefig(os.path.join(args.path, 'result.eps'), dpi=300)

# Comparison of calculation time

In [None]:
# Number of random indices drawn for each timed run in the two timing cells below.
n_sample = 10

In [None]:
#time using CNN

from keras.models import load_model

path = 'output/0211-174552/'
model = load_model(os.path.join(path, 'best.h5'))
x_train = np.load(os.path.join(path, 'x_train.npy'))
y_train = np.load(os.path.join(path, 'y_train.npy'))
idxs = np.random.randint(0, len(x_train), n_sample)
%timeit pred = model.predict(x_train[idxs])

In [None]:
#time using SIMRIW
import os
import glob
from pysimriw import simriw

def simulate(csvpath, start_date):
    """Run one SIMRIW simulation and discard its output (copy of simulate.py).

    Only the call itself matters here because the cell measures run time;
    the function always returns None.
    """
    simriw.main(
        'Koshihikari',
        csvpath,
        True,
        start_date,
        350,
        './cultivars.hjson',
        silent=True,
    )

csvpaths = glob.glob('meshdata/*/*/*.csv')
idxs = np.random.randint(0, len(csvpaths), n_sample)
%timeit result = [simulate(csvpaths[idx], '2016-01-01') for idx in idxs]

# Are the test data within the range of the training data?

In [None]:
import AMD_Tools3 as amd

def meshcode2lat(meshcode):
    """Return the first element of ``amd.mesh2lalo(meshcode)`` (the latitude)."""
    return amd.mesh2lalo(meshcode)[0]

def meshcode2lon(meshcode):
    """Return the second element of ``amd.mesh2lalo(meshcode)`` (the longitude)."""
    return amd.mesh2lalo(meshcode)[1]

In [None]:
path = 'output/0415-200054/'
r_train = np.load(os.path.join(path, 'r_train.npy'))
r_valid = np.load(os.path.join(path, 'r_valid.npy'))

# Distinct values stored at [:, 0, 4, 0] in either split (used as the year
# further down). np.unique already returns them in ascending order.
train_years = r_train[:, 0, 4, 0]
valid_years = r_valid[:, 0, 4, 0]
years = np.unique(np.concatenate([train_years, valid_years]))

In [None]:
from matplotlib import cm
import seaborn as sns

# Lat/lon grid in 0.1 steps with every cell initialised to kind=4
# ("no sample"). It is the same for every year, so build it once and copy it.
base_grid = pd.DataFrame([{'kind': 4, 'lat': lat, 'lon': lon}
                          for lat in np.arange(31.1, 45.4, .1)
                          for lon in np.arange(128.6, 144.9, .1)])

fig, axes = plt.subplots(5, 8, figsize=(12, 7))
flat_axes = axes.flatten()
# zip() stops at the shorter input, so at most one panel per axes is drawn.
n_panels = min(len(years), len(flat_axes))

for year, ax in tqdm_notebook(zip(years, flat_axes), total=n_panels):

    piv = base_grid.copy()

    # kind=0 marks training cells, kind=2 validation cells; validation is
    # written second and therefore wins where both splits hit the same cell.
    for r, v in ([r_train, 0], [r_valid, 2]):
        df = r[r[:, 0, 4, 0] == year]
        print('df.shape[0]:', df.shape[0])
        # Rows may repeat a meshcode; resolving each distinct code once gives
        # the same map without repeating the lookup and the full-grid mask.
        for meshcode in np.unique(df[:, 0, 5, 0]):
            code = str(int(meshcode))
            lat = round(meshcode2lat(code), 1)
            lon = round(meshcode2lon(code), 1)
            # Tolerance match: the grid coordinates come from np.arange and
            # are not exact decimal values.
            piv.loc[(lat-.01<piv.lat) & (piv.lat<lat+.01) & (lon-.01<piv.lon) & (piv.lon<lon+.01), 'kind'] = v

    piv2 = pd.pivot_table(piv, values='kind', index='lat', columns='lon')
    sns.heatmap(piv2, ax=ax, cmap=cm.hot,
                vmin=0, vmax=4,
                xticklabels=[], yticklabels=[],
                cbar=False,
               )
    ax.set_title(f'year={int(year)}', fontsize=9)
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.invert_yaxis()

#hide the axes of panels that did not receive a year
for ax in flat_axes[len(years):]:
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

fig.tight_layout()
fig.savefig(os.path.join(path, 'data_overwrap.tiff'), dpi=300)