In [None]:
import glob
import os
import subprocess

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

In [None]:
#load simdata
# Build one summary row per simulation CSV from its header line and its last
# data line. The files are read directly in Python instead of parsing the
# output of `head`/`tail`: those tools print the `==> name <==` banner only
# when given more than one file, the 3-line stride breaks on files without a
# trailing newline, and a shell glob over many files can exceed the argument
# length limit.

def _read_first_and_last_line(csvpath):
    """Return (header line, last non-blank line) of a text file.

    Both lines are returned without their trailing newline.

    Raises:
        ValueError: if the file has no non-blank line after the first one.
    """
    with open(csvpath, encoding='utf-8') as f:
        first = f.readline().rstrip('\n')
        last = None
        for line in f:
            line = line.rstrip('\n')
            if line.strip():  # ignore blank trailing lines
                last = line
    if last is None:
        raise ValueError(f'{csvpath}: no data rows after the header')
    return first, last


# sorted() makes the row order independent of filesystem enumeration order
csvpaths = sorted(glob.glob('simdata/*/*/*.csv'))
if not csvpaths:
    raise FileNotFoundError("no files match 'simdata/*/*/*.csv'")

header = None
records = []
for csvpath in csvpaths:
    first_line, last_line = _read_first_and_last_line(csvpath)

    #extract header
    columns = first_line.split(',')
    if header is None:
        header = columns
    # assert all simdata has the same header
    assert header == columns, f'{csvpath}: header differs from {csvpaths[0]}'

    # the three trailing path components are <pref>/<year>/<meshcode>.csv
    stem = os.path.normpath(os.path.splitext(csvpath)[0])
    pref, year, meshcode = stem.split(os.sep)[-3:]

    # values stay strings here; selected columns are converted below
    simdatum = dict(zip(header, last_line.split(',')))
    simdatum['pref'] = pref
    simdatum['meshcode'] = meshcode
    simdatum['year'] = int(year)
    records.append(simdatum)

#to dataframe
simdata = pd.DataFrame(records)
for col in ('DVI', 'GY', 'LAI', 'TMX', 'TAV', 'RAD'):
    simdata[col] = pd.to_numeric(simdata[col])
simdata.head()

In [None]:
# Display the DL column of the per-file summary table
simdata['DL']

# DVI variation


In [None]:
# Histogram of the DVI column with labelled axes
dvi_ax = simdata['DVI'].hist()
dvi_ax.set(xlabel='DVI', ylabel='Frequency')

In [None]:
# DVI summary statistics for each `pref` group
simdata.groupby(['pref'])['DVI'].describe()

In [None]:
# DVI summary statistics for each `year` group
simdata.groupby(['year'])['DVI'].describe()

In [None]:
# DVI summary statistics for each (`pref`, `year`) combination
simdata.groupby(['pref', 'year'])['DVI'].describe()

# GY Heatmap

In [None]:
import AMD_Tools3 as amd

# Name of the simdata column that is aggregated per (meshcode, year) below
# and passed to the scatter/map plots as the plotted value.
param = 'GY'

def meshcode2lat(meshcode):
    """Return element 0 of ``amd.mesh2lalo(meshcode)``.

    Presumably this is the latitude of the mesh (going by the function
    names) -- TODO confirm against the AMD_Tools3 documentation.
    """
    return amd.mesh2lalo(meshcode)[0]

def meshcode2lon(meshcode):
    """Return element 1 of ``amd.mesh2lalo(meshcode)``.

    Presumably this is the longitude of the mesh (going by the function
    names) -- TODO confirm against the AMD_Tools3 documentation.
    """
    return amd.mesh2lalo(meshcode)[1]

#groupby
# One value per (meshcode, year): the maximum of `param` within that group.
simdata_grouped = simdata.groupby(by=['meshcode', 'year'])[param].max()
# print('len(DVI<1.9):', (simdata_grouped < 1.9).sum())
# print('len(simdata):', len(simdata_grouped))
simdata2 = simdata_grouped.reset_index()

# Each meshcode repeats once per year, so look the coordinates up once per
# distinct meshcode and map them back onto the rows instead of converting
# every row twice.
mesh_codes = simdata2.meshcode.astype(str)
distinct_codes = mesh_codes.unique()
lat_by_code = {code: meshcode2lat(code) for code in distinct_codes}
lon_by_code = {code: meshcode2lon(code) for code in distinct_codes}
simdata2['lat'] = mesh_codes.map(lat_by_code)
simdata2['lon'] = mesh_codes.map(lon_by_code)
simdata2.head()

In [None]:
#scatter plot
# One lon/lat scatter panel per year, coloured by `param`. The grid is sized
# from the number of years (4 panels per row) so no year is silently dropped;
# with 36 years this is the original 9x4 grid at figsize (12, 15).
years = simdata2.year.unique()
n_cols = 4
n_rows = max(1, -(-len(years) // n_cols))  # ceiling division, at least one row
fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, n_rows * 15 / 9))
panels = axes.flatten()
for year, ax in zip(years, panels):
    df = simdata2[simdata2.year == year].copy()
    df.plot.scatter(x='lon', y='lat', c=param, ax=ax)
    ax.set_title(f'year={year}', fontsize=9)
# hide the spare panels of the last row instead of showing empty frames
for spare_ax in panels[len(years):]:
    spare_ax.set_visible(False)
fig.tight_layout()

In [None]:
#on an interactive folium map
import folium
for year in simdata2.year.unique():
    # Select this year's rows here; do not rely on a `df` left over from an
    # earlier cell, which would hold a different year's data.
    df = simdata2[simdata2.year == year]

    #scatter map plot
    year_map = folium.Map(location=[35.1708333, 138.18125], zoom_start=6)
    for _, row in df.iterrows():
        # NOTE(review): fill_color receives the raw numeric value of `param`;
        # folium presumably expects a colour string here -- confirm the intent
        # and map the value through a colormap if a heatmap colouring is wanted.
        folium.CircleMarker([row['lat'], row['lon']], radius=1,
                    popup=str(row[param]), fill_color=row[param], fill_opacity=0.5).add_to(year_map)
    # year_map.save(f'output/heatmap_{year}.html')
    break  # only the first year is rendered inline
year_map

# Input data

In [None]:
#load all simdata (take time...)
# Read every simulation CSV in full and stack the frames row-wise.
csv_paths = glob.glob('simdata/*/*/*.csv')
frames = []
for csv_path in csv_paths:
    frames.append(pd.read_csv(csv_path))
simdata_all = pd.concat(frames, axis=0)

print('simdata_all.shape:', simdata_all.shape)
simdata_all.head()

In [None]:
#Histogram of inputs

dt = ['DVI', 'DVR', 'TAV', 'TMX', 'RAD', 'DL', 'PPM']
fig, axes = plt.subplots(2, 4, figsize=(12, 5))

# Pair each column with the next axis of the flattened 2x4 grid; tqdm wraps
# the column list so progress is reported per column.
for _dt, ax in zip(tqdm(dt), axes.flatten()):
    simdata_all[_dt].plot.hist(ax=ax)
    ax.set(xlabel=_dt, ylabel='Frequency')

fig.tight_layout()

In [None]:
#Scatter matrix of inputs

import seaborn as sns
sns.set(style="ticks")

dt = ['DVI', 'DVR', 'TAV', 'TMX', 'RAD', 'DL', 'PPM']
df = simdata_all[dt]
# random sampling to make it fast; the fixed random_state keeps the sampled
# rows (and therefore the figure) the same across re-runs on the same data
df = df.sample(frac=.005, axis=0, random_state=0)
print('df.shape:', df.shape)
# `height` is the current name of the per-facet size argument (formerly `size`)
sns.pairplot(df, height=1.)