# Plots for Land Cover Type data

This notebook uses data which are produced by `make_lct_timeseries.ipynb`.

In [None]:
from pathlib import Path
import os

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection

In [None]:
# Resolve the project directories relative to where the notebook is launched:
# from inside the `pollen-abundance` directory they sit one level up,
# otherwise they are taken relative to the current working directory.
# Path.cwd().name is used instead of splitting the path on '/' so the check
# also works with Windows path separators.
pwd = Path.cwd().name
in_pollen_abundance = pwd == 'pollen-abundance'
TMP_DIR = Path('../tmp') if in_pollen_abundance else Path('tmp')
OUTPUT_DIR = Path('../outputs') if in_pollen_abundance else Path('outputs')

### Plot pollen data

In [None]:
# Per-site metadata table, keyed by site code.
site_metadata_path = OUTPUT_DIR / 'site_metadata.csv'
site_meta = pd.read_csv(site_metadata_path).set_index('sitecode')

In [None]:
# Display the site metadata table (indexed by `sitecode`).
site_meta

In [None]:
# Read each site's `lct_pct_ts.csv`, tag its rows with the site code, then
# stack everything into one frame indexed by (sitecode, agebp).
site_frames = [
    pd.read_csv(OUTPUT_DIR / site / 'lct_pct_ts.csv').assign(sitecode=site)
    for site in site_meta.index
]
pol_df = pd.concat(site_frames).set_index(['sitecode', 'agebp']).sort_index()

In [None]:
def get_base_ts(df: pd.DataFrame) -> pd.DataFrame:
    """Nicely formatted DF containing undifferentiated time series.

    Keeps only the columns whose name starts with ``pct_`` and gives them
    display names: the ``pct_`` prefix is dropped, remaining underscores
    become spaces and the result is title-cased
    (``pct_pine_forest`` -> ``Pine Forest``).

    Parameters
    ----------
    df : pd.DataFrame
        Frame holding ``pct_*`` columns; it is not modified.

    Returns
    -------
    pd.DataFrame
        New frame with the same index and only the renamed ``pct_`` columns.
    """
    def prettify(column_name: str) -> str:
        return ' '.join(column_name.split('_')[1:]).title()

    # `^pct_` rather than `^pct_*`: in the latter `_*` also matches zero
    # underscores, so any column merely beginning with "pct" was swept in
    # (and ended up with an empty display name).
    return df.filter(regex=r'^pct_').rename(columns=prettify)

#### For print

In [None]:
def plot_print_chronology(sitename, earliest, latest, figlabel=None, save=False):
    """Draw a print-style pollen diagram for one site.

    One panel is drawn per column of ``get_base_ts(pol_df)`` for the site,
    with percentage on the x axis and age on a shared, inverted y axis
    (labelled 'yrs BP'). The area between each line and the left edge of
    its panel is shaded.

    Parameters
    ----------
    sitename : str
        Site code; a value of the first level of the ``pol_df`` index.
    earliest : number
        Oldest age to include (inclusive).
    latest : number
        Youngest age to include (inclusive).
    figlabel : str, optional
        Bold label (e.g. 'A') drawn above and to the left of the first panel.
    save : bool, default False
        If true, write the figure to
        ``plots/<sitename>_<earliest>-<latest>.pdf`` relative to the current
        working directory, creating ``plots`` if needed.

    Raises
    ------
    ValueError
        If the site has no samples between ``latest`` and ``earliest``.
    """
    # Percentages for the requested site, restricted to the age window.
    site_pcts = get_base_ts(pol_df).loc[sitename, :]
    in_window = (site_pcts.index <= earliest) & (site_pcts.index >= latest)
    site_pcts = site_pcts[in_window]
    if site_pcts.empty:
        # Fail early with a readable message instead of numpy's
        # "zero-size array" error from deep inside the tick calculation.
        raise ValueError(
            'no samples for {0!r} between {1} and {2} yrs BP'
            .format(sitename, latest, earliest)
        )

    def tweak_pct_ticks(axis, pct_vals):
        """Give the x axis ~10% headroom and four evenly spaced ticks."""
        # At least 1 so an all-zero series still gets a non-degenerate axis.
        max_pct = max(int(round(pct_vals.max() * 1.1)), 1)
        # Round up to a multiple of 4 so the quarter steps are tidy.
        while max_pct % 4 != 0:
            max_pct += 1
        step = max_pct / 4
        axis.set_xlim(0, step * 4)
        # The tick at the axis maximum is deliberately omitted: panels are
        # drawn with no gap between them (wspace=0), so that tick would
        # coincide with the 0 tick of the next panel.
        axis.xaxis.set_ticks([step * i for i in range(4)])

    def make_under_line_polygon(xx, yy, oldest, youngest):
        """Polygon bounded by the data line and the x=0 edge of the panel."""
        line_vertices = np.column_stack((xx, yy))
        leftmost_corners = np.array([[0, oldest], [0, youngest]])
        vertices = np.concatenate((line_vertices, leftmost_corners))
        # `closed` must be passed by keyword on recent matplotlib releases.
        return Polygon(vertices, closed=True)

    pollen_line_colour = '#145D85'

    # squeeze=False keeps `axes` indexable even when there is only one column.
    fig, axes = plt.subplots(1, len(site_pcts.columns), sharey=True,
                             squeeze=False)
    axes = axes[0]
    ages = site_pcts.index.values

    for i, group in enumerate(site_pcts.columns):
        ax = axes[i]
        pcts = site_pcts[group].values

        ax.plot(pcts, ages, color=pollen_line_colour)
        ax.set_title(group.title())
        ax.set_ylim([latest, earliest])
        tweak_pct_ticks(ax, pcts)

        shading = PatchCollection(
            [make_under_line_polygon(pcts, ages, earliest, latest)],
            alpha=0.4,
        )
        shading.set_color(pollen_line_colour)
        ax.add_collection(shading)

        if i == 0:
            ax.set_ylabel('yrs BP', fontsize=13)
            if figlabel:
                # Offset the label from the panel corner in units of the
                # tick spacing so its placement scales with the axes.
                xticks = ax.get_xticks()
                yticks = ax.get_yticks()
                xtick_scale = xticks[1] - xticks[0]
                ytick_scale = yticks[1] - yticks[0]

                ax.text(-1.15 * xtick_scale, latest - 0.5 * ytick_scale,
                        figlabel,
                        fontdict={'weight': 'bold',
                                  'size': 16})

    # The y axis is shared, so inverting one panel inverts them all.
    axes[-1].invert_yaxis()
    fig.subplots_adjust(hspace=0, wspace=0)
    fig.text(0.51, 0.02, '% contribution to total pollen sample',
             ha='center', fontsize=13)

    if save:
        plot_dir = 'plots'
        os.makedirs(plot_dir, exist_ok=True)
        file_name = '{0}_{1}-{2}.pdf'.format(
            sitename.replace(' ', '_'), earliest, latest)
        fig.savefig(os.path.join(plot_dir, file_name))

In [None]:
# Overview diagram (15000 - 0 yrs BP) for every site present in `pol_df`.
all_site_codes = pol_df.index.get_level_values(0).unique()
for site_code in all_site_codes:
    print(site_code)
    plot_print_chronology(site_code, 15000, 0)

Of these, to my eye, San Rafael looks the most interesting (like there's a lot going on). 

On the other hand, what is happening with sprouters at Navarres around 6000 years ago?

#### Interactive

In [None]:
from bokeh.io import push_notebook, show, output_notebook
from bokeh.layouts import gridplot, widgetbox, column  # container for bokeh figure objects
from bokeh.models.widgets import Dropdown
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure
# Configure bokeh to render its output inline in the notebook.
output_notebook()

In [None]:
def plot_interactive_chronology(sitename):
    """Show a row of linked bokeh line plots for one site.

    One panel is drawn per column of ``get_base_ts(pol_df)`` for
    ``sitename``, plotted against ``agebp`` on the y axis. All panels share
    the first panel's (flipped) y range and only support vertical pan and
    wheel zoom. Returns the handle from ``show(..., notebook_handle=True)``.
    """
    site_pcts = get_base_ts(pol_df).loc[sitename, :]

    # Every panel reads from one shared column data source.
    shared_source = ColumnDataSource(
        data=site_pcts.reset_index().to_dict('list'))

    # The first panel is drawn wider: it is the only one carrying y-axis
    # labels, which would otherwise narrow its plotting area.
    base_width, first_panel_factor = 150, .25
    first_panel_width = int(round(base_width * (1 + first_panel_factor)))

    panels = []
    shared_y_range = None
    for position, series_name in enumerate(site_pcts.columns):
        is_first = position == 0
        panel = figure(tools="ypan,ywheel_zoom",
                       plot_width=first_panel_width if is_first else base_width,
                       plot_height=500, y_range=shared_y_range,
                       title=series_name.title())
        panel.line(series_name, 'agebp', source=shared_source)

        if is_first:
            # Later panels reuse this range object so they move together.
            panel.y_range.flipped = True
            shared_y_range = panel.y_range
        else:
            # Hide the duplicated age labels on all but the first panel.
            panel.yaxis.major_label_text_font_size = '0pt'

        panels.append(panel)

    return show(gridplot([panels]), notebook_handle=True)

In [None]:
def print_daterange(sitename):
    """Print the largest and smallest sample ages (yr BP) held for a site."""
    sample_ages = get_base_ts(pol_df).loc[sitename, :].index
    oldest, youngest = sample_ages.max(), sample_ages.min()
    print('earliest date: {0} yr BP'.format(oldest))
    print('latest date: {0} yr BP'.format(youngest))

In [None]:
# Report the covered age range for every site in `pol_df`.
daterange_site_codes = pol_df.index.get_level_values(0).unique()
for site_code in daterange_site_codes:
    print(site_code)
    print_daterange(site_code)
    print('\n')

In [None]:
# Interactive view of the whole (unfiltered) record for the 'algendar' site.
plot_interactive_chronology('algendar')

In [None]:
# Preview the first rows of the combined table, indexed by (sitecode, agebp).
pol_df.head()

#### Points of particular interest in time series (discussed in upgrade report)

##### San Rafael 4000 - 8000 yrs BP
Large variation in grasses, shrubs and sprouters around the time agriculture is thought to have reached Iberia (6500 yrs BP).

In [None]:
# Panel 'A', saved as a PDF under plots/. The plotted window (8500 - 1000
# yrs BP) is wider than the 4000 - 8000 yrs BP range named in the heading.
plot_print_chronology('san_rafael', 8500, 1000, figlabel='A', save=True)

##### Navarres 6000 - 7000 yrs BP
An oscillation with a period of roughly 200 years in the percentages of grass and seeders between 6400 and 6800 yrs BP, followed by a sudden and sustained increase in sprouters after 6400 yrs BP.

In [None]:
# Panel 'B', saved as a PDF under plots/. The plotted window (10500 - 3000
# yrs BP) is wider than the 6000 - 7000 yrs BP range named in the heading.
plot_print_chronology('navarres', 10500, 3000, figlabel='B', save=True)