# 6. Metadata and Assessing Potential Bias
Compiled by [Morgan Williams](mailto:morgan.williams@csiro.au) for C3DIS 2018 

In [1]:
import numpy as np
import pandas as pd
from mpl_toolkits.basemap import Basemap
import matplotlib.patches as patches
import matplotlib.colors as colors
from matplotlib.colors import LogNorm

from plotting_tools import *

### Data Coverage

In [None]:
# Horizontal bar chart of the number of non-null values in each column of df.
# The counts are reversed before plotting; barh draws the first entry at the
# bottom, so the columns read top-to-bottom in their original order.
ax = (
    df.count()
    .iloc[::-1]
    .plot.barh(figsize=(12, 20), logx=False, color='seagreen')
)
ax.set_ylabel('Column', fontsize=16)
ax.set_xlabel('Count', fontsize=16)

### Spatial Distribution

In [None]:
# World map of sample locations for igneous materials, with an optional
# binned-count overlay (disabled by default via the `heatmap` flag below).
fig, ax = plt.subplots(1, figsize=(12, 6))
ax = [ax]  # wrap the single Axes in a list so it is addressed as ax[0] below
ax[0].set_xlabel('Longitude', labelpad=20)
ax[0].set_ylabel('Latitude', labelpad=20)

# Miller cylindrical projection centred on longitude 0, low-resolution coastlines.
bmp = Basemap(ax=ax[0], projection='mill', lon_0=0, resolution='l')
bmp.drawcoastlines(linewidth=0.1)
# Parallels every 30 degrees labelled on the left; meridians every 60 degrees
# labelled along the bottom.
bmp.drawparallels(np.arange(-90, 90, 30), labels=[1, 0, 0, 0], linewidth=0.1)
bmp.drawmeridians(np.arange(bmp.lonmin, bmp.lonmax + 30, 60), labels=[0, 0, 0, 1], linewidth=0.1)
bmp.drawmapboundary(fill_color='lightblue')
bmp.fillcontinents(color='0.5', lake_color='lightblue', alpha=0.5)

# Boolean row mask selecting records whose Material is 'IGNEOUS'.
filt = (df.Material == 'IGNEOUS')
plot_latlong(df.loc[filt, :], bmp, ax=ax[0], alpha=0.1, linewidth=0, color='red', marker='D', s=0.1, zorder=3)

# Set to True to overlay a 2D histogram of the filtered sample locations.
heatmap = False
if heatmap:
    # 45 x 45 bins spanning the full longitude / latitude range.
    nx, ny = 45, 45
    lon_bins = np.linspace(-180, 180, nx+1)
    lat_bins = np.linspace(-90, 90, ny+1)

    # Keep the bin counts under their own name so the boolean `heatmap` flag is
    # not rebound to an array, and index the result rather than unpacking into
    # `_` (which would overwrite IPython's last-output variable).
    counts = np.histogram2d(df.loc[filt, 'Longitude'], df.loc[filt, 'Latitude'], bins=[lon_bins, lat_bins])[0]
    # Project the bin edges into map coordinates for plotting.
    lon_bins_2d, lat_bins_2d = np.meshgrid(lon_bins, lat_bins)
    xs, ys = bmp(lon_bins_2d, lat_bins_2d)
    # histogram2d indexes counts as [lon, lat]; pcolormesh expects [row (y), column (x)],
    # hence the transpose.
    im = ax[0].pcolormesh(xs, ys, counts.T,
                          alpha=0.5,
                          norm=LogNorm(),
                          cmap='viridis',
                          zorder=2)
    # NOTE(review): `colorbar` is not imported in the visible cells; presumably
    # provided by `from plotting_tools import *` — confirm.
    cb = colorbar(im, shrink=0.8,)
    cb.set_label('Histogram Frequency')


### Age Distribution

In [None]:
# Two stacked panels sharing the x-axis: ax[0] receives the step-outline
# distributions, ax[1] receives the per-rock-type MgO series.
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(10, 4), sharex=True)
bins = 100

# Full dataset first, drawn in grey on the upper panel.
f, a, r = age_distribution(
    df, yvar=None, agevar='Age', ax=ax[0], bins=bins,
    color='0.5', zorder=3, histtype='step', label='Full'
)
ax[0].set_yscale('log')
ax[0].set_ylabel('freq')
ax[0].set_xlim(0, 4500)
ax[0].set_ylim(1, 100000)

# One pass per rock type: plot MgO on the lower panel using the marker style
# given here, then add the matching step outline to the upper panel in the
# same colour.
for rock_type, style in (
    ('Basalt', {'c': 'g', 'alpha': 0.1, 's': 20, 'marker': 'D', 'zorder': 3}),
    ('Rhyolite', {'c': 'r', 'alpha': 0.1, 's': 20, 'marker': 'D', 'zorder': 2}),
):
    rockdf = df.loc[df.TASRock == rock_type]
    f, a, r = age_distribution(rockdf, yvar='MgO', agevar='Age', ax=ax[1],
                               label=rock_type, **style)
    # NOTE(review): agevar is not passed in this call, so age_distribution's
    # default is used — confirm it matches 'Age' as in the other calls.
    age_distribution(rockdf, yvar=None, ax=ax[0], bins=bins, color=style['c'],
                     zorder=3, histtype='step', label=rock_type)
    # `a` is the second value returned by the MgO call above; it is switched to
    # a log y-scale with fixed limits on every iteration.
    a.set_yscale('log')
    a.set_ylim(0.01, 100)

ax[0].legend(frameon=False)
ax[1].legend(frameon=False)