In [None]:
"""
https://nbviewer.jupyter.org/github/cs109/content/blob/master/labs/lab5/Lab5.ipynb

Notebook Exercises on Bias, Variance, and Cross Validation
"""

In [2]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import brewer2mpl
from matplotlib import rcParams, cycler

In [3]:
# Qualitative "Dark2" palette (7 colours) from ColorBrewer2; both names are
# reused further down the notebook.
dark2_cmap = brewer2mpl.get_map('Dark2', 'Qualitative', 7)
dark2_colors = dark2_cmap.mpl_colors

# Notebook-wide matplotlib defaults, applied in a single pass (same keys,
# same order as individual rcParams[...] assignments).
rcParams.update({
    'figure.figsize': (10, 6),
    'figure.dpi': 150,
    'axes.prop_cycle': cycler(color=dark2_colors),  # line colours cycle through Dark2
    'lines.linewidth': 2,
    'axes.facecolor': 'white',
    'font.size': 14,
    'patch.edgecolor': 'white',
    'patch.facecolor': dark2_colors[0],             # bars/patches use the first Dark2 colour
    'font.family': 'StixGeneral',
})

def remove_border(axes=None, top=False, right=False, left=True, bottom=True):
    """Show or hide each side of a plot frame together with its tick marks.

    Parameters
    ----------
    axes : matplotlib axes, optional
        Axes to modify. When omitted, the current axes (``plt.gca()``) is used.
    top, right, left, bottom : bool
        True keeps that spine visible and its ticks switched on; False hides
        the spine and leaves that side without ticks.

    Returns
    -------
    None
        The axes object is modified in place.
    """
    ax = axes if axes is not None else plt.gca()

    for side, shown in (('top', top), ('right', right),
                        ('left', left), ('bottom', bottom)):
        ax.spines[side].set_visible(shown)

    # Turn off all ticks, then re-enable only the requested sides.
    ax.yaxis.set_ticks_position('none')
    ax.xaxis.set_ticks_position('none')

    # tick_top()/tick_bottom() each *move* the ticks to one side, so calling
    # both in sequence would leave ticks on the last side only. When both
    # sides are requested, ask for 'both' explicitly instead.
    if top and bottom:
        ax.xaxis.set_ticks_position('both')
    elif top:
        ax.xaxis.tick_top()
    elif bottom:
        ax.xaxis.tick_bottom()

    # Same reasoning for the y axis.
    if left and right:
        ax.yaxis.set_ticks_position('both')
    elif left:
        ax.yaxis.tick_left()
    elif right:
        ax.yaxis.tick_right()

pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)

import warnings
warnings.filterwarnings('ignore', message='Polyfit*')

In [10]:
import random
import copy

def scatter_by(df, scatterx, scattery, by=None, figure=None, axes=None,
               colorscale=dark2_cmap, labeler={}, mfunc=None, setupfunc=None, mms=8):
    """Scatter ``df[scatterx]`` against ``df[scattery]``, optionally one colour per group.

    Parameters
    ----------
    df : DataFrame-like
        Indexed by column label and, when ``by`` is given, split with
        ``df.groupby(by)``.
    scatterx, scattery : column labels
        Columns plotted on the x and y axis; also used as the axis labels.
    by : optional
        Grouping key handed to ``df.groupby``. When falsy, all points are
        drawn in a single colour picked at random from
        ``colorscale.mpl_colors``.
    figure : optional
        Figure to draw on. When omitted, the figure owning ``axes`` is used;
        if ``axes`` is omitted as well, a new 8x8 figure is created.
    axes : optional
        Axes to draw on. Defaults to ``figure.gca()``.
    colorscale : object
        Must expose ``mpl_colors`` (sequence of colours) and ``mpl_colormap``
        (callable mapping a number in [0, 1] to a colour).
    labeler : mapping
        Maps a group key to its legend label; keys that are missing fall back
        to the key itself. Only read, never modified.
    mfunc : callable, optional
        Called as ``mfunc(axeslist, x, y, color=..., mms=mms)`` -- once for
        the whole data set, or once per group when ``by`` is given.
    setupfunc : callable, optional
        Called as ``setupfunc(axes, figure)``; its return value is the
        ``axeslist`` passed to ``mfunc``. Without it, ``axeslist`` is ``[axes]``.
    mms : passed through to ``mfunc`` unchanged.

    Returns
    -------
    The axes that were drawn on.
    """
    if axes is None:
        if figure is None:
            figure = plt.figure(figsize=(8, 8))
        axes = figure.gca()
    elif figure is None:
        # Reuse the figure that already owns the axes instead of opening a
        # stray blank one.
        figure = axes.figure

    if not by:
        x = df[scatterx]
        y = df[scattery]
        col = random.choice(colorscale.mpl_colors)
        # Use `color=` rather than `c=`: a bare RGB(A) tuple passed as `c` is
        # treated as per-point values whenever its length equals the number
        # of points. No colormap is needed for a single solid colour.
        axes.scatter(x, y, color=col)
        axeslist = setupfunc(axes, figure) if setupfunc else [axes]
        if mfunc:
            mfunc(axeslist, x, y, color=col, mms=mms)
    else:
        grouped = df.groupby(by)
        # Evenly spaced colormap positions, handed out from 1.0 down to 0.0
        # in group iteration order.
        shades = np.linspace(0, 1, len(grouped))[::-1]

        xs = {}
        ys = {}
        group_colors = {}
        xlims_seen = []
        ylims_seen = []
        for shade, (key, group) in zip(shades, grouped):
            gx = group[scatterx]
            gy = group[scattery]
            rgba = colorscale.mpl_colormap(shade)
            xs[key] = gx
            ys[key] = gy
            group_colors[key] = rgba
            axes.scatter(gx, gy, color=rgba, label=labeler.get(key, key),
                         s=40, alpha=0.3)
            xlims_seen.append(axes.get_xlim())
            ylims_seen.append(axes.get_ylim())

        # Fix the view to the widest limits observed while adding groups;
        # skipped when there were no groups at all.
        if xlims_seen:
            axes.set_xlim([min(lo for lo, _ in xlims_seen),
                           max(hi for _, hi in xlims_seen)])
            axes.set_ylim([min(lo for lo, _ in ylims_seen),
                           max(hi for _, hi in ylims_seen)])

        axeslist = setupfunc(axes, figure) if setupfunc else [axes]
        if mfunc:
            for key in xs:
                mfunc(axeslist, xs[key], ys[key], color=group_colors[key], mms=mms)

    axes.set_xlabel(scatterx)
    axes.set_ylabel(scattery)

    return axes

In [11]:
def make_rug(axeslist, x, y, color='b', mms=8):
    """Draw rug marks for ``x`` and ``y`` along the top and right edges of an axes.

    Parameters
    ----------
    axeslist : sequence
        Only the first element is used; it is the axes that receives the marks.
    x, y : sequences
        Data values. ``x`` is marked with ``'|'`` markers at the current upper
        y-limit, ``y`` with ``'_'`` markers at the current upper x-limit.
    color : passed to ``axes.plot`` as the colour of both rugs.
    mms : passed to ``axes.plot`` as the marker size (``ms``).

    Returns
    -------
    The axes that were drawn on (``axeslist[0]``).
    """
    axes = axeslist[0]

    # Remember the current view; it is restored after plotting so the marks
    # stay on the edges they were placed at.
    xlims = axes.get_xlim()
    ylims = axes.get_ylim()

    top_edge = np.full(len(x), ylims[1], dtype=float)
    right_edge = np.full(len(y), xlims[1], dtype=float)

    axes.plot(x, top_edge, marker='|', color=color, ms=mms)
    axes.plot(right_edge, y, marker='_', color=color, ms=mms)

    axes.set_xlim(xlims)
    axes.set_ylim(ylims)

    return axes