# Correlation Visualizer
**by Timothy R. Mayes, Ph.D.** (<mayest2@comcast.net>)

*Version 1.0, 1 September 2018*

This Jupyter notebook demonstrates how correlated variables appear in both scatter and line charts. It uses Jupyter Widgets to create a slider control that lets the user vary the correlation coefficient and immediately see an updated chart. The chart data is randomly generated using the NumPy library. Because the data is random, the actual (sample) correlation will be close to, but not exactly equal to, the requested correlation.

## Import Needed Libraries

In [1]:
import numpy as np
from scipy import stats, signal
import matplotlib.pyplot as plt
%matplotlib inline

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

## Functions for Generating the Data and the Charts

In [18]:
def correlated_normal(corr, mean1, stdev1, mean2, stdev2, numpts = 500):
    '''Draw two jointly normal series whose population correlation is corr.

    Thin convenience layer over np.random.multivariate_normal: the caller
    supplies a correlation and two standard deviations, and the covariance
    matrix that NumPy expects is assembled here.

    Parameters:
        corr   -- requested correlation between the two series
        mean1  -- mean of the first series
        stdev1 -- standard deviation of the first series
        mean2  -- mean of the second series
        stdev2 -- standard deviation of the second series
        numpts -- number of observations to draw for each series

    Returns:
        Array with two rows of numpts values each; row 0 is the first
        series and row 1 is the second.
    '''
    # Covariance implied by the requested correlation: cov = corr * sd1 * sd2
    covariance = corr*stdev1*stdev2
    cov_matrix = [
        [stdev1**2, covariance],
        [covariance, stdev2**2],
    ]
    # multivariate_normal returns one row per observation; transpose so that
    # each series occupies its own row.
    samples = np.random.multivariate_normal((mean1, mean2), cov_matrix, numpts)
    return samples.T

def _savgol_window(numpts, preferred=51, polyorder=3):
    '''Return the largest odd filter window <= preferred that fits numpts points, or 0 if none is usable.'''
    window = min(preferred, numpts)
    if window % 2 == 0:
        window -= 1  # use an odd window so the filter is centred on each point
    # The polynomial order must be strictly less than the window length
    return window if window > polyorder else 0

def draw_chart(corr, mean1=0.10, stdev1=0.15, mean2=0.10, stdev2=0.15, numpts = 500):
    '''Draw a smoothed line chart and a scatter chart (with regression line) of two correlated series.

    Parameters:
        corr   -- requested correlation between the two series
        mean1  -- mean of the first series
        stdev1 -- standard deviation of the first series
        mean2  -- mean of the second series
        stdev2 -- standard deviation of the second series
        numpts -- number of observations to generate for each series

    Returns None; the figure is created through matplotlib.pyplot.
    '''
    x = correlated_normal(corr,mean1,stdev1,mean2,stdev2,numpts) #Generate the random data with specified correlation

    #Regression of series 2 on series 1; rvalue is the realised (sample) correlation
    regr = stats.linregress(x[0], x[1])
    regr_line = regr.intercept + regr.slope*x[0] #Generate regression line data
    corr12 = regr.rvalue

    #Set up Figure with two subplots
    fig, (line,scatter) = plt.subplots(1,2,figsize=(20, 10))
    fig.suptitle(r'Correlation $\approx$ ' + '{:0.3f}'.format(corr),y=0.98,fontsize=24)

    #Line Chart using Savgol filter as an alternative to smoothing with a moving average
    #The smoothing is just to make it easier to see the pattern of correlation between the two series
    #See http://scipy.github.io/devdocs/generated/scipy.signal.savgol_filter.html#scipy.signal.savgol_filter
    #The window is clamped to the series length so that small numpts values do not make the filter fail
    window = _savgol_window(len(x[0]))
    if window:
        smooth1 = signal.savgol_filter(x[0], window, 3)
        smooth2 = signal.savgol_filter(x[1], window, 3)
    else:
        #Too few points for a cubic fit: plot the raw series instead
        smooth1, smooth2 = x[0], x[1]
    x_vals = range(1, len(smooth1)+1)
    line.plot(x_vals, smooth1, 'b-',label="Variable 1")
    line.plot(x_vals, smooth2, 'r-',label="Variable 2")
    line.set_title('Line Chart', fontsize=20, pad=15)
    line.legend(fontsize=14)
    line.tick_params(axis='both', labelsize=14, pad=15)
    line.set_xlabel('Period',fontsize=16)
    line.set_ylabel('Value',fontsize=16)

    #Scatter Chart
    scatter.plot(x[0], x[1], 'o', alpha = 0.2, label='Raw Data') #Plot the generated data
    scatter.plot(x[0],regr_line,'r-', label = 'Regression Line') #Plot the regression line
    scatter.plot([],[], alpha = 0, label =
             'Actual Correlation = '+'{:0.3f}'.format(corr12)) #Empty plot just to add actual corr to legend
    scatter.set_title('XY Scatter Chart', fontsize=20, pad=15)

    #Use the same limits on both axes, padded by 20% away from the data.
    #Scaling by 1.2 only widens the range when the bound lies on the "outer" side of zero
    #(negative minimum, positive maximum); otherwise scale by 0.8 so points are never clipped.
    lo = np.min(x)
    hi = np.max(x)
    lower = lo*1.2 if lo < 0 else lo*0.8
    upper = hi*1.2 if hi > 0 else hi*0.8
    scatter.set_xlim(lower,upper)
    scatter.set_ylim(lower,upper)
    scatter.set_aspect('equal')
    scatter.tick_params(axis='both', labelsize=14, pad=15)
    scatter.legend(fontsize=14)
    scatter.set_xlabel('Var 1',fontsize=16)
    scatter.set_ylabel('Var 2',fontsize=16)

## Show the Slider and the Interactive Chart
Click or drag the slider to change the correlation coefficient. The charts are redrawn with a fresh random sample each time the slider value changes.

In [19]:
def do_chart(corr):
    '''Single-argument front end for draw_chart.

    It exposes only corr and forwards it to draw_chart, so every other
    draw_chart argument keeps its default value. Handing draw_chart
    directly to interact would produce one slider for each of its arguments.
    '''
    draw_chart(corr)

# Build the correlation slider and bind it to do_chart.
# The trailing semicolon keeps the notebook from echoing interact's return value.
interact(
    do_chart,
    corr=widgets.FloatSlider(
        value=0.0,                 # start with uncorrelated series
        min=-1.0,                  # perfect negative correlation
        max=1.0,                   # perfect positive correlation
        step=0.1,
        description='Correlation:',
        disabled=False,
        continuous_update=False,   # do not redraw for every intermediate value while dragging
        orientation='horizontal',
        readout=True,
        readout_format='.2f',
    ),
);

interactive(children=(FloatSlider(value=0.0, continuous_update=False, description='Correlation:', max=1.0, min…