# Practice Assignment: Understanding Distributions Through Sampling

** *This assignment is optional, and I encourage you to share your solutions with me and your peers in the discussion forums!* **


To complete this assignment, create a code cell that:
* Creates a number of subplots using the `pyplot subplots` or `matplotlib gridspec` functionality.
* Creates an animation, pulling between 100 and 1000 samples from each of the random variables (`x1`, `x2`, `x3`, `x4`) for each plot and plotting this as we did in the lecture on animation.
* **Bonus:** Go above and beyond and "wow" your classmates (and me!) by looking into matplotlib widgets and adding a widget which allows for parameterization of the distributions behind the sampling animations.


Tips:
* Before you start, think about the different ways you can create this visualization to be as interesting and effective as possible.
* Take a look at the histograms below to get an idea of what the random variables look like, as well as their positioning with respect to one another. This is just a guide, so be creative in how you lay things out!
* Try to keep the length of your animation reasonable (roughly between 10 and 30 seconds).

In [2]:
import matplotlib.pyplot as plt
import numpy as np

%matplotlib notebook

# generate 4 random variables from the random, gamma, exponential, and uniform distributions
x1 = np.random.normal(-2.5, 1, 10000)
x2 = np.random.gamma(2, 1.5, 10000)
x3 = np.random.exponential(2, 10000)+7
x4 = np.random.uniform(14,20, 10000)

# plot the histograms
plt.figure(figsize=(9,3))
plt.hist(x1, normed=True, bins=20, alpha=0.5)
plt.hist(x2, normed=True, bins=20, alpha=0.5)
plt.hist(x3, normed=True, bins=20, alpha=0.5)
plt.hist(x4, normed=True, bins=20, alpha=0.5);
plt.axis([-7,21,0,0.6])

plt.text(x1.mean()-1.5, 0.5, 'x1\nNormal')
plt.text(x2.mean()-1.5, 0.5, 'x2\nGamma')
plt.text(x3.mean()-1.5, 0.5, 'x3\nExponential')
plt.text(x4.mean()-1.5, 0.5, 'x4\nUniform')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x7f8f077601d0>

In [3]:
plt.subplot?

In [4]:
plt.figure()
plt.subplot(1,2,1)
plt.plot(x1,x2,'-o')
plt.subplot(122)
plt.plot(x3,x4,'-o')


<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f8f0770b1d0>]

In [5]:
plt.figure()
ax1 = plt.subplot(121)
plt.plot(x1,x2,'-o')

ax2 = plt.subplot(122,sharey = ax1)
plt.plot(x3,x4,'-x')



<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f8f0767dba8>]

In [6]:
fig,((ax1,ax2),(ax3,ax4)) = plt.subplots(2,2,sharex =True,sharey=True)
ax1.plot(x1,'-s',c= 'y')
ax2.plot(x2,'-o',c ='b')
ax3.plot(x3,'-x',c='r')
ax4.plot(x4,'-r')
ax1.set_title('Normal Distribution')
ax3.set_title('Exponential Distribution')
ax2.set_title('Gamma Distribution')
ax4.set_title('Uniform Distribution')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x7f8f073c0fd0>

In [7]:
for ax in plt.gcf().get_axes():
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_visible(True)

In [8]:
fig,((ax1,ax2),(ax3,ax4)) = plt.subplots(2,2,sharex =True)
ax1.hist(x1,normed = True,bins = 10,alpha = 0.5,color = 'r')
ax2.hist(x2,normed = True,bins = 50,alpha = 0.5)
ax3.hist(x3,normed = True,bins = 50,alpha = 0.5)
ax4.hist(x4,normed = True,bins = 50,alpha = 0.5)



<IPython.core.display.Javascript object>

(array([ 0.16587235,  0.17337412,  0.16753941,  0.1417    ,  0.17754176,
         0.18837764,  0.18587706,  0.16670588,  0.16670588,  0.16587235,
         0.16503882,  0.19754647,  0.1417    ,  0.17087353,  0.15086882,
         0.15753706,  0.15420294,  0.18087588,  0.17254059,  0.18421   ,
         0.16087117,  0.15753706,  0.17337412,  0.16837294,  0.15336941,
         0.16420529,  0.14253353,  0.17004   ,  0.15837059,  0.16087117,
         0.1617047 ,  0.15920412,  0.18170941,  0.18170941,  0.16920647,
         0.18087588,  0.17504117,  0.1475347 ,  0.17087353,  0.17837529,
         0.17337412,  0.17170706,  0.1617047 ,  0.15003529,  0.16503882,
         0.15420294,  0.17004   ,  0.16837294,  0.17087353,  0.15920412]),
 array([ 14.00098531,  14.12095708,  14.24092885,  14.36090062,
         14.4808724 ,  14.60084417,  14.72081594,  14.84078772,
         14.96075949,  15.08073126,  15.20070303,  15.32067481,
         15.44064658,  15.56061835,  15.68059013,  15.8005619 ,
         15.

In [9]:
plt.figure()
plt.scatter(x1,x2)

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x7f8f06e9e8d0>

In [10]:
plt.figure()
plt.scatter(x3,x4)

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x7f8f06e0b160>

In [11]:
import matplotlib.gridspec as gridspec

In [12]:
plt.figure()
gspec = gridspec.GridSpec(3,3)
top_histogram = plt.subplot(gspec[0,1:])
side_histogram = plt.subplot(gspec[1:,0])
lower_right = plt.subplot(gspec[1:,1:])

<IPython.core.display.Javascript object>

In [13]:
lower_right.scatter(x1,x2)

<matplotlib.collections.PathCollection at 0x7f8f06e0bd68>

In [14]:
top_histogram.hist(x2,bins =10)

(array([  3.20700000e+03,   3.43400000e+03,   1.94700000e+03,
          8.35000000e+02,   3.35000000e+02,   1.65000000e+02,
          4.20000000e+01,   2.50000000e+01,   9.00000000e+00,
          1.00000000e+00]),
 array([  0.03709873,   1.73548284,   3.43386695,   5.13225106,
          6.83063516,   8.52901927,  10.22740338,  11.92578749,
         13.6241716 ,  15.32255571,  17.02093981]),
 <a list of 10 Patch objects>)

In [15]:
s = side_histogram.scatter(x3,x4)

In [16]:
top_histogram.invert_xaxis()

In [17]:
for ax in [top_histogram,lower_right]:
    ax.set_xlim(0,1)

In [18]:
for ax in [side_histogram,lower_right]:
    ax.set_ylim(-5,5)
    

In [19]:
import pandas as pd
import numpy as np

In [20]:
df = pd.DataFrame({'normal':x1,
                  'gamma':x2,
                  'exponential':x3,
                  'uniform':x4})

In [21]:
df.describe()

Unnamed: 0,exponential,gamma,normal,uniform
count,10000.0,10000.0,10000.0,10000.0
mean,9.024267,2.997123,-2.480646,16.990025
std,2.015774,2.108479,1.0081,1.73423
min,7.00076,0.037099,-6.053919,14.000985
25%,7.604381,1.443955,-3.1741,15.458482
50%,8.390191,2.52656,-2.477082,16.981036
75%,9.801244,4.060325,-1.817361,18.494025
max,29.74906,17.02094,1.406294,19.999574


In [22]:
plt.figure()
_ = plt.boxplot(df['normal'],whis = 'range')

<IPython.core.display.Javascript object>

In [23]:
plt.clf()
_ = plt.boxplot([df['normal'],df['gamma'],df['exponential'],df['uniform']],whis = 'range')

In [24]:
plt.figure()
_ = plt.hist(df['gamma'],bins =100)

<IPython.core.display.Javascript object>

In [25]:
import mpl_toolkits.axes_grid1.inset_locator as mpl_il

In [26]:
plt.figure()
plt.boxplot([df['normal'],df['gamma'],df['exponential'],df['uniform']],whis = 'range')
ax2 = mpl_il.inset_axes(plt.gca(),width  ='60%',height = '40%' , loc = 2)
ax2.hist(df['gamma'],bins = 100)
ax2.margins(x = 0.5)

<IPython.core.display.Javascript object>

In [27]:
ax2.yaxis.tick_right()

In [28]:
plt.figure()
_ = plt.boxplot([df['normal'],df['gamma'],df['exponential'],df['uniform']])

<IPython.core.display.Javascript object>

In [29]:
plt.figure()
_ = plt.hist2d(x1,x2,bins =25)
plt.colorbar()

<IPython.core.display.Javascript object>

<matplotlib.colorbar.Colorbar at 0x7f8f026b6f28>

In [30]:
plt.figure()
_ = plt.hist2d(x3,x4,bins =2)
plt.colorbar()

<IPython.core.display.Javascript object>

<matplotlib.colorbar.Colorbar at 0x7f8f0262dc18>

In [31]:
import matplotlib.animation as animation
n =10000
def update(curr):
    if curr == n:
        a.event_source.stop()
    plt.cla()
    bins = np.arange(-4,4,0.5)
    plt.hist(x1[:curr],bins = bins)
    plt.axis([-4,4,0,30])
    plt.gca().set_title('Sampling')
    plt.gca().set_ylabel('frequency')
    plt.gca().set_xlabel('Value')
    plt.annotate('n={}'.format(curr),[3,27])
        

In [34]:
fig =plt.figure()
#a  = animation.FuncAnimation(fig,update,interval = 100)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f8f025d1940>]

In [35]:
plt.figure()
data = x1
plt.plot(data)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f8efd6a18d0>]

In [36]:
def onclick(event):
    plt.cla()
    plt.plot(data)
    plt.gca().set_title('Event at pixels{},{} {} and data {}, {}'.format(event,x,event.y,'\n',event_xdata,event_ydata))
plt.gcf().canvas.mpl_connect('button = press_event',onclick)

7