# Practice 2:  An overview of plotting

In [None]:
%matplotlib inline
from pylab import *
from random import random

## Histograms
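A histogram displays the distribution of a continuous variable by splitting its range into bins and counting how many observations fall into each bin
(it can be used to answer questions such as "how many people gave answer x, and how many times?")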

In [None]:
# Histogram of 10,000 samples drawn with randn (standard normal distribution).
x = randn(10000)
n_bins = 10
hist_data = hist(x, bins=n_bins)

# Note: because the samples are normally distributed, the bars form a rough bell shape.

In [None]:
# Plot a noisy line.
# Materialise the range as a list so that print(x) shows the actual values
# on Python 3 (a bare range object prints as "range(0, 10)").
x = list(range(10))
print(x)
# Each y value is its x value plus two independent uniform noise terms.
y = [3*random() + i + random() for i in x]
print(y)
plot(x,y)

# Figures created in a notebook cell can be written straight to an image file.
savefig("line.png")

In [None]:
# Scatter plot: draws one marker per (x, y) pair instead of a connected line.
scatter(x,y)
# x and y are the notebook-level values left over from the line-plot cell above.

In [None]:
# Plot two lines on one set of axes.
# y2 is a second noisy series computed over the same x values.
y2 = [3 + i + 2*random() for i in x]
plot(x, y, color='r') # consecutive plot() calls in one cell draw on the same axes; the old hold= keyword was removed in Matplotlib 3.0
plot(x, y2, color='b')

## Fill
Sometimes you might want to fill in an area of your graph - you can do this!

In [None]:
# Fill in an area.
# The first list holds the x coordinates and the second the y coordinates of
# the polygon's corners: here the triangle (1, 3), (3, 3), (4, 2).
# Try changing the numbers to see how the shape follows them.
fill([1, 3, 4], [3, 3, 2])

### Fill for standard deviation of sequences
Remember, standard deviation measures how spread out the data is relative to the mean 

In [None]:
# Ten x positions with a mean and a standard deviation for each.
# NOTE: the name `mean` replaces any `mean` that `from pylab import *` put in scope.
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
mean = [4, 4, 3, 3, 2, 2, 1, 1, 2, 2]
stdev = [.5, .4, .3, .5, .6, .1, .2, .3, .1, .05]

# The outline runs left-to-right along the upper edge and then right-to-left
# along the lower edge, so the x values are needed in reverse order as well.
x2 = x[::-1]

topline = [m + s for m, s in zip(mean, stdev)]
# Build the lower edge (mean - stdev) already reversed to match x2.
bottomline = [m - s for m, s in zip(mean, stdev)][::-1]
fill(x + x2, topline + bottomline)

In [None]:
# Shade the mean +/- stdev band, then draw the mean line on top of it.
band_x = x + x2
band_y = topline + bottomline
fill(band_x, band_y, color='red', alpha=0.20)  # alpha controls transparency
plot(x, mean, color='k')

## Axes

In [None]:
# Label both axes of the band-plus-mean figure.
band_x = x + x2
band_y = topline + bottomline
fill(band_x, band_y, color='r', alpha=0.2)
plot(x, mean, color='r')
xlabel("age (days)")
ylabel("income ($1k)")

In [None]:
# Set the visible range of each axis explicitly.
band_x = x + x2
band_y = topline + bottomline
fill(band_x, band_y, color='r', alpha=0.2)
plot(x, mean, color='r')
xlabel("age")
ylabel("income (% of lifetime)")

# NOTE: xlim and ylim accept the limits either as a single (low, high) tuple
# or as two separate arguments. Both spellings are shown below; there are
# other ways to set limits, but this is the most compact.
xlim((0,15))
ylim(0,100)

In [None]:
# Logarithmic y axis: useful when values are best compared on a log scale.
band_x = x + x2
band_y = topline + bottomline
fill(band_x, band_y, color='r', alpha=0.2)
plot(x, mean, color='r')
xlabel("age")
ylabel("income")
yscale('log')

In [None]:
# Captioning: a graph is only as useful to your readers as its labels and title.
band_x = x + x2
band_y = topline + bottomline
fill(band_x, band_y, color='r', alpha=0.2)
plot(x, mean, color='r')
xlabel("age")
ylabel("income")
title("Poor example of the relationship between Age and Income")

In [None]:
# Legend: give every series a label, then call legend() to draw the key.
y2 = [3 + i + 2*random() for i in x]
for values, colour, name in ((y, 'r', 'male'), (y2, 'b', 'female')):
    plot(x, values, color=colour, label=name)
legend()
xlabel("age")
ylabel("income")
title("Legend Example")

In [None]:
# Print the built-in documentation for legend().
help(legend)

In [None]:
# Legend location example: the loc argument chooses where the key is drawn.

y2 = [3 + i + 2*random() for i in x]
male_style = dict(color='r', marker="o", label='male')
female_style = dict(color='b', label='female')
plot(x, y, **male_style)
plot(x, y2, **female_style)
legend(loc=0)  # location code 0 is 'best'
xlabel("age")
ylabel("income")
title("Legend Example")

In [None]:
# What if the chart is printed in black and white, or the reader is colour blind?
# Distinguish the series by marker and line style, not only by colour.

y2 = [3 + i + 2*random() for i in x]
# Format string 'r+-.': red colour, '+' markers, dash-dot line.
plot(x, y, 'r+-.', label='male')
female_style = dict(color='b', marker='o', linestyle='dashed', label='female')
plot(x, y2, **female_style)
legend(loc=4)  # location code 4 is 'lower right'
xlabel("age")
ylabel("income")
title("Poor Examples of Income across age in days and gender")

In [None]:
# Print the built-in documentation for plot().
help(plot)

# Column Plots

In [None]:
import numpy
# One bar per category: its height is the mean, its error bar the std.
means = [25, 35, 30, 35, 27]
stds = [2, 3, 4, 1, 2]

# arange(5) returns the evenly spaced values 0..4, used as the bars' left edges.
placement = numpy.arange(5)
width = 0.65

# align='edge' makes `placement` the LEFT edge of each bar. Matplotlib >= 2.0
# centers bars on x by default, which would leave the tick labels below
# (placed at placement + width/2) on the bars' right edges instead of centered.
bar(placement, means, width, color='r', yerr=stds, align='edge')
# Error bars (yerr) show the spread around each mean.
ylabel("scores")
title("Bar Chart")
# Put each tick label under the middle of its bar.
xticks(placement + width/2.0, ('one', '2', '3', 'IV', 'five'))

In [None]:
# Two series of bars drawn side by side for each category.
means = [25, 35, 30, 35, 27]
# The second series is the first one in reverse order.
means2 = list(means)
means2.reverse()
stds = [2, 3, 4, 1, 2]
stds2 = [3, 2, 1, 1, 5]

placement = numpy.arange(5)
width = 0.35

# align='edge' makes the x values the LEFT edge of each bar, so the first bar
# covers [p, p + width] and the second [p + width, p + 2*width]. With the
# center alignment that Matplotlib >= 2.0 uses by default, the tick at
# p + width would sit under the second bar rather than between the pair.
bar(placement, means, width, color='r', yerr=stds, align='edge')
bar(placement + width, means2, width, color='y', yerr=stds2, align='edge')
ylabel("scores")
title("Bar Chart")
# p + width is the boundary between the two bars, i.e. the middle of the group.
xticks(placement + width, ('one', 'two', 'three', 'four', 'five'))

In [None]:
# Box-and-whisker plot of some fake data.
# The data combines a uniform spread, a block of identical central values,
# and two groups of extreme values (one high, one low).
spread = 100 * rand(50)
center = 50 * ones(25)
flier_high = 100 + 100 * rand(10)
flier_low = -100 * rand(10)
parts = (spread, center, flier_high, flier_low)
data = concatenate(parts, axis=0)
bp_data = boxplot(data)

In [None]:
from nltk.book import *

In [None]:
# Dispersion plot for the five listed words in text4
# (text4 comes from `from nltk.book import *`).
text4.dispersion_plot(["citizens", "democracy", "freedom", "moon", "America"])

In [None]:
# Build a frequency distribution over the tokens of text1
# (FreqDist and text1 come from `from nltk.book import *`).
fdist = FreqDist(text1)
# Plot the distribution, limited to 50 samples -- presumably the 50 most
# frequent ones; confirm against the NLTK FreqDist.plot documentation.
fdist.plot(50)