## Week 1 - Matplotlib tutorial

Matplotlib is the standard Python library for creating plots.

## Basic plotting

#### Line plots

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Sample points from 0 up to (not including) 3*pi, spaced 0.1 apart,
# and evaluate the sine at each of them.
x = np.arange(start=0, stop=3 * np.pi, step=0.1)
y = np.sin(x)

# Draw y against x as a connected line, then render the figure.
plt.plot(x, y)
plt.show()

In [None]:
# Shared x samples for both curves.
x = np.arange(0, 3 * np.pi, 0.1)
y_sin = np.sin(x)
y_cos = np.cos(x)

# Attach the legend text to each curve as it is drawn; plt.legend() then
# picks the labels up automatically, in plotting order.
plt.plot(x, y_sin, label='Sine')
plt.plot(x, y_cos, label='Cosine')

# Annotate the axes and the figure.
plt.title('Sine and Cosine')
plt.xlabel('x axis label')
plt.ylabel('y axis label')
plt.legend()
plt.show()

#### Subplots

In [None]:
x = np.arange(0, 3 * np.pi, 0.1)
y_sin = np.sin(x)
y_cos = np.cos(x)

# plt.subplot(rows, cols, index) activates one cell of a grid of axes.
# Here: a grid with 2 rows and 1 column; index 1 is the top panel.
plt.subplot(2, 1, 1)
plt.plot(x, y_sin)
plt.title('Sine')

# Index 2 is the bottom panel; subsequent pyplot calls now target it.
plt.subplot(2, 1, 2)
plt.plot(x, y_cos)
plt.title('Cosine')

# Re-space the panels so the 'Cosine' title does not collide with the
# x tick labels of the panel above it.
plt.tight_layout()
plt.show()

## Plotting options

In [None]:
# A thick, light-blue line through four points.
plt.plot(
    [1, 2, 3, 4],
    [10, 20, 25, 30],
    color='lightblue',
    linewidth=3,
)

# Four dark-green upward-triangle markers on the same axes.
plt.scatter(
    [0.3, 3.8, 1.2, 2.5],
    [11, 25, 9, 26],
    color='darkgreen',
    marker='^',
)

# Restrict the visible x range; the scatter point at x=0.3 falls outside
# these limits and is therefore not shown.
plt.xlim(left=0.5, right=4.5)
plt.show()

When a figure contains multiple axes, we can access each of them independently as follows:

In [None]:
# Create the figure and a 1x2 grid of axes in a single call.
# The equivalent long form is to build the figure with
# plt.figure(figsize=(20, 10)) and then add each axes separately with
# fig.add_subplot(121) and fig.add_subplot(122).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))

# Each axes object is drawn on independently:
# vertical bars on the left, horizontal bars on the right.
ax1.bar([1, 2, 3], [3, 4, 5])
ax2.barh([0.5, 1, 2.5], [0, 1, 2])

# Render the figure
plt.show()

In [None]:
# One figure holding three axes side by side (1 row, 3 columns).
fig = plt.figure()
ax1 = fig.add_subplot(131)
ax2 = fig.add_subplot(132)
ax3 = fig.add_subplot(133)

x = np.arange(0, 3 * np.pi, 0.1)
y = np.sin(x)

# Plot the data: each call targets one specific axes object.
ax1.bar([1, 2, 3], [3, 4, 5])
ax2.barh([0.5, 1, 2.5], [0, 1, 2])
ax2.axhline(0.45)   # horizontal reference line at y = 0.45 on the middle axes
ax1.axvline(0.65)   # vertical reference line at x = 0.65 on the left axes
ax3.scatter(x, y)

# Three panels share the default figure width, so their tick labels would
# otherwise run into the neighbouring axes; re-space them before showing.
fig.tight_layout()

# Show the plot
plt.show()

## Matplotlib with pandas

In [None]:
import pandas as pd

In [None]:
# Open the CSV to see why: the file starts with 4 rows that we do not want
# to parse, so read_csv is told to skip them.
data = pd.read_csv(
    'data/olympic.csv',
    skiprows=4,
)

In [None]:
# Preview the first five rows of the loaded table.
data.head(n=5)

In [None]:
# Keep only the rows whose Edition is 2008.
filteredData = data.loc[data['Edition'] == 2008]

# Count the rows per sport and draw the counts with the default plot kind (a line).
filteredData['Sport'].value_counts().plot.line()

By default, the `.plot()` method gives us a line plot. For this plot we would prefer a bar chart.

In [None]:
# Same 2008 subset as before, this time drawn as one vertical bar per sport.
filteredData = data.loc[data['Edition'] == 2008]
filteredData['Sport'].value_counts().plot.bar()

We can also make a horizontal bar chart:

In [None]:
# Reuses filteredData from the previous cell; bars now run horizontally.
filteredData['Sport'].value_counts().plot.barh()

Or display the data as a pie chart:

In [None]:
# Per-sport row counts for the 2008 subset, shown as slices of a pie.
filteredData = data.loc[data['Edition'] == 2008]
filteredData['Sport'].value_counts().plot.pie()