# `matplotlib` Multiple Lines, One Plot 

Also covered are some ways to handle missing data.

## Set Up Backend 

In [None]:
# Select the interactive `notebook` matplotlib backend for the figures in this notebook
%matplotlib notebook

In [None]:
# The rest of the imports

from pandas import DataFrame
from pandas import Series
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib as mpl

## Get Some Data

In [None]:
# Name prefix of the CSV file to load, i.e. '<stock>_stock.csv'
stock = 'GOOGLE'
csv_name = f'{stock}_stock.csv'
df = pd.read_csv(csv_name)

In [None]:
# Preview the first five rows of the raw frame
df.head(n=5)

### Convert Index to Timestamp and Drop `date` Column

In [None]:
# Parse the 'date' strings (YYYY-MM-DD) into timestamps; the resulting Series
# shares df's row labels, so it can act as an old-label -> new-label mapping.
date_labels = pd.to_datetime(df['date'], format='%Y-%m-%d')

# Relabel the rows with the parsed timestamps, then drop the now redundant column.
df_date_index = df.rename(index=date_labels).drop(columns='date')

In [None]:
# Bare last expression: Jupyter renders the frame as a rich table
# (print() would fall back to plain text).
df_date_index.head()

In [None]:
# Show the concrete class of the index after the conversion
index_class = type(df_date_index.index)
print(index_class)

### Resample to Day Period

In [None]:
# Resample to calendar-day frequency; days with no source rows come back as NaN.
# The index is converted to day periods explicitly with `.to_period('D')`
# because the `kind=` keyword of `.resample` is deprecated in recent pandas.
df_day_index = df_date_index.resample('D').mean().to_period('D')

print(df_day_index.head())
print(type(df_day_index.index))

### What to do with `NaN`

#### `ffill` (forward fill) and `bfill` (backward fill)
* `ffill` uses last value seen
* `bfill` uses next value seen

In [None]:
# Resample to calendar days, then forward-fill so each missing day takes the
# last values seen. `.ffill()` is used instead of `fillna(method='ffill')` and
# `.to_period('D')` instead of `resample(kind='period')`: both older spellings
# are deprecated in recent pandas.
df_day_all_days = df_date_index.resample('D').mean().to_period('D').ffill()

In [None]:
# Bare last expression: Jupyter renders the filled frame as a rich table
# (print() would fall back to plain text).
df_day_all_days.head()

NOTICE: `2014-03-08` and `2014-03-09` are filled with the values from `2014-03-07`<br>
(forward fill).

* Changing `ffill` to `bfill` switches to backward fill

#### `.interpolate`

In [None]:
# Resample to calendar days, then fill the missing days by linear
# interpolation between the neighbouring observed rows.
# `.to_period('D')` is used instead of `resample(kind='period')`, whose
# `kind=` keyword is deprecated in recent pandas.
df_day_all_days = (
    df_date_index
    .resample('D')
    .mean()
    .to_period('D')
    .interpolate(method='linear')
)

In [None]:
# Bare last expression: Jupyter renders the interpolated frame as a rich table
# (print() would fall back to plain text).
df_day_all_days.head()

NOTICE: the values for `2014-03-08` and `2014-03-09` lie on a straight line<br>
between the data on `2014-03-07` and `2014-03-10`

* There are a lot of other interpolation methods. <br>
  See the Pandas documentation for `.interpolate`
* The rest of the notebook uses the values calculated from `.interpolate`
* The next cell converts the index into timestamp values that can be plotted

In [None]:
# Convert the period index back to timestamps and keep the raw array of
# values, which is what gets passed to the plotting calls as x data.
timestamp_index = df_day_all_days.index.to_timestamp()
X_all_days = timestamp_index.values

In [None]:
# Peek at the first ten x values
print(X_all_days[:10])

In [None]:
# Show the type of a single x value
first_value = X_all_days[0]
print(type(first_value))

## Setting up the Figure

In [None]:
# Set up the figure
# The first argument is the figure's label. It reads "High Low Graph" so that
# it agrees with the axes title and with the data plotted (high / low prices).
# figsize=(width, height)
figure = plt.figure("High Low Graph", figsize = (8, 3), dpi = 100)

In [None]:
# Single axes occupying the only cell of a 1x1 grid on `figure`
ax = plt.subplot2grid(shape=(1, 1), loc=(0, 0), fig=figure)

In [None]:
# Turn on the grid lines for the axes
ax.grid(True)

In [None]:
# Title text for the axes
ax.set_title(label="High Low Graph", fontsize=12)


## Plotting the Data

In [None]:
# Register pandas' date converters with matplotlib (added to stop a warning).
# The function is reached through the already-imported `pd` namespace, so no
# import statement is needed in the middle of the notebook.
pd.plotting.register_matplotlib_converters()

In [None]:
# One thin solid line per price series, both drawn on the same axes.
# Each entry is (column name, line colour, legend label).
series_styles = (
    ('high', 'blue', 'High'),
    ('low', 'red', 'Low'),
)
for column, line_color, legend_label in series_styles:
    ax.plot(X_all_days, df_day_all_days[column],
            linestyle='-',
            linewidth=0.5,
            color=line_color,
            label=legend_label)

# Let matplotlib choose the legend position
ax.legend(loc='best')

## Setting up the Axis

### Setting Up the `xaxis` with Dates

In [None]:
# Major ticks: placed by a year locator and labelled with the four-digit year
years = mpl.dates.YearLocator()
yearsFmt = mpl.dates.DateFormatter('%Y')
# Minor ticks: placed by a month locator.
# If you do not add the minor locator you do not get the ticks for the months
months = mpl.dates.MonthLocator()

x_axis = ax.xaxis
x_axis.set_major_locator(years)
x_axis.set_major_formatter(yearsFmt)
x_axis.set_minor_locator(months)

# Small, rotated labels on the major ticks
ax.tick_params(axis='x', which='major', labelsize=5, rotation=45)

### Setting Up the `yaxis`

In [None]:
# Work out y-axis limits that sit on the 50-dollar grid used for the y ticks.
max_high = df_day_all_days['high'].max()
min_low = df_day_all_days['low'].min()

# Upper limit: the hundred at or below (max_high + 100), plus 50, which
# always leaves headroom above the highest value.
top = (divmod(max_high + 100, 100)[0] * 100) + 50

# Lower limit: floor the minimum to a multiple of 50. Flooring to a hundred
# and then adding 50 would put the limit ABOVE the lowest data point whenever
# min_low % 100 < 50 (e.g. 510 -> 550), clipping the bottom of the plot.
bottom = divmod(min_low, 50)[0] * 50

print(top)
print(bottom)

In [None]:
# Tick positions every 50 dollars, starting at `bottom` and running up to `top`
y_ticks = np.arange(bottom, top + 50, 50)

ax.set_ylim(bottom, top)
ax.set_yticks(y_ticks)
ax.tick_params(axis='y', labelsize=6)
ax.set_ylabel('Dollars', fontsize=8, labelpad=0)

## Saving the Graphic

In [None]:
!rm high_low_1_graph.png

In [None]:
# Write the figure to a PNG file (path is relative to the working directory)
figure.savefig(fname="high_low_1_graph.png")

# End of Notebook