# 🏋 ex6 Python Pandas

Pandas uses `matplotlib` under the hood to create basic plots from a `DataFrame`.

Unless you want to customize the plots, there is no need to import `matplotlib` yourself.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

Let's use iris:

In [None]:
# Read the iris measurements from the local CSV file into a DataFrame.
iris_csv = "data/iris.csv"
df = pd.read_csv(iris_csv)

# Preview the first rows; as the cell's last expression it is rendered as a table.
df.head()

# Scatterplot

See `?pd.DataFrame.plot.scatter`

In [None]:
# Sepal length against sepal width, one point per row of the iris DataFrame.
df.plot(kind='scatter', x='sepal_length', y='sepal_width')

# Bar chart

See `?pd.DataFrame.plot.bar`

Let's create a minimal bar chart showing the number of samples per species.

In [None]:
# Count the rows belonging to each species and expose the tally as a 'counts' column.
count_species = df.groupby(['species']).size().reset_index(name='counts')

# `rot` is the tick-label rotation in degrees (not an on/off flag),
# so 0 is the value that keeps the species labels horizontal.
count_species.plot.bar(x='species', y='counts', rot=0)

# Boxplot

See `?pd.DataFrame.boxplot`

In [None]:
# Box-and-whisker plot built from the DataFrame's columns, drawn on a single set of axes.
# Binding the return value to a name makes this an assignment, so the notebook
# shows only the figure and no textual repr of the returned object.
boxplot = df.boxplot()

# Histogram

See `?pd.DataFrame.hist`

In [None]:
# Histogram restricted to the sepal-length measurements.
df.hist(column=['sepal_length'])

In [None]:
# With no `column` argument, a histogram is drawn for each plottable column,
# each in its own subplot of one figure.
df.hist()

# Line chart

See `?pd.DataFrame.plot.line`

In [None]:
import numpy as np  # provides linspace for the sample points

# Five evenly spaced sample points on [0, 100] paired with their squares.
samples = np.linspace(0, 100, 5)
df = pd.DataFrame({'x': samples, 'y': samples * samples})

# Draw y against x as a line.
df.plot(kind='line', x='x', y='y')

# Customizations
- Axis labels
- Title
- Graph size
- Transparency
- color
- color map

In [None]:
# Imports used by this cell, grouped together so the cell can run on its own.
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

df = pd.read_csv("data/iris.csv")

# Graph size: rcParams is global, so this also applies to figures created later.
plt.rcParams["figure.figsize"] = (10,5)

# Option 1 - manually created color map: one fixed hex color per species,
# looked up row by row. Kept under its own name so that option 2 does not overwrite it.
color_map = {'setosa': '#1b9e77', 'versicolor': '#d95f02', 'virginica': '#7570b3'} # color map
species_colors = df.species.map(color_map)

# Option 2 - colors generated with Matplotlib: sample the RdYlGn colormap at
# len(df) evenly spaced positions, giving one color per row in file order.
colors = cm.RdYlGn(np.linspace(0, 1, len(df)))

# The scatterplot uses option 2; pass c=species_colors instead to color by species.
#df.species.unique(): 'setosa', 'versicolor', 'virginica' unique species
ax = df.plot.scatter(x='sepal_length', y='sepal_width', c=colors, alpha=0.5) # color and transparency

# Label the returned Axes directly instead of relying on pyplot's "current axes" state.
ax.set_xlabel('sepal length (cm)') # x label
ax.set_ylabel('sepal width (cm)') # y label
ax.set_title('Iris dataset species') # Title
plt.show()


---

# Exercises

## 😜 Exercise 1

Create a scatterplot of the `SMO-VOR-2015.csv` dataset:

 - load `SMO-VOR-2015.csv` in a dataframe
 - map `x` to `ptime`
 - map `y` to `alt`
 - set point `alpha` to `0.1`
 - set x axis label to 'Penetration time'
 - set y axis label to 'Altitude (feet)'
 - set title label to 'SMO VOR 2015 dataset'
 - resize the figure to `(10, 5)`
 

In [None]:
# Load the SMO VOR 2015 records.
df = pd.read_csv('data/SMO-VOR-2015.csv')

# Parse the timestamps, then cast the underlying datetime values to float
# so that `ptime` is a plain numeric column.
df['ptime'] = pd.to_datetime(df['ptime']).values.astype(float)


## 😜 Exercise 2

Create a bar chart of the `SMO-VOR-2015.csv` dataset:

 - load `SMO-VOR-2015.csv` in a dataframe
 - map `x` to `month`
 - map `y` to `counts` 
 - set y axis label to 'Count'
 - set title label to 'SMO VOR 2015 dataset'
 - set rot to 1

In [None]:
df = pd.read_csv('data/SMO-VOR-2015.csv')

# Make `month` an ordered categorical so the groups follow calendar order
# rather than alphabetical order.
months = ['Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
df['month'] = pd.Categorical(df['month'], categories=months, ordered=True)

# Number of rows per month, exposed as a 'counts' column.
# observed=False is spelled out because the implicit default for categorical
# groupers is deprecated: it keeps every listed month in the result (with a
# count of 0 when a month has no rows) and avoids the FutureWarning.
df = df.groupby(['month'], observed=False).size().reset_index(name='counts')


## 😜 Exercise 3

Create a boxplot of the `SMO-VOR-2015.csv` dataset by month:

 - load `SMO-VOR-2015.csv` in a dataframe
 - set y axis label to 'Altitude (feet)'
 - set title label to 'SMO VOR 2015 dataset'

In [None]:
df = pd.read_csv('data/SMO-VOR-2015.csv')

# Calendar order of the months used as categories (and as output columns).
months = ['Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
df.month = pd.Categorical(df.month, categories=months, ordered=True)

# Reshape to one column of altitudes per month. Each column keeps the original
# row index, so a row holds a value only in the column of its own month.
df = pd.DataFrame({month: df.alt[df.month == month] for month in months})


## 😜 Exercise 4

Create a histogram of the `SMO-VOR-2015.csv` dataset by month:

 - load `SMO-VOR-2015.csv` in a dataframe
 - set y axis label to 'Altitude (feet)'
 - set title label to 'SMO VOR 2015 dataset'

In [None]:
df = pd.read_csv('data/SMO-VOR-2015.csv')

# Calendar order of the months used as categories (and as output columns).
months = ['Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
df.month = pd.Categorical(df.month, categories=months, ordered=True)

# Build one altitude column per month by selecting the `alt` values of the rows
# that belong to that month; the original row index is preserved in every column.
df = pd.DataFrame({
    month: df.loc[df.month == month, 'alt']
    for month in months
})


## 😜 Exercise 5

Create a line chart of the `TSLA.csv` dataset:

 - load `TSLA.csv` in a dataframe
 - map `x` to `Date`
 - map `y` to `Close`
 - set `marker` to '.'
 - set `color` to 'r'
 - set `linestyle` to ':'
 - set x axis label to ''
 - set y axis label to 'Stock closure value ($)'
 - set title label to 'Tesla (TSLA) Jan-Jul 2019'

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the Tesla stock data.
df = pd.read_csv('data/TSLA.csv')

# Convert the `Date` column from its CSV representation to pandas datetimes.
df['Date'] = pd.to_datetime(df['Date'])
