# NumPy, Matplotlib, and Pandas... Oh My!

# Matplotlib (Plotting)

In [None]:
# List the files in the notebook's working directory (shell escape).
!ls

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# 100 evenly spaced sample points covering the closed interval [0, 10].
x = np.linspace(start=0, stop=10, num=100)
x

In [None]:
# Plot the cubic y = 2x^3 over the sample points in x.
plt.plot(x, 2 * x**3)
# Label the figure so it can be read on its own when the notebook is skimmed.
plt.xlabel('x')
plt.ylabel(r'$2x^3$')
plt.title(r'$y = 2x^3$')
# plt.show() renders the figure and keeps the Line2D repr out of the cell output.
plt.show()

# Pandas (the advanced 'Excel' of data science)

## Series


Think of a Pandas Series as a labeled one-dimensional vector. In fact, it need not be numeric: it can contain arbitrary Python objects.

In [None]:
from numpy.random import randn
import numpy as np

In [None]:
# Integers 0 through 9 as a NumPy array, for comparison with the Series below.
np.arange(10)

In [None]:
# A Series built from plain integers; pandas supplies a default 0..9 index.
int_series = pd.Series(data=range(10))
int_series.head(n=5)

In [None]:
# Ten draws from the standard normal distribution, wrapped in a Series.
num_series = pd.Series(data=randn(10))
num_series.head(n=5)

## Indexes
Notice how each series has an index (in this case a relatively meaningless default index).
Pandas can make great use of informative indexes. Indexes work similarly to a dictionary key, allowing fast lookups of the data associated with the index.
Indexes can also be exploited for fast group-bys, merges, time-series operations and lots more.
When you're really in the zone with pandas, you'll be thinking a lot about indexes.

In [None]:
# Two Series labelled by state name. The label sets only partly overlap
# (Alabama, Indiana and Montana are shared), which matters once the two are
# combined arithmetically.
indexed_series = pd.Series(randn(5),
                           index=['California', 'Alabama',
                                  'Indiana', 'Montana',
                                  'Kentucky'])
alt_indexed_series = pd.Series(randn(5),
                               index=['Washington', 'Alabama',
                                      'Montana', 'Indiana',
                                      'New York'])
# print() with a single argument behaves the same on Python 2 and 3, whereas
# the bare print statement is a SyntaxError on Python 3.
print(indexed_series)
print('\n')
print(alt_indexed_series)

In [None]:
# Arithmetic mean of the five values in indexed_series.
indexed_series.mean()

In [None]:
# Pandas aligns Series on their index labels before doing arithmetic, so a
# label present in only one operand comes out as NaN in the result.
result = indexed_series + alt_indexed_series
print(result)
# Replace the NaNs with the mean of the labels that did align
# (Series.mean skips NaN by default).
print(result.fillna(result.mean()))

In [None]:
# Month-end DatetimeIndex from 2015-01-31 through 2015-10-31 (10 stamps).
# An explicit offset object is used in place of the '1 m' alias string: the
# spelling of month-end aliases differs between pandas releases, while
# pd.offsets.MonthEnd is accepted by all of them.
dt_index = pd.date_range('2015-1-1',
                         '2015-11-1',
                         freq=pd.offsets.MonthEnd())

In [None]:
# Display the index to check its dates and frequency.
dt_index

## DataFrames
DataFrames extend the concept of a Series to table-like data.

In [None]:
# A frame of standard-normal draws: one row per timestamp in dt_index, one
# column per letter. The row count is taken from dt_index so the data always
# matches the index length, and list('abcde') avoids a comprehension variable
# named x, which on Python 2 leaks out and overwrites the earlier x array.
df = pd.DataFrame(randn(len(dt_index), 5), index=dt_index, columns=list('abcde'))
df.tail()

In [None]:
# Each column of a DataFrame is itself a Series; attribute access (df.a)
# selects the column named 'a'.
col = df.a
type(col)

In [None]:
# Bracket lookup by column name; selects the same column as df.a.
df['a']

In [None]:
# Add a column holding the element-wise sum of columns 'a' and 'b'.
df['new'] = df.a + df.b
df

In [None]:
# Drop the 'new' column. drop() returns a new frame, so df itself still has
# the column after this cell.
df.drop(labels=['new'], axis='columns')

In [None]:
# Boolean mask on the index: keep the rows stamped 2015-01-31.
df.loc[df.index == '2015-01-31']

In [None]:
# Load the pipe-delimited golf data set; df now refers to this frame rather
# than the random one built earlier.
df = pd.read_csv('data/playgolf.csv', delimiter='|')
# print() shows the plain-text rendering; the call form works on Python 2 and 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(df.head())

In [None]:
# Same load as the previous cell, but ending on the bare expression so the
# notebook renders the frame as a table rather than as printed text.
df = pd.read_csv('data/playgolf.csv', sep='|')
df.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max); by default
# describe() covers the numeric columns.
df.describe()

In [None]:
# Per-column overview: names, non-null counts and dtypes, plus memory usage.
df.info()

In [None]:
# Five-bin histograms of the Temperature and Humidity columns.
df.hist(column=['Temperature', 'Humidity'], bins=5)

In [None]:
# Scatter plot of Humidity (y axis) against Temperature (x axis).
df.plot(x='Temperature', y='Humidity', kind='scatter')

In [None]:
# Temperature vs. Humidity, with one colour per Outlook value.

groups = df.groupby('Outlook')

fig, ax = plt.subplots()

ax.margins(0.05)  # small padding around the data so edge markers are not clipped
for name, group in groups:
    # One ax.plot call per group: each call takes the next colour in the
    # cycle, and linestyle='' draws markers only.
    ax.plot(group.Temperature, group.Humidity, marker='o', linestyle='', ms=12, label=name)
# Axis labels and a legend title so the figure can be read on its own.
ax.set_xlabel('Temperature')
ax.set_ylabel('Humidity')
ax.legend(numpoints=1, loc='lower right', title='Outlook')

plt.show()