In [None]:
"""
Introduction To Pandas


Originally Presented at Israel Tech Challenge (http://www.israeltechchallenge.com)




Main objects: Series & DataFrame

Description: like relational database tables in python

Details: 
    1) built on top of NumPy
    2) Unlike a NumPy array, the data does not all need to be of the same type!
"""

In [None]:
"""
Outline




A) Series

B) DataFrame

C) Upload Data

D) View Data

E) Indexing

F) Iterating

G) Reshaping & Sorting

H) Join & Group
"""

In [None]:
import pandas as pd
import numpy as np

In [None]:
"""

A) Series

"""

In [None]:
# A Series can be built from a list, tuple, or array; pandas generates
# the integer indices (0, 1, 2, ...) automatically.
future_array1 = list(range(1, 7))  # plain Python list: [1, 2, 3, 4, 5, 6]
array1 = np.asarray(future_array1)  # 1-dimensional NumPy array
s = pd.Series(data=array1)
s

In [None]:
"""

B) DataFrame

"""

In [None]:
# DataFrame: a 2-D table that can contain more than one Series,
# all sharing the same index.
dates = pd.date_range("20160101", periods=6)  # six consecutive days from 2016-01-01
# Draw the example values from a dedicated, seeded generator so that
# "Restart & Run All" reproduces the same table (and the same results in the
# cells that use df) without touching NumPy's global random state.
data = np.random.RandomState(42).random_sample((6, 3))  # 6 rows x 3 columns in [0, 1)
df = pd.DataFrame(data, index=dates, columns=['Column1', 'Column2', 'Column3'])
df

In [None]:
# Unlike a NumPy array, a DataFrame may hold a different data type in each
# column: here a boolean column is added, aligned on the same date index.
column4_flags = pd.Series([True, False, False, False, True, True], index=dates)
df['Column4'] = column4_flags

In [None]:
"""

C) Upload Data

"""

In [None]:
# pandas can load data from many formats; CSV is the simplest place to start.
# NOTE(review): "filename.csv" is presumably a placeholder -- point it at a real file.
csv_path = "filename.csv"
uploaded_data = pd.read_csv(csv_path, index_col=0)  # index_col=0: first column becomes the index

In [None]:
"""

D) View Data

"""

In [None]:
# View the first rows of the data set
df.head(3)  # the argument is optional: df.head() returns the first 5 rows

In [None]:
# View the last rows of the data set
df.tail(3)  # the argument is optional: df.tail() returns the last 5 rows

In [None]:
# View each piece of the DataFrame: row labels, column labels, raw values.
# print() is called as a function so the cell runs on Python 3; single-argument
# calls like these produce the same output on Python 2 as well.
print(df.index)
print("")
print(df.columns)
print("")
print(df.values)

In [None]:
# View descriptive statistics for the numeric columns.
# The result is left as the cell's last expression so the notebook renders it
# as a table, like the other cells here; the original Python 2 `print`
# statement is a SyntaxError on Python 3.
df.describe()

In [None]:
"""

E) Indexing

"""

In [None]:
# Select a single column by its name (the result is a Series)
df['Column2']

In [None]:
# Row indices: slicing the DataFrame directly selects rows
df[0:2] # or df['20160101':'20160102']

In [None]:
# Multi-axis selection by label: rows first, then columns.
# With .loc, a label slice includes both of its endpoints.
df.loc['20160101':'20160102',['Column1','Column3']]

In [None]:
# Multi-axis selection by integer position: rows first, then columns.
# Like Python slices, the end position is excluded (rows 3-4, columns 0-1).
df.iloc[3:5, 0:2]

In [None]:
# Boolean indexing: keep only the rows where the condition is True
df[df.Column1 > .5]

In [None]:
"""

F) Iterating

"""

In [None]:
# iterrows() yields (index label, row-as-Series) pairs, one per row.
for index, row in df.iterrows():
    # row.iloc[0] is the first value by position; a plain row[0] on this
    # label-indexed Series relies on a positional fallback that recent pandas
    # deprecates. The format string prints "index value" on Python 2 and 3 alike.
    print("{} {}".format(index, row.iloc[0]))

In [None]:
"""

G) Reshaping & Sorting

"""

In [None]:
# Transpose: swap rows and columns (the result is displayed, df is not reassigned)
df.T

In [None]:
# Sort by index: axis=0 sorts on the row labels, here in descending order
df.sort_index(axis=0, ascending=False)

In [None]:
# Sort by values: order the rows by the contents of one column
df.sort_values(by='Column2')

In [None]:
"""

H) Join & Group

"""

In [None]:
# Join: combine two DataFrames side by side on their shared index.
# A dedicated, seeded generator keeps the example values reproducible across
# runs without altering NumPy's global random state.
join_rng = np.random.RandomState(0)
data1 = join_rng.random_sample((6, 2))
data2 = join_rng.random_sample((6, 2))
df1 = pd.DataFrame(data1, index=dates, columns=['ColumnA', 'ColumnB'])
df2 = pd.DataFrame(data2, index=dates, columns=['ColumnC', 'ColumnD'])
# DataFrame.join matches rows on the index. To join on other columns instead, use:
#   pd.merge(df1, df2, on="column_name", how='left')
df1.join(df2)

In [None]:
# Group: split the rows by the value of Column4, then compute the pairwise
# correlation of the remaining columns within each group
df.groupby('Column4').corr()

In [None]:
"""

More Resources

https://pandas.pydata.org/docs/user_guide/10min.html

https://pandas.pydata.org/docs/getting_started/tutorials.html

"""