# 02 Using Pandas

### The **Data Frame** 

### Overview
<table>
<tr><td>Creating a DataFrame<td><td>|<td><td>Data retrieval<td><tr>
<tr><td>Conditions<td><td>|<td><td>Date range as an index<td><tr>
<tr><td>Nested dictionary to DataFrame<td><td>|<td><td> <td><tr>
<tr><td> <td><td> <td><td> <td><tr>
<table>

## Setup

In [1]:
import pandas as pd
import numpy as np
import datetime
from IPython.display import display, HTML

# turn on data table rendering
pd.set_option('display.notebook_repr_html', True)

## Creating a DataFrame

In [2]:
# Collection of data columns
s1 = np.random.randn(5)
s2 = [True, True, False, True, False]
s3 = ['Apple', 'Banana', 'Tomato', 'Bean', 'Rice']

# Dict with added column names
data = {'Randnum': s1, 'IsBool': s2, 'Name': s3}
df = pd.DataFrame(data)
df

Unnamed: 0,Randnum,IsBool,Name
0,-0.788344,True,Apple
1,-0.039208,True,Banana
2,-0.320315,False,Tomato
3,-0.655596,True,Bean
4,1.258711,False,Rice


In [3]:
print(df.index)
print(df.columns)

RangeIndex(start=0, stop=5, step=1)
Index(['Randnum', 'IsBool', 'Name'], dtype='object')


In [4]:
# Create a new column and assign it all 127
df['New'] = 127
df

Unnamed: 0,Randnum,IsBool,Name,New
0,-0.788344,True,Apple,127
1,-0.039208,True,Banana,127
2,-0.320315,False,Tomato,127
3,-0.655596,True,Bean,127
4,1.258711,False,Rice,127


## Data retrieval

In [5]:
# Select the Randnum column
df.Randnum

0   -0.788344
1   -0.039208
2   -0.320315
3   -0.655596
4    1.258711
Name: Randnum, dtype: float64

In [6]:
# Select the first three rows
df[:3]

Unnamed: 0,Randnum,IsBool,Name,New
0,-0.788344,True,Apple,127
1,-0.039208,True,Banana,127
2,-0.320315,False,Tomato,127


In [7]:
# Select the second row
df.iloc[1]

Randnum   -0.039208
IsBool         True
Name         Banana
New             127
Name: 1, dtype: object

In [8]:
# Select the Randnum value of the second row
df.iloc[1, 2] # or

'Banana'

In [9]:
# Select the Randnum value of the second row
df.loc[1, 'Randnum']

-0.039208441775940774

In [10]:
# Selecting specific rows and columns
df.loc[[0, 2], ['Name', 'Randnum']]


Unnamed: 0,Name,Randnum
0,Apple,-0.788344
2,Tomato,-0.320315


## Conditions

In [11]:
# Retrieve boolean Series, True if Randnum is smaller than zero
belowzero = df.Randnum < 0
belowzero

0     True
1     True
2     True
3     True
4    False
Name: Randnum, dtype: bool

In [12]:
# Selects all rows meeting the belowzero condition
df[belowzero]

Unnamed: 0,Randnum,IsBool,Name,New
0,-0.788344,True,Apple,127
1,-0.039208,True,Banana,127
2,-0.320315,False,Tomato,127
3,-0.655596,True,Bean,127


In [13]:
# Retrieve boolean Series, True if Randnum is smaller than zero
isapple = df['Name'] == 'Apple'
isapple

0     True
1    False
2    False
3    False
4    False
Name: Name, dtype: bool

In [14]:
# Select belowzero AND isapple conditions
df[belowzero & isapple]

Unnamed: 0,Randnum,IsBool,Name,New
0,-0.788344,True,Apple,127


In [15]:
# Select belowzero OR isapple conditions
df[belowzero | isapple]

Unnamed: 0,Randnum,IsBool,Name,New
0,-0.788344,True,Apple,127
1,-0.039208,True,Banana,127
2,-0.320315,False,Tomato,127
3,-0.655596,True,Bean,127


## Date range as an index

In [16]:
# Set the index to a date range
df.index = pd.date_range('1-1-2015', periods=5, freq='d')
df.index.name = 'Date'
df

Unnamed: 0_level_0,Randnum,IsBool,Name,New
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01-01,-0.788344,True,Apple,127
2015-01-02,-0.039208,True,Banana,127
2015-01-03,-0.320315,False,Tomato,127
2015-01-04,-0.655596,True,Bean,127
2015-01-05,1.258711,False,Rice,127


## Nested dictionary to DataFrame

In [17]:
# Create a nested dictionary of equal inner value-count
data = {'Paris': {'N': 1.2, 'E': 4, 'S': 2.9, 'W': 0.8},
        'Amsterdam': {'N': 2.3, 'E': 1.7, 'S': 2.1, 'W': 7.2},
        'London': {'N': 9.7, 'E': 3.1, 'S': 7.2, 'W': 2}}

df2 = pd.DataFrame(data)
df2

Unnamed: 0,Paris,Amsterdam,London
N,1.2,2.3,9.7
E,4.0,1.7,3.1
S,2.9,2.1,7.2
W,0.8,7.2,2.0


### Done!

#### Next: _Data Alignment_