In [4]:
import pandas as pd
import numpy as np

In [5]:
# build a Series of seven standard-normal random draws, labelled 'a' through 'g'

s = pd.Series(data=np.random.randn(7), index=list('abcdefg'))
s

a   -1.203171
b    0.525522
c    0.829843
d    1.391773
e   -0.022034
f    1.182895
g    0.772742
dtype: float64

In [6]:
# head of series: the first five rows (head() defaults to n=5)

s.head()

a   -1.203171
b    0.525522
c    0.829843
d    1.391773
e   -0.022034
dtype: float64

In [12]:
# look up a single value by its index label using plain [] indexing
s['e']

-0.022034338805802314

In [7]:
# tail of series: the last five rows (tail() defaults to n=5)

s.tail()

c    0.829843
d    1.391773
e   -0.022034
f    1.182895
g    0.772742
dtype: float64

In [21]:
# summary stats: describe() returns a Series of statistics; wrapping it in a
# one-column DataFrame and transposing shows them side by side in a single row

des = pd.DataFrame(s.describe())
des.T


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
0,7.0,0.496796,0.877371,-1.203171,0.251744,0.772742,1.006369,1.391773


In [14]:
# select by label: slice from 'c' through 'g' (.loc label slices include both endpoints)

s.loc['c':'g']

c    0.829843
d    1.391773
e   -0.022034
f    1.182895
g    0.772742
dtype: float64

In [10]:
# select the single value stored under label 'b' (a scalar comes back, not a Series)

s.loc['b']

0.52552153809793889

In [15]:
# positional slice: the first three rows (positions 0, 1 and 2; the stop position 3 is excluded)

s[0:3]

a   -1.203171
b    0.525522
c    0.829843
dtype: float64

### Boolean indexing

In [22]:
# rebind `s` to a new Series holding the integers -3 through 3;
# no index is passed, so the rows get the default integer labels 0..6

s = pd.Series(range(-3, 4))
s

0   -3
1   -2
2   -1
3    0
4    1
5    2
6    3
dtype: int64

In [26]:
# find the values that are < -5 (no value is that small, so the result is an empty Series)

s[s < -5]


Series([], dtype: int64)

In [47]:
# find the values that are < -1 or > 0.5
# each comparison needs its own parentheses because | binds more tightly than < and >

s[(s < -1) | (s > 0.5)]

0   -3
1   -2
4    1
5    2
6    3
dtype: int64

In [27]:
# find the values that are not < 0, i.e. >= 0 (~ inverts the boolean mask element-wise)

s[~(s < 0)]

3    0
4    1
5    2
6    3
dtype: int64

In [28]:
# find the values that are < 2 (strict comparison, so 2 itself is left out)

s[s < 2]

0   -3
1   -2
2   -1
3    0
4    1
dtype: int64

In [34]:
# `s` was rebound to the integer-indexed Series above, so the string labels 'd'..'f'
# no longer match anything and this label slice came back empty
# NOTE(review): newer pandas releases may raise a TypeError for a string slice on an integer index -- confirm
s.loc['d':'f']

Series([], dtype: int64)

In [35]:
# 7x5 frame of uniform [0, 1) random numbers: one row per weekday, one column per letter
column_labels = list('abcde')
row_index = ['mon', 'tues', 'wed', 'thurs', 'fri', 'sat', 'sun']
df = pd.DataFrame(
    data=np.random.rand(len(row_index), len(column_labels)),
    index=row_index,
    columns=column_labels,
)
df.head()

Unnamed: 0,a,b,c,d,e
mon,0.797986,0.577128,0.323251,0.280467,0.139776
tues,0.208265,0.163258,0.307926,0.700827,0.698194
wed,0.14061,0.647707,0.688668,0.349333,0.535909
thurs,0.878903,0.003775,0.458,0.992845,0.610997
fri,0.200753,0.455462,0.909889,0.909185,0.101162


In [47]:
# select every row and the columns 'a', 'b', 'c' by label
# (.loc replaces the .ix indexer, which was deprecated and later removed from pandas)
df.loc[:,['a','b','c']]

Unnamed: 0,a,b,c
mon,0.797986,0.577128,0.323251
tues,0.208265,0.163258,0.307926
wed,0.14061,0.647707,0.688668
thurs,0.878903,0.003775,0.458
fri,0.200753,0.455462,0.909889
sat,0.030766,0.139228,0.063727
sun,0.823007,0.567065,0.608972


In [48]:
# select several columns at once by passing a list of column labels to []
df[['a','c','e']]

Unnamed: 0,a,c,e
mon,0.797986,0.323251,0.139776
tues,0.208265,0.307926,0.698194
wed,0.14061,0.688668,0.535909
thurs,0.878903,0.458,0.610997
fri,0.200753,0.909889,0.101162
sat,0.030766,0.063727,0.443938
sun,0.823007,0.608972,0.492871


In [53]:
# select whole rows by label: a list of index labels given to .loc
df.loc[['mon','wed','fri']]

Unnamed: 0,a,b,c,d,e
mon,0.797986,0.577128,0.323251,0.280467,0.139776
wed,0.14061,0.647707,0.688668,0.349333,0.535909
fri,0.200753,0.455462,0.909889,0.909185,0.101162


In [55]:
# select rows and columns together: .loc[row_labels, column_labels]
df.loc[['mon','tues'],['b','c']]

Unnamed: 0,b,c
mon,0.577128,0.323251
tues,0.163258,0.307926


# Independent Practice: (25 minutes)
Create a series
Look at the head, tail, and summary stats
Select series values by index
Single value
Multiple values
Using Boolean indexing find values that are less than another value
Using Boolean indexing find values that are greater than another value
Using Boolean indexing find values that are less than one value and greater than another value


Bonus: Create a DataFrame with at least 5 rows

Slice for certain rows
Single row by label and index
Multiple row range by label and index
Single row series + multi-value range
labeled index
int index

In [72]:



# create a five-element Series of random normals labelled 'a'..'e'
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])

# NOTE: only the final expression of a cell is displayed; the lines before it
# are evaluated but their results are not shown
s.tail()       # tail of the series
s.iloc[0]      # single value by position (explicit .iloc rather than an integer key on a labelled Series)
s[0:3]         # multiple values: positional slice of the first three rows

s[s > 0]       # boolean indexing: greater than a value
s[s < 5]       # boolean indexing: less than a value

# boolean indexing: greater than one value and less than another
s[(s > 0) & (s < 3)]

a    0.107155
e    1.027259
dtype: float64

In [84]:
# bonus: a small DataFrame whose columns 'a'..'d' each hold the values 1..4
# NOTE(review): the exercise text asks for at least 5 rows; this frame has 4
d = {col: [1, 2, 3, 4] for col in 'abcd'}
s = pd.DataFrame(data=d)
s

Unnamed: 0,a,b,c,d
0,1,1,1,1
1,2,2,2,2
2,3,3,3,3
3,4,4,4,4


In [92]:
#slice certain rows
# print() as a function call works on Python 3 and, with a single argument, on Python 2 as well
print(s[0:2])


# first row, every column, selected by integer position
s.iloc[0,:]

   a  b  c  d
0  1  1  1  1
1  2  2  2  2


a    1
b    1
c    1
d    1
Name: 0, dtype: int64