In [1]:
import pandas as pd
import numpy as np

In [2]:
# create a series using a numpy random number generator

s = pd.Series(np.random.randn(7), index=['a', 'b', 'c', 'd', 'e', 'f', 'g'])  
s

a   -2.232009
b    1.090969
c    1.770224
d   -0.523239
e    0.503814
f    0.600562
g    0.781101
dtype: float64

In [3]:
# head of series

pd.Series.head(s)

a   -2.232009
b    1.090969
c    1.770224
d   -0.523239
e    0.503814
dtype: float64

In [4]:
# tail of series

pd.Series.tail(s)

c    1.770224
d   -0.523239
e    0.503814
f    0.600562
g    0.781101
dtype: float64

In [5]:
# summary stats

pd.Series.describe(s)

count    7.000000
mean     0.284489
std      1.306244
min     -2.232009
25%     -0.009713
50%      0.600562
75%      0.936035
max      1.770224
dtype: float64

In [6]:
# select by location c to g

s.loc['c':]

c    1.770224
d   -0.523239
e    0.503814
f    0.600562
g    0.781101
dtype: float64

In [7]:
# select just b

s.loc['b']

1.0909692909527744

In [8]:
# slice for rows 1-3

s[:3]

a   -2.232009
b    1.090969
c    1.770224
dtype: float64

### Boolean indexing

In [9]:
# create another series ranging from -3 to 3

s = pd.Series(range(-3, 4))
s

0   -3
1   -2
2   -1
3    0
4    1
5    2
6    3
dtype: int64

In [10]:
# find the values that are > 0. 

s[s > 0]

4    1
5    2
6    3
dtype: int64

In [11]:
# find the values that are < -1 or > 0.5

s[(s < -1) | (s > 0.5)]

0   -3
1   -2
4    1
5    2
6    3
dtype: int64

In [12]:
# find the values that are not < 0.

s[~(s < 0)]

3    0
4    1
5    2
6    3
dtype: int64

In [13]:
# find the values that are < 2

s[s < 2]

0   -3
1   -2
2   -1
3    0
4    1
dtype: int64

In [16]:
column_labels = ['a', 'b', 'c', 'd', 'e']
row_index = ['mon', 'tues', 'wed', 'thurs', 'fri', 'sat', 'sun']
df = pd.DataFrame(np.random.rand(7,5), index=row_index, columns=column_labels)
df.head(4)

Unnamed: 0,a,b,c,d,e
mon,0.716133,0.231526,0.580542,0.420118,0.321061
tues,0.383588,0.090954,0.575974,0.943677,0.150704
wed,0.675004,0.221506,0.589565,0.979347,0.361099
thurs,0.073459,0.35918,0.274972,0.291896,0.241687


In [25]:
# df.shape
# mask = [False, True, True, True, True, False, True]
mask = (df['e'] < 0.5) & (df['c'] > 5)
mask
# len(mask)
# df['a'][mask]

mon      False
tues     False
wed      False
thurs    False
fri      False
sat      False
sun      False
dtype: bool

In [18]:
df['a'].iloc[[0,2,5]]

mon    0.716133
wed    0.675004
sat    0.544220
Name: a, dtype: float64