# Demo: Series and DataFrames

In [20]:
import pandas as pd
import numpy as np

In [21]:
# Create a 7-element series of standard-normal draws labelled 'a' through 'g'.
# A dedicated, seeded RandomState is used so the values are identical on every
# Restart & Run All (the seed value itself is arbitrary). Outputs saved from an
# earlier, unseeded run will differ until the notebook is re-executed.

s = pd.Series(np.random.RandomState(0).randn(7), index=list('abcdefg'))
s

a    0.905836
b   -0.186652
c    0.138456
d   -2.754079
e    1.084549
f   -0.328924
g    0.959582
dtype: float64

In [22]:
# first entries of the series (head() returns 5 rows by default)

s.head()

a    0.905836
b   -0.186652
c    0.138456
d   -2.754079
e    1.084549
dtype: float64

In [23]:
# last entries of the series (tail() returns 5 rows by default)

s.tail()

c    0.138456
d   -2.754079
e    1.084549
f   -0.328924
g    0.959582
dtype: float64

In [24]:
# summary statistics: count, mean, std, min, quartiles and max

s.describe()

count    7.000000
mean    -0.025890
std      1.332919
min     -2.754079
25%     -0.257788
50%      0.138456
75%      0.932709
max      1.084549
dtype: float64

# Selection & slicing

In [25]:
# label-based slice from 'c' through 'g' (.loc slices include both end labels)

s.loc['c':'g']

c    0.138456
d   -2.754079
e    1.084549
f   -0.328924
g    0.959582
dtype: float64

In [26]:
# single value stored under label 'b' (scalar lookup, same result as s.loc['b'])

s.at['b']

-0.18665202723514043

In [27]:
# first three rows by position (positions 0, 1 and 2; the stop position 3 is excluded)
# .iloc makes it explicit that the slice is positional rather than label-based

s.iloc[:3]

a    0.905836
b   -0.186652
c    0.138456
dtype: float64

# Index Alignment
### Index Alignment in Series
Pandas aligns indices automatically when performing operations between objects. This is very convenient when working with incomplete data.

In [28]:
# state name -> area value; the dict keys become the index of the Series
area_by_state = {
    'Alaska': 1723337,
    'Texas': 695662,
    'California': 423967,
}
area = pd.Series(area_by_state, name='area')
area

Alaska        1723337
California     423967
Texas          695662
Name: area, dtype: int64

In [29]:
# state name -> population count; the dict keys become the index of the Series
population_by_state = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
}
population = pd.Series(population_by_state, name='population')
population

California    38332521
New York      19651127
Texas         26448193
Name: population, dtype: int64

In [30]:
# element-wise division (method form of population / area); index labels are
# matched up before dividing
population.div(area)

Alaska              NaN
California    90.413926
New York            NaN
Texas         38.018740
dtype: float64

Any label that is missing from one of the two Series is marked with NaN, or “Not a Number”, which is how Pandas marks missing data.

### Index Alignment in DataFrame
A similar type of alignment takes place for both columns and indices when performing operations on DataFrames.

In [31]:
# seeded generator so the example frames are the same on every run
rng = np.random.RandomState(42)

# 2x2 frame of integers drawn from [0, 20), columns labelled 'A' and 'B'
A = pd.DataFrame(rng.randint(0, 20, size=(2, 2)), columns=['A', 'B'])
A

Unnamed: 0,A,B
0,6,19
1,14,10


In [32]:
# 3x3 frame of integers drawn from [0, 10); the column order 'B', 'A', 'C'
# deliberately differs from the order used in A
B = pd.DataFrame(rng.randint(0, 10, size=(3, 3)), columns=['B', 'A', 'C'])
B

Unnamed: 0,B,A,C
0,7,4,6
1,9,2,6
2,7,4,3


In [33]:
# element-wise sum (method form of A + B); rows and columns are matched by label
A.add(B)

Unnamed: 0,A,B,C
0,10.0,26.0,
1,16.0,19.0,
2,,,


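Notice that indices are aligned correctly irrespective of their order in the two objects, and indices in the result are sorted. Any row or column label present in only one of the two DataFrames (here row 2 and column C) is filled with NaN.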

# Boolean indexing

In [34]:
# build an integer series running from -3 up to and including 3

lowest, highest = -3, 3
s = pd.Series(range(lowest, highest + 1))  # range() excludes its stop value, hence the + 1
s

0   -3
1   -2
2   -1
3    0
4    1
5    2
6    3
dtype: int64

In [35]:
# keep only the strictly positive values

is_positive = s > 0
s[is_positive]

4    1
5    2
6    3
dtype: int64

In [36]:
# keep the values that are below -1 or above 0.5 (| is the element-wise OR)

below_minus_one = s < -1
above_half = s > 0.5
s[below_minus_one | above_half]

0   -3
1   -2
4    1
5    2
6    3
dtype: int64

In [37]:
# keep the values that are NOT negative (~ inverts the boolean mask element-wise)

is_negative = s < 0
s[~is_negative]

3    0
4    1
5    2
6    3
dtype: int64

In [38]:
# keep the values that are strictly below 2

below_two = s < 2
s.loc[below_two]

0   -3
1   -2
2   -1
3    0
4    1
dtype: int64