# Pandas Exercises
#### From Lighthouse Labs Compass activities

In [2]:
import numpy as np
import pandas as pd

## Series

### From ndarray:

In [14]:
# Build a Series from a NumPy array with explicit labels.
# General form: pd.Series(data, index=index)
# The draw is unseeded, so the values differ on every run.
s = pd.Series(data=np.random.randn(5), index=list('abcde'))
s

a   -1.089297
b    1.596187
c    0.288779
d    0.616389
e    1.978342
dtype: float64

In [18]:
# The labels passed via index= are exposed as the Series' Index object
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [17]:
# No index= given, so pandas falls back to a default integer index (0..n-1)
pd.Series(np.random.randn(5))

0    0.041133
1   -1.911882
2   -0.227989
3    0.875008
4    0.481130
dtype: float64

### From dictionary:

In [21]:
# Dict keys become the index labels, in insertion order (no sorting).
d = dict(b=1, a=0, c=2)
pd.Series(data=d)

b    1
a    0
c    2
dtype: int64

### From scalar: 

In [22]:
# A scalar is repeated for every label in the given index
pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

### ndarray-like functionality of series:

In [23]:
# Positional access: use .iloc for integer positions. Plain s[0] on a
# label-indexed Series relies on a deprecated fallback (FutureWarning in
# pandas >= 2.1) that treats the integer key as a position.
s.iloc[0]

-1.089297194513939

In [24]:
# Integer slices are positional: the first three elements, labels kept
s[:3]

a   -1.089297
b    1.596187
c    0.288779
dtype: float64

In [25]:
# Boolean mask: keep only the values strictly above the median
s[s > s.median()]

b    1.596187
e    1.978342
dtype: float64

In [27]:
 # Select by position in an arbitrary order. .iloc makes the positional intent
 # explicit; s[[4, 3, 1]] on a label-indexed Series relies on a deprecated
 # fallback (FutureWarning in pandas >= 2.1) that treats integer keys as positions.
 s.iloc[[4, 3, 1]]

e    1.978342
d    0.616389
b    1.596187
dtype: float64

In [28]:
# NumPy ufuncs apply element-wise and return a Series with the same index
np.exp(s)

a    0.336453
b    4.934182
c    1.334797
d    1.852228
e    7.230748
dtype: float64

In [29]:
# Extract the underlying values as a NumPy ndarray (the index labels are dropped)
s.to_numpy()

array([-1.08929719,  1.59618681,  0.28877927,  0.61638921,  1.97834244])

### Dict-like functionality:

In [30]:
# Look up a value by its index label, like a dict key
s['a']

-1.089297194513939

In [31]:
# Assign through a label, like setting a dict key; this modifies s in place
s['e'] = 12
s

a    -1.089297
b     1.596187
c     0.288779
d     0.616389
e    12.000000
dtype: float64

In [32]:
# Membership tests check the index labels, not the values
'e' in s

True

In [33]:
# 'f' is not one of the index labels
'f' in s

False

### Vectorized operations:

In [34]:
# Arithmetic between Series is element-wise, matched on index labels
s + s

a    -2.178594
b     3.192374
c     0.577559
d     1.232778
e    24.000000
dtype: float64

In [35]:
# Scalars broadcast to every element (same result as s + s here)
s * 2

a    -2.178594
b     3.192374
c     0.577559
d     1.232778
e    24.000000
dtype: float64

In [36]:
# Element-wise exponential; 'e' is now exp(12) because s['e'] was set to 12
np.exp(s)

a         0.336453
b         4.934182
c         1.334797
d         1.852228
e    162754.791419
dtype: float64

In [37]:
# The expressions above returned new Series; s itself is unchanged
s

a    -1.089297
b     1.596187
c     0.288779
d     0.616389
e    12.000000
dtype: float64

In [38]:
# NOTE(review): plain-Python integer power, unrelated to pandas; it is not
# exp(12) (~162754.79 above) — looks like a leftover scratch cell, confirm
12**12

8916100448256

Unlike ndarrays, operations between Series automatically align on index labels:

In [41]:
# Two overlapping slices: s1 lacks the first label, s2 lacks the last one.
s1, s2 = s[1:], s[:-1]
print(s1, s2, sep='\n')
# Labels present in only one operand come out as NaN in the sum.
s1 + s2

b     1.596187
c     0.288779
d     0.616389
e    12.000000
dtype: float64
a   -1.089297
b    1.596187
c    0.288779
d    0.616389
dtype: float64


a         NaN
b    3.192374
c    0.577559
d    1.232778
e         NaN
dtype: float64

Series can also have a name attribute:

In [46]:
# name= attaches a label to the Series itself; it is shown in the repr.
s = pd.Series(data=np.random.randn(5), name='something')
s

0   -0.155013
1    1.310576
2   -0.772082
3    1.406241
4   -0.896145
Name: something, dtype: float64

## DataFrame:

### From dictionary of series or dictionaries:

In [16]:
# Each dict key becomes a column; the Series are aligned on the union of their
# index labels, so 'one' (which has no 'd') gets NaN in row 'd'.
# NOTE(review): 'two' holds 0.3 at label 'c' (written `.3`) — confirm it was
# not meant to be 3.
d = {
    'one': pd.Series([1.0, 2.0, 3.0], index=list('abc')),
    'two': pd.Series([1.0, 2.0, 0.3, 4.0], index=list('abcd')),
}
df = pd.DataFrame(data=d)
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,0.3
d,,4.0


In [4]:
# index= selects and orders the rows taken from the dict's Series
pd.DataFrame(d, index=['d', 'b', 'a'])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [5]:
# columns= picks which dict keys to keep; 'three' is not a key in d, so that
# column is all NaN, and 'one' is left out
pd.DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three'])

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


### From dict of ndarrays or list:

In [6]:
# Equal-length lists map to columns; rows get a default integer index.
d = {
    'one': [1.0, 2.0, 3.0, 4.0],
    'two': [4.0, 3.0, 2.0, 1.0],
}

pd.DataFrame(data=d)

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [7]:
# Lists carry no labels of their own, so index= supplies the row labels
pd.DataFrame(d, index=['a', 'b', 'c', 'd'])

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


### From a series:

In [8]:
# A single Series becomes a one-column DataFrame; its name is the column label
pd.DataFrame(pd.Series(np.random.randn(5), name='something'))

Unnamed: 0,something
0,-1.803256
1,0.222537
2,0.326799
3,-0.612957
4,-0.599011


### Selection, addition, deletion:

In [9]:
# selection: indexing a DataFrame by column label returns that column as a Series
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [12]:
# Derive new columns from existing ones; assigning to a new label adds the column.
df['three'] = df['one'].mul(df['two'])  # element-wise product, NaN where either side is NaN
df['flag'] = df['one'].gt(2)            # a comparison against NaN evaluates to False

df

Unnamed: 0,one,two,three,flag
a,1.0,1.0,1.0,False
b,2.0,2.0,4.0,False
c,3.0,0.3,0.9,True
d,,4.0,,False


In [24]:
# deletion: rebuild the frame, then remove a column in place with `del`
d = {
    'one': pd.Series([1.0, 2.0, 3.0], index=list('abc')),
    'two': pd.Series([1.0, 2.0, 0.3, 4.0], index=list('abcd')),
}
df = pd.DataFrame(data=d)

del df['two']
df

Unnamed: 0,one
a,1.0
b,2.0
c,3.0
d,


In [30]:
# insertion: rebuild the frame, then add a column from a scalar, which is
# repeated for every row
d = {
    'one': pd.Series([1.0, 2.0, 3.0], index=list('abc')),
    'two': pd.Series([1.0, 2.0, 0.3, 4.0], index=list('abcd')),
}
df = pd.DataFrame(data=d)

df['foo'] = 'bar'
df

Unnamed: 0,one,two,foo
a,1.0,1.0,bar
b,2.0,2.0,bar
c,3.0,0.3,bar
d,,4.0,bar


In [32]:
# Assignment aligns on the index: only the first two rows ('a', 'b') receive
# values, the remaining rows are filled with NaN.
df['one_trunc'] = df['one'].iloc[:2]
df

Unnamed: 0,one,two,foo,one_trunc
a,1.0,1.0,bar,1.0
b,2.0,2.0,bar,2.0
c,3.0,0.3,bar,
d,,4.0,bar,


## Dtypes