In [2]:
import numpy as np
import pandas as pd

# NumPy
## Useful Methods

In [3]:
# Three equally spaced points between 0 and 5, both endpoints included: 0, 2.5, 5.
# A notebook cell only echoes its LAST expression, so this intermediate result
# is printed explicitly; as a bare expression it was computed and then discarded.
print(np.linspace(0, 5, 3))

# 4x4 identity matrix (ones on the main diagonal, zeros elsewhere).
# As the final expression of the cell it is displayed automatically.
np.eye(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

## Random Sampling

In [4]:
# Five samples drawn from a uniform distribution over [0, 1)
np.random.rand(5)

array([0.04211662, 0.84851846, 0.30695824, 0.01712881, 0.259964  ])

In [5]:
# Five samples drawn from the standard normal distribution (mean 0, variance 1)
np.random.randn(5)

array([ 0.62406873, -0.02677633, -0.40371017,  0.14956028, -1.16402091])

In [6]:
# Ten random integers from 1 to 6 -- `high` is exclusive, so 7 itself is never drawn
np.random.randint(low=1, high=7, size=10)

array([5, 1, 4, 3, 1, 4, 4, 1, 3, 1])

## Reshaping

In [7]:
# Ten random integers drawn from 0..49 (`high=50` is exclusive)
a = np.random.randint(low=0, high=50, size=10)
a

array([10, 28, 24, 47, 36, 25, 15, 39, 28, 11])

In [8]:
# Rearrange the 10 values of `a` into 5 rows of 2 columns, filled row by row
ra = a.reshape(5,2)
ra

array([[10, 28],
       [24, 47],
       [36, 25],
       [15, 39],
       [28, 11]])

In [9]:
# Position of the smallest value, counted in the flattened (row-major) array;
# without an `axis` argument the result is a single integer, not a (row, col) pair
ra.argmin()

0

## Indexing and Selection  

- Broadcasting

**NOTE:** Slices return views of the array, not copies, so writing to a slice also changes the original array. To get an independent copy, use `arr.copy()`.

----

In [10]:
# Integers 0 through 10 inclusive (the stop value 11 is excluded)
arr = np.arange(11)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [11]:
# Take the first five elements; basic slicing yields a VIEW onto `arr`, not a copy
slice_of_arr = arr[:5]
print(slice_of_arr)
slice_of_arr[:] = 99 # broadcasting: the scalar 99 is written to every element of the slice
print(slice_of_arr)
# The slice shares its data with `arr`, so the original's first five entries changed too
print(arr)

[0 1 2 3 4]
[99 99 99 99 99]
[99 99 99 99 99  5  6  7  8  9 10]


# Pandas
## Hierarchical Indexing

In [12]:
# 6x2 array of standard-normal samples.
# NOTE: this rebinds `arr`, which the indexing cells above used for a 1-D integer array.
arr = np.random.randn(6,2)
print(arr)
# Wrap the array in a DataFrame; rows and columns receive default integer labels
df = pd.DataFrame(arr)
print(df)

[[ 0.57090006 -0.57807162]
 [ 0.91882303  1.75039399]
 [-0.79838903  0.10503099]
 [ 1.56321784  0.86403114]
 [ 2.21870613 -1.16236024]
 [ 0.00968016  1.51303992]]
          0         1
0  0.570900 -0.578072
1  0.918823  1.750394
2 -0.798389  0.105031
3  1.563218  0.864031
4  2.218706 -1.162360
5  0.009680  1.513040


In [13]:
# Labels for the two index levels: three rows in each of the groups G1 and G2,
# numbered 1..3 inside every group.
groups = ["G1"] * 3 + ["G2"] * 3
nums = [1, 2, 3] * 2

# Build the two-level row index straight from the parallel label lists and
# name both levels at construction time.
df.index = pd.MultiIndex.from_arrays([groups, nums], names=["groups", "nums"])

In [14]:
# Show the frame: its rows are now labelled by the two-level (groups, nums) index
df

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1
groups,nums,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,0.5709,-0.578072
G1,2,0.918823,1.750394
G1,3,-0.798389,0.105031
G2,1,1.563218,0.864031
G2,2,2.218706,-1.16236
G2,3,0.00968,1.51304


In [15]:
# Two-step lookup: select the "G2" block of the outer level, then row 2 of the
# remaining inner level; the result is that single row as a Series.
# (A single `df.loc[("G2", 2)]` reaches the same row in one step.)
df.loc["G2"].loc[2]

0    2.218706
1   -1.162360
Name: 2, dtype: float64

In [16]:
# Cross-section across all groups: every row whose inner `nums` label equals 2.
# The level is addressed by its name instead of by its position (1).
df.xs(2, level="nums")

Unnamed: 0_level_0,0,1
groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,0.918823,1.750394
G2,2.218706,-1.16236


In [17]:
# Small all-string frame for the apply/groupby examples. Each column is written
# as one whitespace-separated string and split into its five cell values.
# NOTE: this rebinds `df`; the MultiIndex frame from the cells above is replaced.
df = pd.DataFrame(
    {
        column: cells.split()
        for column, cells in (
            ("group", "g1 g1 g2 g2 g1"),
            ("a", "a b c d e"),
            ("b", "f g h i j"),
        )
    }
)
df

Unnamed: 0,group,a,b
0,g1,a,f
1,g1,b,g
2,g2,c,h
3,g2,d,i
4,g1,e,j


In [18]:
# Collapse each row into a plain Python list of its cell values (axis=1 walks
# the frame row by row); the result is a Series holding one list per row.
df.apply(list, axis=1)

0    [g1, a, f]
1    [g1, b, g]
2    [g2, c, h]
3    [g2, d, i]
4    [g1, e, j]
dtype: object

In [19]:
# For every distinct `group` value, gather each remaining column's entries
# into a list -- one row per group, one list per (group, column) cell.
df.groupby("group").agg(list)

Unnamed: 0_level_0,a,b
group,Unnamed: 1_level_1,Unnamed: 2_level_1
g1,"[a, b, e]","[f, g, j]"
g2,"[c, d]","[h, i]"
