<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Playing-with-Numpy" data-toc-modified-id="Playing-with-Numpy-1">Playing with Numpy</a></span></li><li><span><a href="#Playing-with-Pandas" data-toc-modified-id="Playing-with-Pandas-2">Playing with Pandas</a></span></li></ul></div>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Playing with Numpy

In [2]:
a = np.array([1, 2, 3])

In [3]:
a + a

array([2, 4, 6])

In [4]:
def print_array_details(a):
    print('Dimensions: %d, shape: %s, dtype: %s' %(a.ndim, a.shape, a.dtype))

In [5]:
a = np.array([1, 2, 3, 4, 5, 6, 7, 8])

In [6]:
a

array([1, 2, 3, 4, 5, 6, 7, 8])

In [7]:
print_array_details(a)

Dimensions: 1, shape: (8,), dtype: int64


In [8]:
a = a.reshape([2, 4])
a

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [9]:
print_array_details(a)

Dimensions: 2, shape: (2, 4), dtype: int64


In [10]:
a = a.reshape([2, 2, 2])
a

array([[[1, 2],
        [3, 4]],

       [[5, 6],
        [7, 8]]])

In [11]:
print_array_details(a)

Dimensions: 3, shape: (2, 2, 2), dtype: int64


In [12]:
x = np.array([[1, 2, 3], [4, 5, 6]], np.int32)
x.shape

(2, 3)

In [13]:
x.shape = (6,)
x

array([1, 2, 3, 4, 5, 6], dtype=int32)

In [14]:
x = x.astype('int64')
x.dtype

dtype('int64')

In [15]:
a = np.zeros([2, 3])
a

array([[0., 0., 0.],
       [0., 0., 0.]])

In [16]:
a.dtype

dtype('float64')

In [17]:
np.ones([2, 3])

array([[1., 1., 1.],
       [1., 1., 1.]])

In [18]:
empty_array = np.empty((2,3))
empty_array

array([[1., 1., 1.],
       [1., 1., 1.]])

In [19]:
np.random.random((2, 3))

array([[0.45424667, 0.19528524, 0.96687853],
       [0.04370491, 0.27479351, 0.59065662]])

In [20]:
np.linspace(2, 10, 5)

array([ 2.,  4.,  6.,  8., 10.])

In [21]:
np.arange(2, 10, 2)

array([2, 4, 6, 8])

In [22]:
a = np.arange(16, dtype="int32")
a = a.reshape([2, 2, 4])

In [23]:
a

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]], dtype=int32)

In [24]:
a = np.arange(8)
a.shape = (2, 2, 2)

In [25]:
a

array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])

In [26]:
a = np.array([45, 65, 76, 32, 99, 22])
a < 50

array([ True, False, False,  True, False,  True])

In [27]:
a = np.arange(8).reshape((2,4))
a

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [28]:
a.min(axis=1)

array([0, 4])

In [29]:
a.sum(axis=0)

array([ 4,  6,  8, 10])

In [30]:
a.mean(axis=1)

array([1.5, 5.5])

In [31]:
a.std(axis=1)

array([1.11803399, 1.11803399])

In [32]:
pi = np.pi
a = np.array([pi, pi/2, pi/4, pi/6])
a

array([3.14159265, 1.57079633, 0.78539816, 0.52359878])

In [33]:
np.degrees(a)

array([180.,  90.,  45.,  30.])

In [34]:
sin_a = np.sin(a)
sin_a

array([1.22464680e-16, 1.00000000e+00, 7.07106781e-01, 5.00000000e-01])

In [35]:
np.round(sin_a, 7)

array([0.       , 1.       , 0.7071068, 0.5      ])

In [36]:
a = np.arange(8).reshape((2,4))
a

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [37]:
np.cumsum(a, axis=1)

array([[ 0,  1,  3,  6],
       [ 4,  9, 15, 22]])

In [38]:
np.cumsum(a)

array([ 0,  1,  3,  6, 10, 15, 21, 28])

In [39]:
def moving_average(a, n=3):
    ret = np.cumsum(a, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n-1:] / n

In [40]:
a = np.arange(6)
a

array([0, 1, 2, 3, 4, 5])

In [41]:
csum = np.cumsum(a)
csum

array([ 0,  1,  3,  6, 10, 15])

In [42]:
csum[3:] = csum[3:] - csum[:-3]
csum

array([ 0,  1,  3,  6,  9, 12])

In [43]:
a = np.arange(10)
moving_average(a, 4)

array([1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5])

## Playing with Pandas

In [44]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [45]:
path = 'nobel_winners/nobel_winners.json'
df = pd.read_json(path)

In [46]:
df.head()

Unnamed: 0,born_in,category,country,date_of_death,link,name,place_of_birth,place_of_death,text,year
0,,Physics,Austria,,http://en.wikipedia.org/wiki/Victor_Francis_Hess,Victor Francis Hess,,,"Victor Francis Hess , Physics, 1936",1936
1,,Physics,Austria,,http://en.wikipedia.org/wiki/Erwin_Schr%C3%B6d...,Erwin Schrödinger,country,country,"Erwin Schrödinger , Physics, 1933",1933
2,,Physiology or Medicine,Austria,,http://en.wikipedia.org/wiki/Karl_Landsteiner,Karl Landsteiner,,located in the administrative territorial entity,"Karl Landsteiner , Physiology or Medicine, 1930",1930
3,Austria,Chemistry,,,http://en.wikipedia.org/wiki/Richard_Kuhn,Richard Kuhn *,,,"Richard Kuhn *, Chemistry, 1938",1938
4,,Physiology or Medicine,Austria,,http://en.wikipedia.org/wiki/Otto_Loewi,Otto Loewi,,,"Otto Loewi , born in Germany , Physiology or ...",1936


In [47]:
df.index

RangeIndex(start=0, stop=1116, step=1)

In [48]:
df = df.set_index('name')

In [49]:
df.loc['Albert Einstein']

Unnamed: 0_level_0,born_in,category,country,date_of_death,link,place_of_birth,place_of_death,text,year
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Albert Einstein,,Physics,Switzerland,,http://en.wikipedia.org/wiki/Albert_Einstein,,,"Albert Einstein , born in Germany , Physics, ...",1921
Albert Einstein,,Physics,Germany,,http://en.wikipedia.org/wiki/Albert_Einstein,,,"Albert Einstein , Physics, 1921",1921


In [50]:
df = df.reset_index()

In [53]:
df.head()

Unnamed: 0,name,born_in,category,country,date_of_death,link,place_of_birth,place_of_death,text,year
0,Victor Francis Hess,,Physics,Austria,,http://en.wikipedia.org/wiki/Victor_Francis_Hess,,,"Victor Francis Hess , Physics, 1936",1936
1,Erwin Schrödinger,,Physics,Austria,,http://en.wikipedia.org/wiki/Erwin_Schr%C3%B6d...,country,country,"Erwin Schrödinger , Physics, 1933",1933
2,Karl Landsteiner,,Physiology or Medicine,Austria,,http://en.wikipedia.org/wiki/Karl_Landsteiner,,located in the administrative territorial entity,"Karl Landsteiner , Physiology or Medicine, 1930",1930
3,Richard Kuhn *,Austria,Chemistry,,,http://en.wikipedia.org/wiki/Richard_Kuhn,,,"Richard Kuhn *, Chemistry, 1938",1938
4,Otto Loewi,,Physiology or Medicine,Austria,,http://en.wikipedia.org/wiki/Otto_Loewi,,,"Otto Loewi , born in Germany , Physiology or ...",1936


In [90]:
df1 = pd.DataFrame({'foo': [1, 2, 3], 'bar': ['a', 'b', 'c']})
df2 = pd.DataFrame({'baz': [7, 8, 9, 11], 'qux': ['p', 'q', 'r', 't']})

In [92]:
pn = pd.Panel({'item1':df1, 'item2':df2})
pn

<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 4 (major_axis) x 4 (minor_axis)
Items axis: item1 to item2
Major_axis axis: 0 to 3
Minor_axis axis: bar to qux

In [93]:
pn['item1']

Unnamed: 0,bar,baz,foo,qux
0,a,,1.0,
1,b,,2.0,
2,c,,3.0,
3,,,,
