In [3]:
import numpy as np
import pandas as pd

# We'll demo a bit of Scipy
import scipy.special

import bokeh_catplot
import iqplot

import bokeh.io
import bokeh.plotting

bokeh.io.output_notebook()

In [4]:
# create a one-dimensional NumPy array from a Python list
my_ar = np.array([1, 2, 3, 4])

# look at it (the last expression in a cell is displayed)
my_ar

array([1, 2, 3, 4])

In [5]:
# the data type of stored entries (int64 here, inferred from the integer list)
my_ar.dtype

dtype('int64')

In [6]:
# shape of the array: a tuple with one length per dimension
my_ar.shape

(4,)

In [7]:
# convert entries to float; astype returns a new array rather than changing my_ar
my_ar.astype(float)

array([1., 2., 3., 4.])

In [8]:
# Summary statistics through the array's own methods, printed one per line
# in the order: max, min, sum, mean, standard deviation
for stat_method in (my_ar.max, my_ar.min, my_ar.sum, my_ar.mean, my_ar.std):
    print(stat_method())

4
1
10
2.5
1.118033988749895


In [9]:
# The same statistics through the equivalent top-level NumPy functions,
# printed in the order: max, min, sum, mean, standard deviation
for reducer in (np.max, np.min, np.sum, np.mean, np.std):
    print(reducer(my_ar))

4
1
10
2.5
1.118033988749895


In [10]:
# How long our arrays will be (reused by the following cells)
n = 10

# Make a NumPy array of length n filled with zeros (float entries by default)
np.zeros(n)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [11]:
# Make a NumPy array of length n filled with ones (float entries by default)
np.ones(n)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [12]:
# Make a NumPy array of length n without initializing its entries.
# The array holds whatever values already sat in the memory it was given, so
# its contents are arbitrary; the ones displayed below are leftovers from that
# memory, not a guaranteed result.
np.empty(n)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [13]:
# Make a NumPy array filled with zeros with the same shape as another NumPy array
# (note: this rebinds my_ar to a 2x2 integer array)
my_ar = np.array([[1, 2], [3, 4]])
np.zeros_like(my_ar)

array([[0, 0],
       [0, 0]])

In [14]:
# Load the CSV file; text following a '#' in the file is skipped as a comment
df = pd.read_csv('data/c_elegans_egg_xa.csv', comment='#')

# Take a look at the first five rows
df.head()

Unnamed: 0,food,area (sq. um)
0,high,1683
1,high,2061
2,high,1792
3,high,1852
4,high,2091


In [23]:
# Jittered strip plot of 'area (sq. um)', one row per food level
# ('low' first, then 'high').
# NOTE(review): `val`, `horizontal` and `jitter` look like keyword names carried
# over from bokeh_catplot -- confirm they are still accepted by the installed
# iqplot version before relying on this cell.
p = iqplot.strip(
    # what to plot
    data=df,
    val='area (sq. um)',
    cats='food',
    order=['low', 'high'],
    # how to lay it out
    jitter=True,
    horizontal=True,
    # figure appearance
    height=200,
    y_axis_label='amount of food',
)

bokeh.io.show(p)



In [24]:
# Select the 'area (sq. um)' entries for the rows where food is 'high'
xa_high = df.loc[df['food']=='high', 'area (sq. um)']

# Take a look at the data type
type(xa_high)

pandas.core.series.Series

In [25]:
# Select the areas for the high-food rows and convert them to a NumPy array.
# `.to_numpy()` is the conversion pandas recommends over the older `.values`
# attribute; for this numeric column it yields the same array.
xa_high = df.loc[df['food']=='high', 'area (sq. um)'].to_numpy()

# Confirm that we now hold a NumPy array rather than a pandas Series
type(xa_high)

numpy.ndarray

In [26]:
# Areas for the low-food rows as a NumPy array (`.to_numpy()` is the
# conversion pandas recommends over the older `.values` attribute)
xa_low = df.loc[df['food']=='low', 'area (sq. um)'].to_numpy()

In [27]:
# areas for the high-food rows, as a NumPy array
xa_high

array([1683, 2061, 1792, 1852, 2091, 1781, 1912, 1802, 1751, 1731, 1892,
       1951, 1809, 1683, 1787, 1840, 1821, 1910, 1930, 1800, 1833, 1683,
       1671, 1680, 1692, 1800, 1821, 1882, 1642, 1749, 1712, 1661, 1701,
       2141, 1863, 1752, 1740, 1721, 1660, 1930, 2030, 1851, 2131, 1828])

In [28]:
# areas for the low-food rows, as a NumPy array
xa_low

array([1840, 2090, 2169, 1988, 2212, 2339, 1989, 2144, 2290, 1920, 2280,
       1809, 2158, 1800, 2133, 2060, 2160, 2001, 2030, 2088, 1951, 2460,
       2021, 2010, 2139, 2160, 2106, 2171, 2113, 2179, 1890, 2179, 2021,
       1969, 2150, 1900, 2267, 1711, 1901, 2114, 2112, 2361, 2130, 2061,
       2121, 1832, 2210, 2130, 2153, 2009, 2100, 2252, 2143, 2252, 2222,
       2121, 2409])

In [29]:
# reversed array (a step of -1 walks the array backwards)
xa_high[::-1]

array([1828, 2131, 1851, 2030, 1930, 1660, 1721, 1740, 1752, 1863, 2141,
       1701, 1661, 1712, 1749, 1642, 1882, 1821, 1800, 1692, 1680, 1671,
       1683, 1833, 1800, 1930, 1910, 1821, 1840, 1787, 1683, 1809, 1951,
       1892, 1731, 1751, 1802, 1912, 1781, 2091, 1852, 1792, 2061, 1683])

In [30]:
# every fifth element, starting at index 3
xa_high[3::5]

array([1852, 1751, 1683, 1930, 1680, 1642, 2141, 1660, 1828])

In [31]:
# entries 10 to 20, inclusive (the stop index, 21, is excluded)
xa_high[10:21]

array([1892, 1951, 1809, 1683, 1787, 1840, 1821, 1910, 1930, 1800, 1833])

In [32]:
# index with a list to pull out entries 1, 19 and 6, in that order
xa_high[[1, 19, 6]]

array([2061, 1800, 1912])

In [33]:
# just slice out the big ones (Boolean mask: entries greater than 2000)
xa_high[xa_high > 2000]

array([2061, 2091, 2141, 2030, 2131])

In [34]:
# indices where the condition holds; with only a condition, np.where returns
# a tuple holding one index array per dimension
np.where(xa_high > 2000)

(array([ 1,  4, 33, 40, 42]),)

In [36]:
# make an array
my_ar = np.array([1, 2, 3, 4])

# change an element in place (arrays are mutable)
my_ar[2] = 6

# see the result
my_ar

array([1, 2, 6, 4])

In [37]:
# Start again from a float array; the in-place division below needs an array
# that can store fractional results
my_ar = np.array([1, 2, 3, 4], dtype=float)


def normalize(x):
    """Scale the NumPy array ``x`` in place so that its entries sum to one.

    Nothing is returned: the division is done with ``/=``, which modifies the
    caller's (mutable) array directly.
    """
    total = np.sum(x)
    x /= total


# Call the function purely for its side effect
normalize(my_ar)

# The caller's array is normalized even though nothing was returned
my_ar

array([0.1, 0.2, 0.3, 0.4])

In [43]:
# The same four values held two ways: a Python list and a NumPy array
my_list = [1, 2, 3, 4]
my_ar = np.array(my_list)

# Take the interior elements (everything but the first and last) of each
interior = slice(1, -1)
my_list_slice = my_list[interior]
my_ar_slice = my_ar[interior]

# Overwrite the first element of each slice
my_list_slice[0] = 9
my_ar_slice[0] = 9

# Originals: the list is unchanged because its slice is a copy, while the
# array is changed because its slice is a view onto the same data
print(my_list, my_ar, sep='\n')

[1, 2, 3, 4]
[1 9 3 4]


In [44]:
# make an independent copy, so that edits to it do not touch xa_high
xa_high_copy = np.copy(xa_high)

# mess with an entry of the copy only
xa_high_copy[10] = 2000

# check exact equality. np.array_equal compares shape and every element
# exactly; np.allclose is a tolerance-based test meant for floats and can
# report unequal integer arrays as equal when the values are large.
np.array_equal(xa_high, xa_high_copy)

False

In [45]:
# divide one array by another (element-wise; the result is a float array)
np.array([5, 6, 7, 8]) / np.array([1, 2, 3, 4])

array([5.        , 3.        , 2.33333333, 2.        ])

In [46]:
# multiply by scalar (applied to every entry)
-4 * xa_high

array([-6732, -8244, -7168, -7408, -8364, -7124, -7648, -7208, -7004,
       -6924, -7568, -7804, -7236, -6732, -7148, -7360, -7284, -7640,
       -7720, -7200, -7332, -6732, -6684, -6720, -6768, -7200, -7284,
       -7528, -6568, -6996, -6848, -6644, -6804, -8564, -7452, -7008,
       -6960, -6884, -6640, -7720, -8120, -7404, -8524, -7312])

In [47]:
# raise to power (element-wise)
xa_high**2

array([2832489, 4247721, 3211264, 3429904, 4372281, 3171961, 3655744,
       3247204, 3066001, 2996361, 3579664, 3806401, 3272481, 2832489,
       3193369, 3385600, 3316041, 3648100, 3724900, 3240000, 3359889,
       2832489, 2792241, 2822400, 2862864, 3240000, 3316041, 3541924,
       2696164, 3059001, 2930944, 2758921, 2893401, 4583881, 3470769,
       3069504, 3027600, 2961841, 2755600, 3724900, 4120900, 3426201,
       4541161, 3341584])

In [48]:
# new 2D array using the reshape() method: four columns, with -1 telling
# NumPy to infer the number of rows from the array's length (11 rows for the
# 44 entries here) instead of hard-coding it
my_ar = xa_high.reshape((-1, 4))

# look at it
my_ar

array([[1683, 2061, 1792, 1852],
       [2091, 1781, 1912, 1802],
       [1751, 1731, 1892, 1951],
       [1809, 1683, 1787, 1840],
       [1821, 1910, 1930, 1800],
       [1833, 1683, 1671, 1680],
       [1692, 1800, 1821, 1882],
       [1642, 1749, 1712, 1661],
       [1701, 2141, 1863, 1752],
       [1740, 1721, 1660, 1930],
       [2030, 1851, 2131, 1828]])

In [49]:
# make list of lists
list_of_lists = [[1, 2], [3, 4]]

# pull value in first row, second column (two separate bracket lookups)
list_of_lists[0][1]

2

In [50]:
# with a 2D NumPy array, row and column go in a single pair of brackets
my_ar[0, 1]

np.int64(2061)

In [51]:
# all columns of the row at index 2
my_ar[2, :]

array([1751, 1731, 1892, 1951])

In [52]:
# Boolean indexing on a 2D array returns the matching entries as a 1D array
my_ar[my_ar > 2000]

array([2061, 2091, 2141, 2030, 2131])

In [53]:
# fancy indexing: row and column index arrays are paired up position by
# position, so entry k of the result is my_ar[rows[k], cols[k]]
rows = np.array([0, 1, 8, 10, 10])
cols = np.array([1, 0, 1, 0, 2])
my_ar[rows, cols]

array([2061, 2091, 2141, 2030, 2131])

In [55]:
# join the two arrays end to end (xa_high first, then xa_low) into one 1D array
combined = np.concatenate((xa_high, xa_low))
# look at it
combined

array([1683, 2061, 1792, 1852, 2091, 1781, 1912, 1802, 1751, 1731, 1892,
       1951, 1809, 1683, 1787, 1840, 1821, 1910, 1930, 1800, 1833, 1683,
       1671, 1680, 1692, 1800, 1821, 1882, 1642, 1749, 1712, 1661, 1701,
       2141, 1863, 1752, 1740, 1721, 1660, 1930, 2030, 1851, 2131, 1828,
       1840, 2090, 2169, 1988, 2212, 2339, 1989, 2144, 2290, 1920, 2280,
       1809, 2158, 1800, 2133, 2060, 2160, 2001, 2030, 2088, 1951, 2460,
       2021, 2010, 2139, 2160, 2106, 2171, 2113, 2179, 1890, 2179, 2021,
       1969, 2150, 1900, 2267, 1711, 1901, 2114, 2112, 2361, 2130, 2061,
       2121, 1832, 2210, 2130, 2153, 2009, 2100, 2252, 2143, 2252, 2222,
       2121, 2409])

In [56]:
# exponential, applied element-wise (the areas are divided by 1000 first)
np.exp(xa_high / 1000)

array([5.38167681, 7.8538197 , 6.00144336, 6.37255189, 8.09300412,
       5.93578924, 6.76660849, 6.06175887, 5.76036016, 5.64629738,
       6.63262067, 7.03571978, 6.10434004, 5.38167681, 5.97151103,
       6.29653826, 6.1780334 , 6.7530888 , 6.88951024, 6.04964746,
       6.2526164 , 5.38167681, 5.31748262, 5.36555597, 5.43033051,
       6.04964746, 6.1780334 , 6.56662499, 5.16549017, 5.74885095,
       5.54003047, 5.26457279, 5.47942408, 8.50794132, 6.44303692,
       5.7661234 , 5.69734342, 5.59011579, 5.25931084, 6.88951024,
       7.61408636, 6.36618252, 8.42328589, 6.22143134])