## Week 1: Numpy review

Instructor: Cornelia Ilin <br>
Email: cilin@ischool.berkeley.edu <br>

This notebook is intended to familiarize you with some basics of numpy, which you'll use extensively in this course. 

In [1]:
import numpy as np

#### <span style="color:chocolate">Concept 1:</span> Homogeneous data types

In [2]:
# a list that holds only ints produces an integer array
my_array = np.array([1, 2, 3])

print(my_array)
print(my_array.shape, my_array.dtype)

[1 2 3]
(3,) int64


In [3]:
# mixed inputs are cast to one common data type (here: unicode strings)
my_array = np.array([1, '2', 3.0])

print(my_array)
print(my_array.shape, my_array.dtype)

['1' '2' '3.0']
(3,) <U32


#### <span style="color:chocolate">Concept 2:</span> The size attribute represents the total number of elements in the array

In [4]:
# .size counts every element of the array: 3 rows * 2 columns = 6
my_array = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.uint8)

print('Shape:', my_array.shape,
      '\nSize:', my_array.size,
      '\nDtype:', my_array.dtype)

Shape: (3, 2) 
Size: 6 
Dtype: uint8


#### <span style="color:chocolate">Concept 3:</span> Example of tabular data layout

In [5]:
# tabular data: one row per sample, one column per feature
num_samples, num_features = 100, 17

tabular_layout = np.random.random(size=(num_samples, num_features))

tabular_layout.shape

(100, 17)

#### <span style="color:chocolate">Concept 4:</span> Example of image data layout

In [6]:
# image data stored as (image, row, column, channel)
num_images = 100
num_rows, num_cols = 256, 256
num_channels = 3

image_layout = np.random.random(size=(num_images, num_rows, num_cols, num_channels))

image_layout.shape

(100, 256, 256, 3)

#### <span style="color:chocolate">Concept 5:</span> Mathematical operations on numpy arrays

You can find some examples here: https://numpy.org/doc/stable/reference/routines.math.html

In [7]:
# show my_array, preceded by a header line that includes its shape
print('Printing my', my_array.shape, 'array:\n')
print(my_array)

Printing my (3, 2) array:

[[1 2]
 [3 4]
 [5 6]]


In [8]:
# sum all elements (no axis is given, so the result is a single scalar)
my_array.sum()

21

In [9]:
# sum by column: axis=0 collapses the rows, leaving one total per column
my_array.sum(axis=0)

array([ 9, 12], dtype=uint64)

In [10]:
# sum by row (axis=1 collapses the columns), then change the type of the result to float
my_array.sum(axis=1).astype(np.float64)

array([ 3.,  7., 11.])

In [11]:
# extract the first row, i.e. index 0 along axis 0
# (a single integer index selects one row; it does not return a whole axis)
axis0 = my_array[0]
axis0

array([1, 2], dtype=uint8)

In [12]:
# extract the second row, i.e. index 1 along axis 0
# (a single integer index selects one row; it does not return a whole axis)
axis1 = my_array[1]
axis1

array([3, 4], dtype=uint8)

In [13]:
# select individual elements by specifying one index per axis: row 1, column 0
element = my_array[1, 0]
element

3

In [14]:
# can slice along any axis: here row 0, columns 1 onward
# NOTE: a basic slice like this is a view into my_array, not a copy
element = my_array[0, 1:]
element

array([2], dtype=uint8)

In [15]:
# modifying an element/slice in place changes the original array as well,
# because `element` is a view into my_array
# NOTE: re-running this cell adds 10 again, so run it only once
element += 10
element

array([12], dtype=uint8)

In [16]:
# my_array itself now shows the in-place update that was made through `element`
print('Printing my original array:')
my_array

Printing my original array:


array([[ 1, 12],
       [ 3,  4],
       [ 5,  6]], dtype=uint8)

^ Notice the value at index (0, 1): it changed from 2 to 12 because `element` is a view of `my_array`, not a copy.

In [17]:
# make a copy if you want to preserve the original array:
# .copy() gives `element` its own data, so the subtraction below does not touch my_array

element = my_array[0, 1:].copy()
element -= 10
element

array([2], dtype=uint8)

In [18]:
# my_array is unchanged by the edit made to the copy (index (0, 1) is still 12)
print('Printing my array:')
my_array

Printing my array:


array([[ 1, 12],
       [ 3,  4],
       [ 5,  6]], dtype=uint8)

In [19]:
# loop over elements along axis 0
# each iteration yields one row, so this prints my_array[0], my_array[1] and my_array[2]

for row in my_array:
    print(row,', shape:', row.shape) 

[ 1 12] , shape: (2,)
[3 4] , shape: (2,)
[5 6] , shape: (2,)


#### <span style="color:chocolate">Concept 6:</span> Create arrays

In [20]:
# create an array of pixel values
# The intensities are fractions in [0, 1], so the array needs a floating-point
# dtype; with an unsigned 8-bit integer dtype every value here would be
# truncated to 0.
my_pixels = np.array(
    [[0.6, 0.2],
     [0.1, 0.4],
     [0.7, 0.5]],
    dtype=np.float32
)

In [21]:
# create a range of years from 1991 to 2017
# provide start, stop, and step params; stop is exclusive, hence 2018
years = np.arange(1991, 2018, 1)
years

array([1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
       2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
       2013, 2014, 2015, 2016, 2017])

In [22]:
# create an array filled with zeros (use np.ones to create an array of ones)
# NOTE: despite the variable name the array is not "empty": it holds 3 * 5 * 10 zeros

empty_array = np.zeros((3, 5, 10))
print(empty_array)
print(empty_array.dtype)

[[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]]
float64


In [23]:
# create an array with a constant value: np.ones multiplied by the constant
# (np.full((3, 5, 10), 5, dtype=np.int32) builds the same array in one call)
ct_array = np.ones((3, 5, 10), dtype=np.int32) * 5
ct_array

array([[[5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
        [5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
        [5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
        [5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
        [5, 5, 5, 5, 5, 5, 5, 5, 5, 5]],

       [[5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
        [5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
        [5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
        [5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
        [5, 5, 5, 5, 5, 5, 5, 5, 5, 5]],

       [[5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
        [5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
        [5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
        [5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
        [5, 5, 5, 5, 5, 5, 5, 5, 5, 5]]], dtype=int32)

In [24]:
# create arrays with random numbers from a normal distribution
# specify mean, standard deviation (not variance), sample size
mu, sigma = 0, 1
normal_array = np.random.normal(mu, sigma, size=10)
normal_array

array([ 1.51400833, -0.71307223, -0.5658885 ,  0.28565782,  1.97503473,
       -0.10794173, -1.86053972, -0.63499378, -0.1858189 ,  0.43241852])

#### <span style="color:chocolate">Concept 6:</span> Reshape arrays

In [25]:
# Reshaping requires the size (total number of elements) to be unchanged: 27 = 3 * 3 * 3
years = np.arange(1991, 2018, 1)
years = years.reshape(3, 3, 3) # 3 nested arrays, each of shape (3, 3); try .reshape(2, 3, 6): it needs 36 elements, so it raises an error
years

array([[[1991, 1992, 1993],
        [1994, 1995, 1996],
        [1997, 1998, 1999]],

       [[2000, 2001, 2002],
        [2003, 2004, 2005],
        [2006, 2007, 2008]],

       [[2009, 2010, 2011],
        [2012, 2013, 2014],
        [2015, 2016, 2017]]])

In [26]:
# convert an n-dimensional array to a flat 1D array; flatten() always returns a copy
years.flatten()

array([1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
       2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
       2013, 2014, 2015, 2016, 2017])

In [27]:
# flatten always creates a copy; ravel returns a view of the original whenever possible
years.ravel()

array([1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
       2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
       2013, 2014, 2015, 2016, 2017])

In [28]:
# flatten() hands back a copy, so writing into it leaves `years` untouched
years2 = years.flatten()
years2[-1] = 2018  # last element, i.e. index 26
years

array([[[1991, 1992, 1993],
        [1994, 1995, 1996],
        [1997, 1998, 1999]],

       [[2000, 2001, 2002],
        [2003, 2004, 2005],
        [2006, 2007, 2008]],

       [[2009, 2010, 2011],
        [2012, 2013, 2014],
        [2015, 2016, 2017]]])

In [29]:
# ravel() hands back a view here, so writing into it also changes `years`
years2 = years.ravel()
years2[-1] = 2018  # last element, i.e. index 26
years

array([[[1991, 1992, 1993],
        [1994, 1995, 1996],
        [1997, 1998, 1999]],

       [[2000, 2001, 2002],
        [2003, 2004, 2005],
        [2006, 2007, 2008]],

       [[2009, 2010, 2011],
        [2012, 2013, 2014],
        [2015, 2016, 2018]]])

In [30]:
# a new axis of length 1 can be inserted at any position
years_1d = np.arange(1991, 2018, 1)
years_2d = np.expand_dims(years_1d, axis=0)  # new axis in front: (27,) -> (1, 27)

print(years_1d.shape)
print(years_2d.shape)

(27,)
(1, 27)


In [31]:
years_3d = np.expand_dims(years_1d, axis=(0, 1))  # two new axes inserted in front (positions 0 and 1)
years_3d.shape

(1, 1, 27)

YOUR TASK: Remove axis=0 from years_3d (Hint: you can remove axes with shape 1 using <span style="color:cornflowerBlue">np.squeeze()</span>)

In [32]:
# YOUR CODE HERE

#### <span style="color:chocolate">Concept 6:</span> Joining arrays

YOUR TASK: Provide 4 examples that use <span style="color:cornflowerBlue">np.concatenate()</span> and <span style="color:cornflowerBlue">np.stack()</span>

In [33]:
# YOUR CODE HERE

#### <span style="color:chocolate">Concept 7:</span> Numpy broadcasting

The term broadcasting describes how NumPy treats arrays with different shapes during arithmetic operations. You can read more here:  https://numpy.org/doc/stable/user/basics.broadcasting.html

In [34]:
# multiplying an array with a scalar: the scalar is applied to every element
# (the factor 1 used here leaves the values unchanged)
years = np.arange(1991, 2018, 1)
years * 1

array([1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
       2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
       2013, 2014, 2015, 2016, 2017])

In [35]:
# Adding a scalar: 2 is added to every element
years + 2

array([1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
       2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014,
       2015, 2016, 2017, 2018, 2019])

In [36]:
# arrays of equal shape are combined element by element
a = np.arange(3)
b = np.arange(3)
print(a)
print(b)
a + b

[0 1 2]
[0 1 2]


array([0, 2, 4])

In [37]:
# this code fails once the line below is uncommented. Why?
# (as written, b still holds np.arange(3) from the previous cell, so a + b works)
a = np.arange(3)
#b = np.arange(4)  # uncomment this line and re-run
a + b

array([0, 2, 4])

In [38]:
# a (3, 3) array plus a (3,) array: b is broadcast across every row of a
a = np.ones((3, 3))
b = np.arange(3)
print(a)
print(b)
a + b

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
[0 1 2]


array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

#### <span style="color:chocolate">Concept 8:</span> Standardization

In [39]:
# per-feature statistics: one mean and one std for every column of my_array
# keepdims=True keeps the reduced axis, so both results have shape (1, 4);
# with keepdims=False they would have shape (4,)
my_array = np.random.random(size=(10, 4))
mean = np.mean(my_array, axis=0, keepdims=True)
std = np.std(my_array, axis=0, keepdims=True)
print('Means:', mean)
print('Stds:', std)
print('Mean shape:', mean.shape)
print('Std shape:', std.shape)

Means: [[0.39676437 0.42889563 0.42029496 0.53082178]]
Stds: [[0.29751805 0.24189962 0.23118253 0.3372268 ]]
Mean shape: (1, 4)
Std shape: (1, 4)


In [40]:
# standardize my_array: subtract each column's mean and divide by its std
# afterwards every column has mean ~0 (up to floating-point error) and std 1
my_array_std = (my_array - mean) / std
print(my_array_std.mean(axis=0))
print(my_array_std.std(axis=0))

[ 1.11022302e-16  1.77635684e-16 -1.33226763e-16  3.83026943e-16]
[1. 1. 1. 1.]


#### <span style="color:chocolate">Concept 9:</span> Advanced

In [41]:
# build a 5x5 "image" whose values step by 0.04 from 0.00 up to 0.96
# An integer range scaled by the step fixes the element count at exactly 25;
# with a float step in np.arange the count depends on floating-point rounding,
# and any count other than 25 would make the reshape fail.
my_image = np.arange(25).reshape(5, 5) * 0.04
my_image

array([[0.  , 0.04, 0.08, 0.12, 0.16],
       [0.2 , 0.24, 0.28, 0.32, 0.36],
       [0.4 , 0.44, 0.48, 0.52, 0.56],
       [0.6 , 0.64, 0.68, 0.72, 0.76],
       [0.8 , 0.84, 0.88, 0.92, 0.96]])

In [42]:
# crop by using a border of size 1: drop the first and last row and column
my_image[1:-1, 1:-1]

array([[0.24, 0.28, 0.32],
       [0.44, 0.48, 0.52],
       [0.64, 0.68, 0.72]])

In [43]:
# crop by using a border of size 2: drop the first two and last two rows and columns
my_image[2:-2, 2:-2]

array([[0.48]])

In [44]:
# reverse order of columns: a step of -1 walks axis 1 backwards (rows stay in place)
my_image[:, ::-1]

array([[0.16, 0.12, 0.08, 0.04, 0.  ],
       [0.36, 0.32, 0.28, 0.24, 0.2 ],
       [0.56, 0.52, 0.48, 0.44, 0.4 ],
       [0.76, 0.72, 0.68, 0.64, 0.6 ],
       [0.96, 0.92, 0.88, 0.84, 0.8 ]])

In [45]:
# set to 0 the elements divisible by 2, using a boolean mask
# NOTE: my_image holds floats in [0, 1), so only the 0.0 entry matches the mask
# and the array comes out unchanged
my_image[my_image % 2 == 0] = 0
my_image

array([[0.  , 0.04, 0.08, 0.12, 0.16],
       [0.2 , 0.24, 0.28, 0.32, 0.36],
       [0.4 , 0.44, 0.48, 0.52, 0.56],
       [0.6 , 0.64, 0.68, 0.72, 0.76],
       [0.8 , 0.84, 0.88, 0.92, 0.96]])