# Chapter 10 Numpy, Scipy and Pandas


## Numpy

In [7]:
!pip install biopython



In [10]:
import numpy as np

In [12]:
my_list = [1,2,3]

In [13]:
my_array = np.array(my_list)
my_array

array([1, 2, 3])

In [14]:
type(my_array)

numpy.ndarray

In [15]:
my_list*2

[1, 2, 3, 1, 2, 3]

In [16]:
my_array*2

array([2, 4, 6])

In [17]:
np.arange(0,10,3)

array([0, 3, 6, 9])

In [18]:
np.linspace(0,10,3) # similar to arange but allows you define the number of values (and then infers step size)

array([ 0.,  5., 10.])

In [19]:
np.zeros(10) # vector of zeros

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [20]:
np.zeros((10,3)) # matrix of zeros

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [21]:
np.zeros((10,3,3)) # tensor of zeros

array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]])

In [24]:
my_array = np.arange(9)
my_array

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [25]:
my_array.reshape(3,3) # arrange into a 3x3 matrix

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [26]:
my_array.ndim # number of dimensions

1

In [27]:
new_array = my_array.reshape(3,3)

In [28]:
new_array.shape # length along each dimension

(3, 3)

In [29]:
new_array.ndim

2

In [30]:
new_array.sum()

36

In [31]:
new_array.min()

0

In [32]:
new_array

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [34]:
np.mean(new_array, axis = 1)

array([1., 4., 7.])

In [35]:
np.random.normal(10, 2, 5)

array([ 9.20645322,  8.64918396, 10.66407336, 11.92989163, 12.0419305 ])

In [44]:
my_array = np.array([[1,2,3],[4,5,6],[7,8,9]])
my_array

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [45]:
my_array[2,1:]

array([8, 9])

In [46]:
ar1 = my_array[2,0:]
ar2 = my_array

In [47]:
ar1

array([7, 8, 9])

In [43]:
ar2

array([[     1,      2,      3],
       [     4,      5,      6],
       [100000,      8,      9]])

In [48]:
ar1[0] = 100000

In [49]:
ar2

array([[     1,      2,      3],
       [     4,      5,      6],
       [100000,      8,      9]])

In [50]:
my_array

array([[     1,      2,      3],
       [     4,      5,      6],
       [100000,      8,      9]])

In [51]:
ar3 = my_array.copy()
ar1[0] = 0

In [52]:
ar1

array([0, 8, 9])

In [53]:
ar2

array([[1, 2, 3],
       [4, 5, 6],
       [0, 8, 9]])

In [54]:
ar3

array([[     1,      2,      3],
       [     4,      5,      6],
       [100000,      8,      9]])

Exercise 1:

1a. Generate a matrix of random integers (from 1 to 1000) with dimensions (10,3). Set the seed function to 101 to generate the same set of random numbers, i.e. np.random.seed(101)

1b. What is the average value of the 5th row (to 2 decimal places)?

1c. What is the maximum value in the last two rows of the first and third columns?

1d. Make a view and a copy of the random_integers array and show how changes to views are propoagating but changes to copies are not.

1e. Retrieve the sum of all columns.

In [68]:
np.random.seed(101)

In [69]:
my_array = np.random.randint(1, 1000, 30).reshape(10,3)
my_array

array([864, 524, 338, 839, 576, 600,  76, 394, 974, 553, 645, 576, 937,
       758, 317, 733, 705, 111,   6, 909, 478,  41,  50, 852, 624, 507,
       137, 372, 926, 884])

In [71]:
my_matrix = my_array.reshape(10,3)

array([[864, 524, 338],
       [839, 576, 600],
       [ 76, 394, 974],
       [553, 645, 576],
       [937, 758, 317],
       [733, 705, 111],
       [  6, 909, 478],
       [ 41,  50, 852],
       [624, 507, 137],
       [372, 926, 884]])

In [72]:
my_array

array([864, 524, 338, 839, 576, 600,  76, 394, 974, 553, 645, 576, 937,
       758, 317, 733, 705, 111,   6, 909, 478,  41,  50, 852, 624, 507,
       137, 372, 926, 884])

In [74]:
my_matrix[4,].mean()

670.6666666666666

In [86]:
my_matrix[8:10,[0,2]].max()

884

In [89]:
my_matrix.sum()

16306

Exercise 2: 

In [94]:
array_2a = np.arange(1, 16, 1).reshape(5,3)

In [95]:
array_2a

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12],
       [13, 14, 15]])

In [97]:
array_2b = np.random.random(90).reshape(10,3,3)

In [98]:
array_2b

array([[[0.68913435, 0.4902087 , 0.14215381],
        [0.53385991, 0.05080687, 0.89271852],
        [0.74791018, 0.02457922, 0.284852  ]],

       [[0.42510608, 0.97544924, 0.02247369],
        [0.26014278, 0.39412181, 0.81974193],
        [0.99094014, 0.59933526, 0.54546031]],

       [[0.10155918, 0.64420452, 0.25066092],
        [0.35465498, 0.44609054, 0.2023882 ],
        [0.36442523, 0.84492992, 0.10747345]],

       [[0.93161573, 0.13115089, 0.95032666],
        [0.55674417, 0.45237301, 0.00633243],
        [0.26977194, 0.55642551, 0.32422062]],

       [[0.57389613, 0.61782581, 0.35072911],
        [0.55975502, 0.25530518, 0.78043202],
        [0.20399282, 0.495917  , 0.33530671]],

       [[0.29749974, 0.43069325, 0.64677615],
        [0.43719506, 0.46396243, 0.56779589],
        [0.68230378, 0.39887303, 0.47763713]],

       [[0.3110016 , 0.24291535, 0.7915267 ],
        [0.95677084, 0.44256991, 0.63211456],
        [0.3009027 , 0.57612014, 0.00329386]],

       [[0.56788413,

Exercise 3:
    
Familiarise yourself with these other functions.

np.log(x)

np.cumsum(x)

np.power(x,2)

In [104]:
array_3 = np.arange(1,9,1)
array_3

array([1, 2, 3, 4, 5, 6, 7, 8])

In [102]:
np.log(array_3)

array([0.        , 0.69314718, 1.09861229, 1.38629436, 1.60943791,
       1.79175947, 1.94591015, 2.07944154])

In [105]:
np.cumsum(array_3)

array([ 1,  3,  6, 10, 15, 21, 28, 36])

## Pandas

In [107]:
import pandas as pd
import numpy as np

In [108]:
data = pd.read_csv("data/ecotype-OB.csv")
print(type(data))

FileNotFoundError: [Errno 2] No such file or directory: 'data/ecotype-OB.csv'