# Load the patient data into our program

For specialised tasks we need to import additional packages. For mathematical operations and data manipulation, the package `numpy` provides suitable functionality. We import it under the short-hand name `np`, which we will use throughout this notebook.

In [2]:
# import <package> as <short name>
import numpy as np

In [3]:
# Read the comma-separated inflammation table into a NumPy array.
# Both the file name and the delimiter must be given as strings;
# adjust the path to the directory where your data files are stored.
data = np.loadtxt('../data/inflammation-01.csv', delimiter=',')

In [4]:
# Inspect what was loaded, one item per output line:
#   1. the values themselves
#   2. the object's class -- data is a numpy.ndarray
#   3. its shape attribute
#   4. its element data type (dtype), e.g. float64
print(data, type(data), data.shape, data.dtype, sep='\n')

[[0. 0. 1. ... 3. 0. 0.]
 [0. 1. 2. ... 1. 0. 1.]
 [0. 1. 1. ... 2. 1. 1.]
 ...
 [0. 1. 1. ... 1. 1. 1.]
 [0. 0. 0. ... 0. 2. 0.]
 [0. 0. 1. ... 1. 1. 0.]]
<class 'numpy.ndarray'>
(60, 40)
float64


Now our imported data set looks like this:
$$
\text{data} = \begin{pmatrix}
\text{data}[0,0] & \text{data}[0,1] & \cdots &  \text{data}[0,39] \\
\text{data}[1,0] & \text{data}[1,1] & \cdots &  \text{data}[1,39] \\
\vdots & \vdots & \ddots & \vdots \\
\text{data}[59,0] & \text{data}[59,1] & \cdots  & \text{data}[59,39]
\end{pmatrix}
$$
Notice that Python starts counting and indexing at `0`.

# Access elements of the data

We access a single element as `data[0,2]`, which gives `1.0`. We can also access a full row or column by putting `:` in place of the respective index.

In [7]:
# Index a single element with [row, column]; both indices start at 0,
# so the labels below use the same zero-based numbering as the code.
print('00-element of the matrix: ', data[0, 0])
print('02-element of the matrix: ', data[0, 2])
# A bare ':' selects everything along that axis (a whole row or column).
print('inflammation of patient 1 for each day: ', data[1, :])
print('inflammation of all patients on day 20: ', data[:, 20])

11-element of the matrix:  0.0
02-element of the matrix:  1.0
inflammation of patient 1 for each day:  [ 0.  1.  2.  1.  2.  1.  3.  2.  2.  6. 10. 11.  5.  9.  4.  4.  7. 16.
  8.  6. 18.  4. 12.  5. 12.  7. 11.  5. 11.  3.  3.  5.  4.  4.  5.  5.
  1.  1.  0.  1.]
inflammation of all patients on day 20:  [ 6. 18. 19. 17.  9. 12.  9. 20.  6. 18.  8.  9. 13. 15. 12.  9. 13. 13.
  7.  9. 18. 12. 16. 14. 17.  7.  9. 15. 20. 14. 13. 19. 11. 10. 14. 15.
 16. 15. 19. 15. 18. 19. 12. 18.  6. 14. 19. 10. 12. 16.  5. 20. 11. 15.
 16. 16. 14.  9.  8.  6.]


# Slice out sections of an array

If we do not want to have a full column or row, we can specify the index range via `start:stop`. Notice that the value of `stop` is excluded.

Example: `data[3:5,:]` gives the data of the patients 3 and 4, since 5 is excluded.

In [8]:
# Rows 3 and 4 (the stop index 5 is excluded) with all columns,
# followed by the shape of that slice.
print(data[3:5, :], data[3:5, :].shape, sep='\n')

[[ 0.  0.  2.  0.  4.  2.  2.  1.  6.  7. 10.  7.  9. 13.  8.  8. 15. 10.
  10.  7. 17.  4.  4.  7.  6. 15.  6.  4.  9. 11.  3.  5.  6.  3.  3.  4.
   2.  3.  2.  1.]
 [ 0.  1.  1.  3.  3.  1.  3.  5.  2.  4.  4.  7.  6.  5.  3. 10.  8. 10.
   6. 17.  9. 14.  9.  7. 13.  9. 12.  6.  7.  7.  9.  6.  3.  2.  2.  4.
   2.  0.  1.  1.]]
(2, 40)


In [9]:
# Patients 3..6 (rows 3:7) on days 4..9 (columns 4:10); stop values are excluded.
small = data[3:7, 4:10]
print(small, small.shape, sep='\n')

[[4. 2. 2. 1. 6. 7.]
 [3. 1. 3. 5. 2. 4.]
 [2. 4. 2. 1. 6. 4.]
 [4. 2. 2. 5. 5. 8.]]
(4, 6)


# Some first analysis of our data

Now we would like to apply the functions `mean()`, `min()`, `max()`, and `std()` from the `numpy` package.

In [13]:
# Mean inflammation over every entry of the data set (all patients, all days).
data.mean()

6.14875

In [15]:
# One statement, three assignments: the values on the right are
# unpacked, in order, into the three names on the left.
maxval, minval, stdval = (
    data.max(),
    data.min(),
    data.std(),
)

In [16]:
# Report each summary statistic on its own line as "<label> <value>".
for label, value in (
    ('maximum inflammation:', maxval),
    ('minimum inflammation:', minval),
    ('standard deviation:', stdval),
):
    print(label, value)

maximum inflammation: 20.0
minimum inflammation: 0.0
standard deviation: 4.613833197118566


In [17]:
# check what a function does by accessing its documentation:
# a trailing `?` is IPython/Jupyter syntax that shows the object's docstring
# (it is not valid in a plain Python script; there, help(np.max) can be used instead)
np.max?

In [38]:
# Row 0 holds every daily reading for patient 0; take its largest value.
patient_0 = data[0, :]
print('maximum inflammation for patient 0:', patient_0.max())

maximum inflammation for patient 0: 18.0


In [48]:
# axis=0 reduces over the rows (patients), leaving one maximum per column (day).
print('maximum inflammation over patients that was reported each day:\n', data.max(axis=0))

maximum inflammation over patients that was reported each day:
 [ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17.
 18. 19. 20. 19. 18. 17. 16. 15. 14. 13. 12. 11. 10.  9.  8.  7.  6.  5.
  4.  3.  2.  1.]


In [47]:
# axis=1 reduces over the columns (days), leaving one maximum per row (patient).
print('maximum inflammation over time that was reported for each patient:\n', data.max(axis=1))

maximum inflammation over time that was reported for each patient:
 [18. 18. 19. 17. 17. 18. 17. 20. 17. 18. 18. 18. 17. 16. 17. 18. 19. 19.
 17. 19. 19. 16. 17. 15. 17. 17. 18. 17. 20. 17. 16. 19. 15. 15. 19. 17.
 16. 17. 19. 16. 18. 19. 16. 19. 18. 16. 19. 15. 16. 18. 14. 20. 17. 15.
 17. 16. 17. 19. 18. 18.]


# EXERCISES

# manipulating arrays

In [49]:
# A string can be indexed and sliced much like an array.
element = 'oxygen'

# start:stop slices exclude the stop position; leaving stop out runs to the end.
print(element[0:3], element[3:6], element[1:], sep='\n')

oxy
gen
xygen


In [51]:
# we can also get an empty numpy array:
# when start equals stop the slice selects nothing, so both axes have length 0
data[3:3,4:4]

array([], shape=(0, 0), dtype=float64)

In [54]:
# An empty row range combined with all columns: zero rows, but the column
# count of `data` is kept. Note: despite its name, `listvar` is a numpy
# array (see the output below), not a Python list.
listvar = data[3:3,:]
listvar

array([], shape=(0, 40), dtype=float64)

# further manipulation

In [58]:
# Build the 4x3 integer matrix holding 1..12, filled row by row.
A = np.arange(1, 13).reshape(4, 3)
print(A, A.shape, sep='\n')

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
(4, 3)


In [63]:
# hstack joins the two copies side by side (more columns);
# vstack puts one on top of the other (more rows).
B = np.hstack((A, A))
C = np.vstack((A, A))

# Show both results, separated by a dashed line.
print(B, '---------------------', C, sep='\n')

[[ 1  2  3  1  2  3]
 [ 4  5  6  4  5  6]
 [ 7  8  9  7  8  9]
 [10 11 12 10 11 12]]
---------------------
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]
 [ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


In [64]:
# Pick out single columns of A: index 0 is the first column,
# and the negative index -1 counts from the end, giving the last column.
col_first, col_last = A[:, 0], A[:, -1]
print(col_first, col_last, sep='\n')

[ 1  4  7 10]
[ 3  6  9 12]


In [65]:
# Start from a 4x2 array of zeros (float by default) and fill its two
# columns with the first and last column of A.
combined = np.zeros((4, 2))
combined[:, 0], combined[:, 1] = col_first, col_last
print(combined)

[[ 1.  3.]
 [ 4.  6.]
 [ 7.  9.]
 [10. 12.]]


In [102]:
# Join the first and last column of A in a single step. The slices ':1' and
# '-1:' keep each column two-dimensional, so hstack places them side by side.
# Uses the `np` alias: the notebook imports numpy only as `np`, so the bare
# name `numpy` is undefined on a fresh kernel.
np.hstack((A[:, :1], A[:, -1:]))

array([[ 1,  3],
       [ 4,  6],
       [ 7,  9],
       [10, 12]])

In [104]:
# First seven days (columns 0..6) of patient 3.
patient3_week1 = data[3, 0:7]
print(patient3_week1)

[0. 0. 2. 0. 4. 2. 2.]


In [105]:
# Day-to-day change in inflammation: each entry is value[i+1] - value[i],
# so the result has one element fewer than the input.
# Uses the `np` alias, the only name numpy is imported under in this notebook.
np.diff(patient3_week1)

array([ 0.,  2., -2.,  4., -2.,  0.])

In [108]:
# Day-to-day change for every patient: axis=1 takes differences along the
# columns (days) within each row (patient).
# Uses the `np` alias, the only name numpy is imported under in this notebook.
print(np.diff(data, axis=1))

[[0. 0. 1. ... 3. 0. 0.]
 [0. 1. 2. ... 1. 0. 1.]
 [0. 1. 1. ... 2. 1. 1.]
 ...
 [0. 1. 1. ... 1. 1. 1.]
 [0. 0. 0. ... 0. 2. 0.]
 [0. 0. 1. ... 1. 1. 0.]]
[[ 0.  1.  2. ...  1. -3.  0.]
 [ 1.  1. -1. ...  0. -1.  1.]
 [ 1.  0.  2. ...  0. -1.  0.]
 ...
 [ 1.  0.  0. ... -1.  0.  0.]
 [ 0.  0.  1. ... -2.  2. -2.]
 [ 0.  1. -1. ... -2.  0. -1.]]


In [113]:
# Largest day-to-day increase for each patient: difference along the days
# (axis=1), then take the maximum of each row.
# Uses the `np` alias, the only name numpy is imported under in this notebook.
print(np.max(np.diff(data, axis=1), axis=1))

[ 7. 12. 11. 10. 11. 13. 10.  8. 10. 10.  7.  7. 13.  7. 10. 10.  8. 10.
  9. 10. 13.  7. 12.  9. 12. 11. 10. 10.  7. 10. 11. 10.  8. 11. 12. 10.
  9. 10. 13. 10.  7.  7. 10. 13. 12.  8.  8. 10. 10.  9.  8. 13. 10.  7.
 10.  8. 12. 10.  7. 12.]


In [112]:
# Largest day-to-day change of either sign for each patient: taking the
# absolute value first means a large drop counts as much as a large rise.
# Uses the `np` alias, the only name numpy is imported under in this notebook.
print(np.max(np.absolute(np.diff(data, axis=1)), axis=1))

[12. 14. 11. 13. 11. 13. 10. 12. 10. 10. 10. 12. 13. 10. 11. 10. 12. 13.
  9. 10. 13.  9. 12.  9. 12. 11. 10. 13.  9. 13. 11. 11.  8. 11. 12. 13.
  9. 10. 13. 11. 11. 13. 11. 13. 13. 10.  9. 10. 10.  9.  9. 13. 10.  9.
 10. 11. 13. 10. 10. 12.]


In [16]:
# Stack two copies of A on top of each other; axis=0 joins along the rows.
# Uses the `np` alias, the only name numpy is imported under in this notebook.
X = np.concatenate((A, A), axis=0)

In [17]:
# shape of the original matrix, as (rows, columns)
A.shape

(4, 3)

In [18]:
# shape after concatenating along axis 0: the row count doubles, the column count is unchanged
X.shape

(8, 3)