# CDS-VIS Session 1 - Introduction to Python (Cont.)

In [2]:
text_snippet = "It was the best of times, it was the worst of times"

In [3]:
text_snippet.upper()

'IT WAS THE BEST OF TIMES, IT WAS THE WORST OF TIMES'

In [4]:
text_snippet.lower()

'it was the best of times, it was the worst of times'

In [5]:
text_snippet.split()

['It',
 'was',
 'the',
 'best',
 'of',
 'times,',
 'it',
 'was',
 'the',
 'worst',
 'of',
 'times']

In [6]:
text_snippet.split(",")

['It was the best of times', ' it was the worst of times']

In [7]:
# square brackets in the output mean the result is a list

In [8]:
type(text_snippet.split(","))

list

## Sequence types - list

In [9]:
colours = ["blue", "green", "red"]

In [10]:
colours

['blue', 'green', 'red']

In [12]:
colours[1]

'green'

In [13]:
colours[-1]

'red'

In [14]:
# lists are heterogeneous: a single list can hold values of different types
user_data = ["mary", 42, 1.65]

In [16]:
big_list = [user_data, colours]

In [18]:
big_list

[['mary', 42, 1.65], ['blue', 'green', 'red']]

In [21]:
big_list[0][1]

42

In [22]:
user_data + colours

['mary', 42, 1.65, 'blue', 'green', 'red']

In [23]:
colours.append("yellow")

In [24]:
colours

['blue', 'green', 'red', 'yellow']

In [25]:
colours.remove("green")

In [26]:
colours

['blue', 'red', 'yellow']

## Tuples

In [27]:
colours_tup = ("blue", "green", "red")

In [28]:
colours_tup

('blue', 'green', 'red')

In [29]:
# tuples and lists are basically the same, BUT tuples can't be modified (append, remove, etc.)

## Dictionaries

In [31]:
# a dictionary stores key: value pairs - look up a key to get the value stored for it
user_ages = {"mary": 42, "john": 34}

In [32]:
user_ages["mary"]

42

In [33]:
user_ages["john"]

34

In [34]:
# Each user name maps to a list of two numbers.
# The literal below builds the same dictionary as dict(mary=[...], john=[...], alice=[...]).
new_users = {
    "mary": [42, 1.65],
    "john": [50, 1.8],
    "alice": [24, 1.83],
}

In [36]:
new_users["mary"][1]

1.65

In [38]:
# Nested dictionaries: each user name maps to an inner dictionary
# with the keys "age" and "height".
newer_users = {
    "mary": {"age": 42, "height": 1.65},
    "john": {"age": 50, "height": 1.8},
    "alice": {"age": 24, "height": 1.83},
}

In [39]:
newer_users["mary"]["age"]

42

## Number types

In [40]:
# int = a whole number, without decimal values
# float = number with decimal values

In [41]:
type(2)

int

In [42]:
type(2.1)

float

In [43]:
type(2.0)

float

In [44]:
2 == 2.0

True

In [46]:
# coerce int into float
float(2)

2.0

In [47]:
# coerce float into int (the decimal part is dropped - the number is truncated, not rounded)
int(2.5)

2

In [48]:
10/2

5.0

In [50]:
10//2

5

In [51]:
10//3

3

### Boolean (bool) types

In [52]:
True == 1

True

In [53]:
False == 0

True

## Introducing numpy arrays

In [None]:
# write 'pip install numpy' in terminal

In [2]:
import os
import numpy as np

In [13]:
# Build the relative path to the sample CSV file.
# os.path.join inserts the separator that is correct for the current operating system.
filepath = os.path.join(
    "..", "..", "..",  # go up three directory levels
    "cds-vis-data", "data", "sample-data",  # folders leading to the sample data
    "sample-data-01.csv",  # the file to load
)

In [15]:
data = np.loadtxt(fname = filepath, delimiter = ",")

In [16]:
data

array([[0., 0., 1., ..., 3., 0., 0.],
       [0., 1., 2., ..., 1., 0., 1.],
       [0., 1., 1., ..., 2., 1., 1.],
       ...,
       [0., 1., 1., ..., 1., 1., 1.],
       [0., 0., 0., ..., 0., 2., 0.],
       [0., 0., 1., ..., 1., 1., 0.]])

In [17]:
type(data)

numpy.ndarray

In [18]:
data.dtype

dtype('float64')

In [19]:
data.shape

(60, 40)

In [22]:
data[0,0]

0.0

In [31]:
data[-1,-2]

1.0

In [35]:
# array slice
data[:4, :10]

array([[0., 0., 1., 3., 1., 2., 4., 7., 8., 3.],
       [0., 1., 2., 1., 2., 1., 3., 2., 2., 6.],
       [0., 1., 1., 3., 3., 2., 6., 2., 5., 9.],
       [0., 0., 2., 0., 4., 2., 2., 1., 6., 7.]])

In [38]:
# select whole row
data[0,:]

array([ 0.,  0.,  1.,  3.,  1.,  2.,  4.,  7.,  8.,  3.,  3.,  3., 10.,
        5.,  7.,  4.,  7.,  7., 12., 18.,  6., 13., 11., 11.,  7.,  7.,
        4.,  6.,  8.,  8.,  4.,  4.,  5.,  7.,  3.,  4.,  2.,  3.,  0.,
        0.])

In [40]:
# select whole column
data[:,0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [41]:
# average score over the whole array (all patients and all days)
np.mean(data)

6.14875

In [42]:
np.max(data)

20.0

In [43]:
np.min(data)

0.0

In [44]:
# maximum score for each patient
np.max(data, axis = 1)

array([18., 18., 19., 17., 17., 18., 17., 20., 17., 18., 18., 18., 17.,
       16., 17., 18., 19., 19., 17., 19., 19., 16., 17., 15., 17., 17.,
       18., 17., 20., 17., 16., 19., 15., 15., 19., 17., 16., 17., 19.,
       16., 18., 19., 16., 19., 18., 16., 19., 15., 16., 18., 14., 20.,
       17., 15., 17., 16., 17., 19., 18., 18.])

In [47]:
# maximum score for each day
np.max(data, axis = 0)

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
       13., 14., 15., 16., 17., 18., 19., 20., 19., 18., 17., 16., 15.,
       14., 13., 12., 11., 10.,  9.,  8.,  7.,  6.,  5.,  4.,  3.,  2.,
        1.])