In [4]:
import os
import pickle

import numpy as np
import scipy as sp
import scipy.stats  # load the stats submodule explicitly so that sp.stats resolves

Lists, tuples and arrays: https://physics.nyu.edu/pine/pymanual/html/chap3/chap3_arrays.html

In [4]:
# concatenating strings with +
# cannot concatenate string type and integer type
a = "I am Padma,"
b = "about to turn"
c = "27"
a + " " + b + " " + c 

'I am Padma, about to turn 27'

In [17]:
# lists are defined as [ item1, item2, ...]
b = [5., "girl", 2+8j, "horse", 21]

b[0] # access first element
b[2] # "j" denotes imaginary number

b[-1] # first element starting from the last
b[-2] # last but one


'horse'

In [18]:
# modifying contents of list
# re-create the list first so this cell is idempotent: without this line, every
# re-run would append "'s stable" once more and add pi to b[0] again
b = [5., "girl", 2+8j, "horse", 21]
b[-2] = b[-2] + "'s stable"  # string concatenation on a single element
b[0] = b[0] + np.pi # pi is available as np.pi, math.pi or scipy.constants.pi
b


[8.141592653589793, 'girl', (2+8j), "horse's stable", 21]

In [23]:
# list1 + list2 concatenates the lists, doesn't add element-wise
a = [1,6,0,9]
c = [2,3,4,5]
print(a + b) ; print(a + c)


[1, 6, 0, 9, 8.141592653589793, 'girl', (2+8j), "horse's stable", 21]
[1, 6, 0, 9, 2, 3, 4, 5]


In [29]:
b

[8.141592653589793, 'girl', (2+8j), "horse's stable", 21]

In [28]:
# Slicing lists
# list_name[i:j] outputs a list of size j-i
len(b) # length of list
b[0:3] #  b[0], b[1], b[2] 

[8.141592653589793, 'girl', (2+8j)]

In [34]:
b[2:3] # just b[2]
b[2:] #  b[2] till the end
b[:3]  # beginning to b[2]
b[:] # print all elements
b[1:-1] # b[1] to the last but one element

['girl', (2+8j), "horse's stable"]

Python range() function : https://cs.stanford.edu/people/nick/py/python-range.html

In [41]:
# creating and augmenting lists
# range(start, stop, step_size) describes the integers start, start+step_size, ... up to (but not including) stop
# range(n) yields the n integers from 0 to n-1

a = range(10)
a # displays the range object itself (see the output below), not the sequence of numbers 

range(0, 10)

In [43]:
list(range(10)) # use list to get a list printed

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [46]:
# prints integers from 0 to 9
for i in range(10):
    print(i)

0
1
2
3
4
5
6
7
8
9


[]

In [47]:
list(range(0))  # empty list []: there are no integers from 0 up to (but excluding) 0
list(range(-5)) # also [] -- range(n) is empty for any n <= 0

[]

In [52]:
# can reverse order of range of numbers using  reversed()
a = range(10) 
# reversed() returns a one-shot iterator: it is empty once list() has consumed it
a_reversed = reversed(a)
# two separate print statements: `print(x), print(y)` would build the tuple
# (None, None) and the notebook would echo it as the cell's output
print(list(a))
print(list(a_reversed))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]


(None, None)

In [55]:
# works on strings as well
b = "python"
list(reversed(b))

['n', 'o', 'h', 't', 'y', 'p']

In [56]:
list(range(3,8)) # list of size 8-3 = 5

[3, 4, 5, 6, 7]

In [58]:
# AP with positive common diff 
# increasing seq
list(range(2,14,2)) # AP sequence with common diff = 2; a_0 = 2 and a_end = 12

[2, 4, 6, 8, 10, 12]

In [61]:
# AP with negative common diff 
# decreasing seq
list(range(14,0,-2))  # start at 14, reduce by 2 and end seq at 2

[14, 12, 10, 8, 6, 4, 2]

In [63]:
a = range(1,10,3)
list(a)

[1, 4, 7]

In [65]:
# use + to add list [99, 100] to list a
a1 = [99,100] + list(a)
a2 = list(a) + [99,100]
print(a1); print(a2)

[99, 100, 1, 4, 7]
[1, 4, 7, 99, 100]


In [66]:
# insert elements in-between
a1[:3] + ["add here"] + a1[3:]

[99, 100, 1, 'add here', 4, 7]

In [73]:
# tuples are lists that are immutable.
# That is, once defined, the individual elements of a tuple cannot be changed.
# defined using ()
a = (1,2,3,4)
a[0] # access 0th element

1

In [None]:
# a[0] = a[0] + 2 # TypeError: tuples do not support item assignment
# a = (-3) + a + (9) # TypeError: (-3) and (9) are plain ints, not tuples
# a = (-3,) + a + (9,) # works: one-element tuples need the trailing comma, and + builds a NEW tuple

Multi-dimensional lists and tuples

In [83]:
a = [[3, 9], [8, 5], [11, 1]]
a[0] # just the list [3,9], NOT [[3,9]]
a[0:2] # list containing [3,9] and [8,5], i.e. [[3, 9], [8, 5]]
a[0][1] # element at index 1 of the inner list at index 0, i.e. 9

# same holds for tuples
b = ((3,4,"padma"), (1,2,4,5))
b[0] # the first inner tuple (3, 4, 'padma')
b[1][3] # element at index 3 of the second inner tuple, i.e. 5 (only this last value is displayed)

5

NumPy array is similar to a list but where all the elements of the list are of the same type. 

The elements of a NumPy array are usually numbers, but can also be booleans, strings, or other objects.

When the elements are numbers, **they must all be of the same type**. For example, they might be all integers or all floating point numbers.

**NumPy has different ways of creating arrays**
1. `np.array()`
2. `np.linspace()` and `np.logspace()`
3. `np.arange()`          
 *Note that this is NOT arrange(), it is arange()*
4. `zeros()` and `ones()`

In [120]:
# converting list to array using np.array()
a = [0, 0, 1, 4, 7, 16, 31, 64, 127]
b = np.array(a)
b # integer array

# np.array() automatically promotes all of the numbers to the type of the most general entry in the list
a[2] = 1.0
b1 = np.array(a)
b1 # floating point array

array([  0.,   0.,   1.,   4.,   7.,  16.,  31.,  64., 127.])

In [100]:
# linspace(start, stop, N)
# to create N evenly spaced numbers between start value and stop values. (both inclusive)

%precision 3 # round off upto three places
np.linspace(0,10, 7)

# 7 evenly spaced points b/w 0, 10 (including 0 and 10)

array([ 0.   ,  1.667,  3.333,  5.   ,  6.667,  8.333, 10.   ])

In [101]:
# logspace(start, stop, N)
# to create N numbers from 10^start to 10^stop (both inclusive), evenly spaced on a log scale

np.logspace(0,3,6)

# 6 numbers b/w 10^0 and 10^3 i.e. 1 and 1000; consecutive values share a constant ratio (about 3.981 here)

array([   1.   ,    3.981,   15.849,   63.096,  251.189, 1000.   ])

In [112]:
# np.arange(start, stop, step)
# similar to range() syntax, unlike range(), the arange() function can generate floating point arrays

np.arange(0, 10, 2) # integer array
np.arange(0.,10, 2) # floating point array
np.arange(0, 10, 1.5) # floating point array

array([0. , 1.5, 3. , 4.5, 6. , 7.5, 9. ])

In [116]:
# special arrays containing only zeros or only ones
# default output is floating point array

np.zeros(4) # floating point
np.ones(6) # floating point

np.zeros(4, dtype=int) # integer array
np.ones(6, dtype=int) # integer array

array([1, 1, 1, 1, 1, 1])

Mathematical operations with arrays

In [135]:
# elementwise operations
a = np.linspace(-1,5,10)
print(a); print(a*3)

a + 3 
a**2 # element wise exponent
np.sin(a)
np.exp(a)
np.log(np.exp(a))

[-1.    -0.333  0.333  1.     1.667  2.333  3.     3.667  4.333  5.   ]
[-3. -1.  1.  3.  5.  7.  9. 11. 13. 15.]


array([-1.   , -0.333,  0.333,  1.   ,  1.667,  2.333,  3.   ,  3.667,
        4.333,  5.   ])

Operations not well defined return `nan` or `inf` or `-inf`

In [137]:
np.log(0) # -inf
np.log(-1) # nan

  np.log(0) # -inf
  np.log(-1)


nan

Arrays can also be added, subtracted, multiplied, and divided by each other on an element-by-element basis, provided the two arrays have the same size.

These kinds of operations with arrays are called **vectorized operations** because the entire array, or “vector”, is processed as a unit. 

Vectorized operations are much faster than processing each element of arrays one by one.

In [142]:
a = np.array([1,2,3])
b = np.array([10,100,1000])
c = np.array([5,3,3])

a + b # array([  11,  102, 1003])
a*b # array([  10,  200, 3000])
a**c # array([ 1,  8, 27])
a/b # array([0.1  , 0.02 , 0.003])

array([0.1  , 0.02 , 0.003])

Multi-dimensional arrays and matrices

So far, we only saw 1-d arrays. Matrices, for instance are 2-d arrays.

The `np.array()` function converts a n-dim list into a n-dim array. The `np.array()` function makes all the elements have the same data type as the most complex entry.

Major diff b/w NumPy arrays and lists is that the latter can be made up of different datatypes while the former cannot.

In [157]:
# the shape can be passed as a tuple or as a list: both np.ones((3,4)) and np.ones([3,4]) work
a = np.ones((3,4), dtype=int)  # matrix of ones of size 3x4
a

array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]])

In [161]:
a[0] = (1,2,3,4)
a[0][1] = 100
a

array([[  1, 100,   3,   4],
       [  1,   1,   1,   1],
       [  1,   1,   1,   1]])

In [162]:
np.eye(3) # 3x3 identity matrix

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

Multidimensional arrays can be created from 1-d arrays using the `reshape()` function. 

In [9]:
a = np.arange(1,11)
a1 = np.reshape(a, (2,5))
a2 = np.reshape(a, (5,2))
a

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [176]:
# indexing is intuitive - two options
a1[0,2] 
a1[0][2]

3

Addition, subtraction, multiplication, division, and exponentiation all work with n-dim arrays just like with 1-d arrays, on an **element-by-element basis**.

Functions also act on an element-to-element basis.

In [179]:
a1/10
a1**2
np.sin(a1)

array([[ 0.841,  0.909,  0.141, -0.757, -0.959],
       [-0.279,  0.657,  0.989,  0.412, -0.544]])

We can multiply two `np.arrays`. However, **array multiplication is NOT matrix multiplication**.

`np.array` multiplication is point-wise (element-by-element), so it is well-defined when the arrays have the same shape.

Usual matrix multiplication is done using `np.dot( , )` (or, for 2-d arrays, the `@` operator).

Note: a NumPy array's `.T` attribute returns its matrix transpose.

In [218]:
a = np.arange(0,4,0.2) # 0, 0.2, ..., 3.8 (the stop value 4 is excluded)
len(a) # 20
a = np.reshape(a, (5,4)) # 5x4 matrix
b = 10*np.ones([5,4])  # 5x4 matrix

a # display the 5x4 array; use a.shape to get its dimensions

array([[0. , 0.2, 0.4, 0.6],
       [0.8, 1. , 1.2, 1.4],
       [1.6, 1.8, 2. , 2.2],
       [2.4, 2.6, 2.8, 3. ],
       [3.2, 3.4, 3.6, 3.8]])

In [204]:
a*b # pointwise multiplication

array([[ 0.,  2.,  4.,  6.],
       [ 8., 10., 12., 14.],
       [16., 18., 20., 22.],
       [24., 26., 28., 30.],
       [32., 34., 36., 38.]])

In [217]:
# keep the transpose under a new name: `a = a.T` is valid Python, but it would
# flip `a` between 5x4 and 4x5 every time this cell is re-run
at = a.T # take the transpose
at

array([[0. , 0.8, 1.6, 2.4, 3.2],
       [0.2, 1. , 1.8, 2.6, 3.4],
       [0.4, 1.2, 2. , 2.8, 3.6],
       [0.6, 1.4, 2.2, 3. , 3.8]])

In [219]:
print(a.shape); print(at.shape)

(5, 4)
(4, 5)


Use `np.where` to identify indices of an array that satisfy a given condition.

Example: 
When `np.where` is used with a 2-dimensional array, it returns a tuple of arrays, each containing the indices of the elements that satisfy the condition along a specific axis. Specifically, the first array in the tuple contains the row indices, and the second array contains the column indices.

In [7]:
# sample data: a 1-D array and a 2x3 array
arr = np.array([1, 12, 13, 1, 15, 1])
arr2 = np.array([
    [11, 2, 13],
    [0, 15, 1],
])

# element-wise predicate: applied to an array it yields a boolean mask of the same shape
condition = lambda x: x > 3

# for a 2-D input np.where gives (row_indices, column_indices) of the True entries
indices = np.where(condition(arr2))
print(indices)  # Output: (array([0, 0, 1]), array([0, 2, 1]))

# the result is a plain tuple of arrays, so it is indexed the usual way:
# [0] picks the row-index array, [2] its third entry
indices[0][2]

(array([0, 0, 1]), array([0, 2, 1]))


1

In [5]:
# To get the actual elements that satisfy the condition:
elements = arr2[indices]
print(elements)  # Output: [11 13 15]

[11 13 15]


Lists vs arrays
- They are both multi-element data structures but lists are part of core Python while arrays are part of numerical computing package NumPy.
- Elements of an array have to be of the same type while elements of a list can be of different types.
- Look at the other two points mentioned in the link.

Dictionaries: A dictionary is also a collection of Python objects, just like a list, but one that is indexed by keys, which can be strings, numbers (not necessarily integers, and not in any particular order), or even tuples!

Dictionaries have syntax: `{key1:value1, key2:value2, ...}`


In [4]:
a = {"Emma":309, "Jacob":582, "Olivia":764} # key is the person, value is their room num.
a["Olivia"] # to access value associated with the key "Olivia"

764

Dictionaries can be built up and added to in a straightforward manner

In [5]:
d = {}
d["last name"] = "Alberts"
d["first name"] = "Marie"
d["birthday"] = "January 27"
print(d)

{'last name': 'Alberts', 'first name': 'Marie', 'birthday': 'January 27'}


In [3]:
print(d.keys())
print(d.values())

dict_keys(['last name', 'first name', 'birthday'])
dict_values(['Alberts', 'Marie', 'January 27'])


In [None]:
# membership test of key "Padma" in dict d
"Padma" in d

False

Reading files into Python: 

https://guides.library.upenn.edu/penntdm/python/import_files

https://www.geeksforgeeks.org/python-append-to-a-file/

https://www3.ntu.edu.sg/home/ehchua/programming/webprogramming/Python_FileText.html

In [4]:
# move into the "basic python" folder, given relative to the current working directory;
# the guard keeps the cell re-runnable: a second os.chdir("basic python") would look
# for "basic python/basic python" and raise FileNotFoundError
if os.path.basename(os.getcwd()) != "basic python":
    os.chdir("basic python")
os.getcwd()

'c:\\Users\\tpr16\\OneDrive - The Pennsylvania State University\\TDA\\Neuro_stat_papers\\Mapper Algorithm\\fmri-tensor-based-analysis\\basic python'

In [4]:
os.listdir() # list contents in the directory

['basic-python.ipynb', 'sample-iris-data', 'sample_text_file.txt']

Use `open( file_path , mode = ' ' )` to process text files. While reading or writing to a file, access mode governs the type of operations possible in the opened file. Options for mode are:

1.  `'r'` - read mode i.e. opens file for reading.
2. `'a'` - append/add mode i.e. to add/append content to an existing file. Creates a new file, if such file doesn't exist.
3.  `'w'` - write mode
4. `'x'` - to create a new file

There are a few other modes:
1. `'rb'` - read in binary format
2. `'rb+'` - read and write in binary format
3. `'wb'` - write in binary format ....

In [5]:
# create a new empty file called myfile.txt in cwd
# run code once, otherwise will get error
# new_file = open("myfile.txt", mode='x')

In [6]:
os.listdir()

['basic-python.ipynb',
 'myfile.txt',
 'sample-iris-data',
 'sample_text_file.txt']

In [13]:
# open the .txt file in write mode and then close it 
new_file = open("myfile.txt", "w")
new_file.write("This is my first time writing .txt file using Python")
new_file.close()

In [16]:
# to read the contents of the file, reopen the file in read mode
new_file = open("myfile.txt", "r")
new_file.read()

'This is my first time writing .txt file using Python'

Opening a file with `open(file_path, "w")` and then calling `file_name.write(str)` overwrites the contents of the existing text file.

In [17]:
new_file.close() # close it 

In [18]:
new_file = open("myfile.txt", "w")
new_file.write("Does the write function overwrite existing contents?")
new_file.close()

In [19]:
new_file = open("myfile.txt", "r")
new_file.read()

'Does the write function overwrite existing contents?'

In [20]:
new_file.close()

Open the file with `open(file_path, "a")` and then call `file_name.write(str)` to add contents to an existing file. Note that the content gets added to the end of the text file.

In [22]:
new_file = open("myfile.txt", "a")
new_file.write("It does so only when opened in write mode.")

42

In [23]:
new_file.close()

Notice how we have to open and close the file every time. This is inconvenient. To avoid this issue, we can use `with` statements.

In [5]:
with open("myfile.txt", "a") as f:
    f.write("The file closes once the with statement ends. We don't have to use the new_file.close command as well.")

In [8]:
with open("myfile.txt", "r") as f:
    print(f.read())

Does the write function overwrite existing contents?It does so only when opened in write mode.The file closes once the with statement ends. We don't have to use the new_file.close command as well.


https://www.w3resource.com/python-exercises/numpy/basic/numpy-basic-exercise-38.php

Working with NumPy arrays
1. `np.reshape()`
2. `np.empty()`

NumPy allows us to give one of the new shape parameters as -1 (e.g. (2,-1) or (-1,3), but **not** (-1, -1)). It simply means that this dimension is unknown and we want NumPy to figure it out. NumPy works it out from the total number of elements in the array and the remaining dimensions, making sure that the new shape is compatible with the original shape.

In [3]:
# create an array 
z = np.array([[1,2,3,4],
               [5,6,7,8],
               [9,10,11,12]])
z.shape

(3, 4)

In [8]:
# reshape(-1) flattens the array into a single 1-D vector (shape (12,)), not a column vector
z_reshaped = z.reshape(-1) 
print(z_reshaped.shape) ; z_reshaped # flattened row by row: 1..4, then 5..8, then 9..12

(12,)


array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [9]:
z.reshape(-1, 2) # unknown num of rows but 2 cols

array([[ 1,  2],
       [ 3,  4],
       [ 5,  6],
       [ 7,  8],
       [ 9, 10],
       [11, 12]])

In [11]:
# here, we set one row and unknown cols
z.reshape(1, -1).shape

(1, 12)

`np.empty` returns a new array of given shape and type, without initializing entries.

In [19]:
a = np.empty((2,2))
a

array([[0.00000000e+000, 6.61422510e-312],
       [6.61422509e-312, 6.61422509e-312]])

In [21]:
# default dtype is float 
a = np.empty(shape=(2,2), dtype=int)
a

array([[ 1337391035, -1866779007],
       [ -122212677,  -752865888]])

In [25]:
# a 0 in the shape means that axis has length zero: this array has 0 rows and
# 2 columns, so it holds no elements at all
# NOTE(review): presumably intended as an empty starting point to which 2-column rows
# are stacked later (e.g. with np.vstack) -- confirm the intended use
b = np.empty(shape=(0,2))
b.shape

(0, 2)

`np.apply_along_axis(func1d = , axis = 0, arr = , ... )` \
`axis = 0` applies the function along columns (each column is passed to the function). `axis = 1` applies the function along rows (each row is passed to the function).


In [None]:
# Mean of the two end points (first and last entries) of a 1-D array
def my_func(a):
    """Return the average of the first and the last element of the 1-D sequence `a`."""
    first, last = a[0], a[-1]
    return (first + last)*0.5

b = np.array([[1,2,3], [4,5,6], [7,8,9],[10,11,12]])

np.apply_along_axis(my_func, 1, b) # along rows

array([ 2.,  5.,  8., 11.])

In [7]:
np.apply_along_axis(sorted, 1, b)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [8]:
np.apply_along_axis(np.diag, -1, b)

array([[[ 1,  0,  0],
        [ 0,  2,  0],
        [ 0,  0,  3]],

       [[ 4,  0,  0],
        [ 0,  5,  0],
        [ 0,  0,  6]],

       [[ 7,  0,  0],
        [ 0,  8,  0],
        [ 0,  0,  9]],

       [[10,  0,  0],
        [ 0, 11,  0],
        [ 0,  0, 12]]])

Tolerance for Floating-Point Comparison: `np.isclose(np.linalg.norm(u), 1, atol=1e-9)`

In [2]:
# element-wise comparison: entry i of the result is (first[i] < second[i])
np.less([0, 2], [2, 2]) 

# On NumPy arrays the < operator is shorthand for np.less
a = np.array([1, 2])
b = np.array([2, 2])
a < b

array([ True, False])

Similarly `np.greater` (and its corresponding `>` operator), `np.equal`, `np.not_equal` ,`np.greater_equal`, `np.less_equal`

In [3]:
print(np.greater([4,2],[2,2]))

print(np.not_equal([1.,2.], [1., 3.]))
print(np.not_equal([1, 2], [[1, 3],[1, 4]]))

[ True False]
[False  True]
[[False  True]
 [False  True]]


Use `np.newaxis` to create an additional dimension. Note that `...` means "all existing dimensions".

In [5]:
# draw a 5x7 sample from the Beta(1, 2) distribution; the fixed random_state makes
# the cell reproduce the same numbers on every run
w = sp.stats.beta.rvs(1, 2, size=(5, 7), random_state=0)
# `...` keeps every existing axis and np.newaxis appends a new axis of length 1
w_new = w[..., np.newaxis]
print(w.shape); print(w_new.shape)
# slicing index 0 of the new axis recovers the original 5x7 values
w_new[:, :, 0]

(5, 7)
(5, 7, 1)


array([[0.37506881, 0.30578965, 0.84391689, 0.12430801, 0.15955344,
        0.4710746 , 0.00150184],
       [0.02179871, 0.78600676, 0.33002911, 0.24088821, 0.34641145,
        0.34603568, 0.16517296],
       [0.7388497 , 0.14384383, 0.58230479, 0.12829159, 0.28792213,
        0.0463683 , 0.80188196],
       [0.4604551 , 0.57529989, 0.72119362, 0.14707281, 0.34974417,
        0.58172569, 0.40603798],
       [0.21569591, 0.46383352, 0.34564811, 0.61026984, 0.43184137,
        0.09586428, 0.08650372]])