### <font color="brown">Set</font>

#### <font color="brown">Building a set out of non-structure types (numeric, string, boolean)</font>

In [1]:
set1 = {2,2,3}
print(set1)

{2, 3}


In [2]:
# can items be of different types
set2 = {2,"two","three",True,12.6}
print(set2)

{True, 2, 'two', 'three', 12.6}


In [3]:
# can sets contain structures?
set3 = {[1,2,3]}
print(set3)

TypeError: unhashable type: 'list'

**Sets can't have structures as members**

In [4]:
# are sets ordered?
set2[0]

TypeError: 'set' object is not subscriptable

**Sets are unordered, so cannot be indexed**

---

#### <font color="brown">Building a set out of iterables (strings, lists, tuples)</font>

In [7]:
set1 = set("apple")
print(set1)
set2 = set("pie")
print(set2)
set3 = set([1,3,4,2,1])
print(set3)
set4 = set(('x','y','x','z'))
print(set4)

{'p', 'e', 'a', 'l'}
{'p', 'e', 'i'}
{1, 2, 3, 4}
{'z', 'y', 'x'}


---

#### <font color="brown">Adding to a set</font>

In [8]:
myset = set()
myset.add('a')
myset.add('p')
myset.add('p')
print(myset)

{'p', 'a'}


In [9]:
# add an iterable?
myset.add([1,2,3])
myset

TypeError: unhashable type: 'list'

**An iterable cannot be 'add'ed**

---

#### <font color="brown">Updating a set</font>

In [10]:
# updating with another set
myset.update(set('pqr'))
myset

{'a', 'p', 'q', 'r'}

In [11]:
# updating with a list (iterable)
myset.update(['x','y','z'])
myset

{'a', 'p', 'q', 'r', 'x', 'y', 'z'}

In [12]:
# updating with a tuple (iterable)
myset.update((10,15))
myset

{10, 15, 'a', 'p', 'q', 'r', 'x', 'y', 'z'}

In [13]:
# updating with a non-structure
myset.update('m')
myset

{10, 15, 'a', 'm', 'p', 'q', 'r', 'x', 'y', 'z'}

**Above is same as using 'add' method**

In [14]:
# updating with a string as iterable?
myset.update('def')
myset

{10, 15, 'a', 'd', 'e', 'f', 'm', 'p', 'q', 'r', 'x', 'y', 'z'}

**When update is used, string parameter is treated as an iterable**

---

#### <font color="brown">Set Operations</font>

In [16]:
print(set("apple") | set("pie"))  # union

print(set("apple") & set("pie"))  # intersection

print(set("apple") - set("pie"))  # difference
print(set("pie") - set("apple"))

print(set("pie") <= set("apple")) # subset
print(set("pe") <= set("apple"))
print(set("peal") <= set("apple"))

print(set("pe") < set("apple"))   # proper subset
print(set("peal") < set("apple"))

print(set("set") ^ set("list"))   # union difference intersection

print('i' in set("pie"))  # membership

{'p', 'i', 'l', 'e', 'a'}
{'p', 'e'}
{'a', 'l'}
{'i'}
False
True
True
True
False
{'i', 'e', 'l'}
True


**Example of set usage: find unique words in document**

In [88]:
def getword(token):
    token = token.strip(',.')
    if not token.isalpha():
        return None
    if len(token) < 4:
        return None
    return token.lower()

In [89]:
from collections import Counter

unique_words = set()
word_count = Counter()
for line in open('metamorphosis.txt'):
    tokens = line.split()  # default separator is whitespace
    for token in tokens:
        word = getword(token)
        if word:
            unique_words.add(word)
            word_count.update([word])

In [90]:
print(len(word_count))
print(len(unique_words))

64
64


In [91]:
print(unique_words)

{'tried', 'sleep', 'back', 'when', 'gregor', 'floundering', 'threw', 'weather', 'hitting', 'pane', 'heard', 'made', 'sleeping', 'hundred', 'eyes', 'rain', 'never', 'mild', 'quite', 'where', 'himself', 'then', 'longer', 'stopped', 'because', 'always', 'this', 'something', 'turned', 'legs', 'however', 'times', 'there', 'window', 'little', 'nonsense', 'began', 'look', 'shut', 'dull', 'before', 'could', 'used', 'must', 'onto', 'into', 'about', 'rolled', 'which', 'unable', 'pain', 'position', 'only', 'forget', 'that', 'have', 'present', 'felt', 'right', 'thought', 'state', 'feel', 'drops', 'hard'}


In [92]:
print(sorted(unique_words))

['about', 'always', 'back', 'because', 'before', 'began', 'could', 'drops', 'dull', 'eyes', 'feel', 'felt', 'floundering', 'forget', 'gregor', 'hard', 'have', 'heard', 'himself', 'hitting', 'however', 'hundred', 'into', 'legs', 'little', 'longer', 'look', 'made', 'mild', 'must', 'never', 'nonsense', 'only', 'onto', 'pain', 'pane', 'position', 'present', 'quite', 'rain', 'right', 'rolled', 'shut', 'sleep', 'sleeping', 'something', 'state', 'stopped', 'that', 'then', 'there', 'this', 'thought', 'threw', 'times', 'tried', 'turned', 'unable', 'used', 'weather', 'when', 'where', 'which', 'window']


---

### <font color="brown">Random</font>

In [19]:
import random

In [20]:
articles = ["the", "a"]
subjects = ["man", "woman", "boy", "girl", "scientist", "loser", "poser"]
verbs = ["jumped", "sang", "ran", "cried", "laughed", "played", "programmed"]
adverbs = ["loudly", "badly", "heavily", "softly", "madly", "sadly"]

In [26]:
line = 1
while line < 6:
    str = ""
    str += random.choice(articles)
    str += " " + random.choice(subjects)
    str += " " + random.choice(verbs)
    adv = random.randint(0,1)
    if adv:
         str += " " + random.choice(adverbs)
    line += 1
print(str)

a boy played


---

### <font color="brown">NumPy - Numerical Python</font>
https://numpy.org/

#### A key feature of numpy is the n-dimensional array object, or ndarray, which allows you to perform mathematical operations on entire arrays as you would with single numeric values

---

In [1]:
import numpy as np

#### <font color="brown">Creating an ndarray from a list</font>

In [28]:
data1 = [3, 2.8, 19, 5, 17.6, 5.1]
arr1 = np.array(data1)
arr1

array([ 3. ,  2.8, 19. ,  5. , 17.6,  5.1])

**All items in an ndarray MUST BE OF THE SAME TYPE (unlike Python list)**

In [29]:
num1 = np.array([1,2,3,4,5])
str1 = np.array(['cs112','cs210','cs211'])
bool1 = np.array([True,True,False,True])
print(num1)
print(str1)
print(bool1)

[1 2 3 4 5]
['cs112' 'cs210' 'cs211']
[ True  True False  True]


In [30]:
# mix int with float
mixedarr2 = np.array([1,2.5])
print(mixedarr2)

[1.  2.5]


**Example above shows that if list has only int and float, then int is converted to float**

In [31]:
# mix numeric with boolean
mixedarr2 = np.array([1,2.3,True,False])
print(mixedarr2)

[1.  2.3 1.  0. ]


**Example above shows that if boolean is mixed with numbers, it is converted to a number (0 for False, 1 for True)**

In [32]:
# mix string with other types
mixedarr1 = np.array([1,2.3,'1',True])
print(mixedarr1)

['1' '2.3' '1' 'True']


**Example above shows that if at least one of the items is a string, other items are cooerced to strings**

---

#### <font color="brown">Multiplying and adding on an ndarray</font>

In [5]:
# multiplying a Python list by a scalar repeats it (just like string)
data1*2

[3, 2.8, 19, 5, 17.6, 5.1, 3, 2.8, 19, 5, 17.6, 5.1]

In [6]:
# multiplying a numpy array multiplies all items individually 
arr1*2

array([ 6. ,  5.6, 38. , 10. , 35.2, 10.2])

In [7]:
# adding a scalar to a Python list?
data1 + 2

TypeError: can only concatenate list (not "int") to list

In [8]:
# adding a scalar to a numpy array
arr1 + 2

array([ 5. ,  4.8, 21. ,  7. , 19.6,  7.1])

In [9]:
# adding two lists in Python appends all items of second to first
data1 + data1

[3, 2.8, 19, 5, 17.6, 5.1, 3, 2.8, 19, 5, 17.6, 5.1]

In [10]:
# adding two numpy arrays does element-wise addition
arr1 + arr1

array([ 6. ,  5.6, 38. , 10. , 35.2, 10.2])

In [31]:
# when int is mixed with float, all items are converted to float
mixedarr2 = np.array([1,2.5])
print(mixedarr2)

[1.  2.5]


---

#### <font color="brown">Every ndarray has a type</font>
Type is accessed through ndarray.dtype property (not function)

In [37]:
data1 = [3, 2.8, 19, 5, 17.6, 5.1]
arr1 = np.array(data1)
print(arr1.dtype)

num1 = np.array([1,2,3,4,5])
print(num1.dtype)

str1 = np.array(['cs112','cs210','cs211'])
print(str1.dtype)

bool1 = np.array([True,True,False,True])
print(bool1.dtype)

float64
int64
<U5
bool


**U5 above means means Unicode, 5 characters. Actual bytes per character depends on platform**

In [35]:
str2 = np.array(['one','three','five','eleven'])
print(str2.dtype)

<U6


---

#### <font color="brown">Every ndarray has a shape</font>
Shape is accessed through ndarray.shape property (not function)

In [36]:
arr1.shape

(6,)

In [37]:
arr2d = np.array([[1,2,3],[4,5,6]])  # input is nested list
print(arr2d)
print(arr2d.dtype)
print(arr2d.shape)   # 2 rows, 3 columns

[[1 2 3]
 [4 5 6]]
int64
(2, 3)


In [38]:
print(arr2d.ndim)  # ndim gives number of rows

2


In [40]:
r,c = arr2d.shape
print(f'rows={r}, columns={c}')

rows=2, columns=3


**Nested lists of different lengths will give an unusual object, an array of two lists**

In [40]:
np.array([[1,2,3],[4,5,6,7]],dtype=object)  

array([list([1, 2, 3]), list([4, 5, 6, 7])], dtype=object)

---

#### <font color="brown">Creating boilerplate ndarrays using special NumPy functions</font>

**Array initialized to zeros**

In [25]:
# array initialized to zeros
zr = np.zeros(10)
zr

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [41]:
zr2d = np.zeros((5,3))  # 5 x 3 array field with zeros
zr2d

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [42]:
zr2d = np.zeros(5,3)   # won't work, shape argument must be a tuple, except for 1-d
zr2d 

TypeError: Cannot interpret '3' as a data type

**Array initialized to ones**

In [28]:
ones2d = np.ones((3,4))
ones2d

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [43]:
# use dtype argument to set type to int instead of default float
ones2d = np.ones((3,4),dtype=int)
ones2d

array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]])

**Array initialized to empty (no particular value)**

In [46]:
np.empty((2,3,2))  # 3D

array([[[-1.72723371e-077, -1.72723371e-077],
        [ 3.45845952e-323,  0.00000000e+000],
        [ 0.00000000e+000,  0.00000000e+000]],

       [[-1.72723371e-077, -1.72723371e-077],
        [ 1.97626258e-323,  0.00000000e+000],
        [ 0.00000000e+000,  0.00000000e+000]]])

**<font color="red">not safe to assume that np.empty() will get you ones, or zeros, or anything specific</font>**

**Making zeros, ones, empty array out of another array's shape**

In [33]:
arr2d = np.array([[1,2,3],[4,5,6]])

In [34]:
np.ones_like(arr2d)

array([[1, 1, 1],
       [1, 1, 1]])

In [35]:
np.empty_like(arr2d)

array([[0, 0, 0],
       [0, 0, 0]])

**Identity matrix (square matrix with 1s on main diagonal)**

In [58]:
np.eye(3)  # single parameter because matrix is square

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

---

#### <font color="brown">NumPy functions arange and reshape</font>

**The arange function is the numpy array equivalent of Python range function**

In [5]:
np.arange(5)

array([0, 1, 2, 3, 4])

In [3]:
np.arange(-3,3,3)

array([-3,  0])

In [6]:
np.arange(5,-2,-1)

array([ 5,  4,  3,  2,  1,  0, -1])

**Reshaping an ndarray**

In [39]:
np.arange(15).reshape(3,5)   # reshape can be used on any ndarray

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [40]:
arr2d = np.array([[1,2,3],[4,5,6]])

In [41]:
arr2d.reshape(6)

array([1, 2, 3, 4, 5, 6])

In [72]:
arr3d = np.arange(12).reshape(2,3,2)
print(arr3d)

[[[ 0  1]
  [ 2  3]
  [ 4  5]]

 [[ 6  7]
  [ 8  9]
  [10 11]]]


**In the above, the first parameter is the 3rd dimension, so 2 planes of 3x2**

In [43]:
# get 2nd row, 1st column of 1st plane
arr3d[0,1,0]

2

In [44]:
# alternatively, you can use this syntax
arr3d[0][1][0]

2

In [45]:
# get 3nd row, 2st column of 2nd plane
arr3d[1,2,1]

11

In [73]:
np.zeros_like(arr3d)

array([[[0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0]]])