# EC2202 Sets and Maps

**Disclaimer.**
These code examples are based on
1. [KAIST CS206 (Professor Otfried Cheong)](https://otfried.org/courses/cs206/)
2. [GeeksForGeeks](https://practice.geeksforgeeks.org/)
3. Coding Interviews

In [None]:
import doctest
import time
import math

## Sets

In [None]:
%%HTML
<iframe width="560" height="315" src="https://www.youtube.com/embed/eHArNzW0gdQ" title="YouTube video player" frameborder="0" allowfullscreen></iframe>
<iframe width="560" height="315" src="https://www.youtube.com/embed/Fkzw3we-OT4" title="YouTube video player" frameborder="0" allowfullscreen></iframe>

갑자기 class 연습하기..

In [None]:
class VendingMachine:
  """A vending machine that vends some product for some price.

    >>> v = VendingMachine('candy', 10)
    >>> v.vend()
    'Nothing left to vend. Please restock.'
    >>> v.add_funds(15)
    'Nothing left to vend. Please restock. Here is your $15.'
    >>> v.restock(2)
    'Current candy stock: 2'
    >>> v.vend()
    'Please add $10 more funds.'
    >>> v.add_funds(7)
    'Current balance: $7'
    >>> v.vend()
    'Please add $3 more funds.'
    >>> v.add_funds(5)
    'Current balance: $12'
    >>> v.vend()
    'Here is your candy and $2 change.'
    >>> v.add_funds(10)
    'Current balance: $10'
    >>> v.vend()
    'Here is your candy.'
    >>> v.add_funds(15)
    'Nothing left to vend. Please restock. Here is your $15.'

    >>> w = VendingMachine('soda', 2)
    >>> w.restock(3)
    'Current soda stock: 3'
    >>> w.restock(3)
    'Current soda stock: 6'
    >>> w.add_funds(2)
    'Current balance: $2'
    >>> w.vend()
    'Here is your soda.'
  """

  def __init__(self, product, price):
    """Create an empty machine selling `product` at `price` per item."""
    # Pattern used throughout: self.attribute = parameter
    self.product = product
    self.price = price
    self.stock = 0    # number of items currently available
    self.balance = 0  # funds inserted since the last successful vend

  def restock(self, n):
    """Add `n` items to the stock and return a message with the new stock."""
    self.stock += n
    return f'Current {self.product} stock: {self.stock}'

  def add_funds(self, n):
    """Add `n` to the balance and return a message with the new balance.

    When the machine is out of stock the funds are not kept; the returned
    message hands them straight back.
    """
    if self.stock == 0:
      return f'Nothing left to vend. Please restock. Here is your ${n}.'
    self.balance += n
    return f'Current balance: ${self.balance}'

  def vend(self):
    """Try to dispense one item and return a message describing the outcome.

    Possible outcomes: out of stock, not enough funds (the message states
    how much is missing), or a successful vend (the message includes the
    change when the balance exceeded the price).
    """
    if self.stock == 0:
      return 'Nothing left to vend. Please restock.'
    difference = self.price - self.balance
    if difference > 0:
      return f'Please add ${difference} more funds.'
    message = f'Here is your {self.product}'
    if difference != 0:
      # difference is negative here, so -difference is the overpayment.
      message += f' and ${-difference} change'
    self.balance = 0
    self.stock -= 1
    return message + '.'

### Implementing the Set ADT Using a Python list

In [None]:
class set():
  """Set ADT stored in a plain Python list.

  Membership tests are O(N) because the list is scanned linearly; a
  hash-based implementation would make them O(1). Defining __eq__ without
  __hash__ leaves instances unhashable.
  """

  def __init__(self, items=None):
    """Create a set, optionally filled from the iterable `items`."""
    self._data = []  # empty list == empty set
    if items:
      for item in items:
        self.add(item)  # add() drops duplicates

  def __contains__(self, item):
    """Support `item in s`."""
    return item in self._data

  def __len__(self):
    """Return the number of elements."""
    return len(self._data)

  def __iter__(self):
    """Iterate over the elements in insertion order."""
    return iter(self._data)

  def add(self, item):
    """Insert `item` unless it is already present (sets hold no duplicates)."""
    if item not in self._data:
      self._data.append(item)

  def remove(self, item):
    """Remove `item`; raise KeyError if it is not present."""
    if item in self._data:
      self._data.remove(item)
    else:
      raise KeyError(item)

  def discard(self, item):
    """Remove `item` if present; do nothing otherwise (never raises)."""
    if item in self._data:
      self._data.remove(item)

  def __eq__(self, t):
    """Support `s == t`: equal when both hold exactly the same elements."""
    if not isinstance(t, set):
      return NotImplemented
    # Elements are unique, so equal sizes plus s <= t implies s == t.
    return len(self) == len(t) and self.is_subset(t)

  def is_subset(self, t):
    """Return True when every element of this set is in `t`."""
    for item in self._data:
      if item not in t:  # one missing element is enough to fail
        return False
    return True

  def is_superset(self, t):
    """Return True when every element of `t` is in this set."""
    for item in t:
      if item not in self._data:
        return False
    return True

  def union(self, t):
    """Return a new set holding the elements of this set and of `t`."""
    new_set = set(self._data)
    for item in t:
      new_set.add(item)
    return new_set

In [None]:
%%HTML
<iframe width="560" height="315" src="https://www.youtube.com/embed/pCShafI6jMc" title="YouTube video player" frameborder="0" allowfullscreen></iframe>
<iframe width="560" height="315" src="https://www.youtube.com/embed/XjoqMXMgNtU" title="YouTube video player" frameborder="0" allowfullscreen></iframe>

### Applications

#### A Simple Spell Checker

In [None]:
# Mount Google Drive at /content/drive so that files stored there can be
# opened with ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

def read_words(path="/content/drive/My Drive/수업자료/[EC2202] Data Structures/01_lecture_slides/words-5000.txt"):
  """Load a word list from `path` into a set (the dictionary of valid words).

  The file is read one word per line; surrounding whitespace is stripped.
  `path` defaults to the course word list on the mounted Google Drive.
  """
  words = set()
  # The with-block closes the file even if reading raises.
  with open(path, "r") as s:
    for w in s:
      words.add(w.strip())
  return words  # = dictionary of valid words

def spell(given_word):
  """Return True when `given_word` is in the word list, False otherwise.

  The word list is rebuilt through read_words() on every call.
  """
  dictionary = read_words()
  return given_word in dictionary

Mounted at /content/drive


In [None]:
# The word to check must be a string literal; a bare `hello` is an undefined name.
spell("hello")

#### **'ppp' Exercise** Sieve of Eratosthenes
- This is to find the prime numbers smaller than given N.

In [None]:
### ppp exercise ###

In [None]:
# n 보다 작은 소수를 찾는 것.
# 1번째: 2의 배수를 지워줌
# 2번째: 3의 배수를 지워줌
# 3번째: 5의 배수를 지워줌
# 4번째: 7의 배수를 지워줌
# ...

In [None]:
# c.f.
# Changing the contents of a list while iterating over it leads to bugs:
# the iteration does not behave as expected, even though Python does not
# report it as an error.
# Example: after each remove() the remaining items shift left, so the loop
# skips the element that follows the removed one.
num_list = list(range(10))
print(num_list)
for item in num_list:
  print(item)
  num_list.remove(item)
print(num_list)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
0
2
4
6
8
[1, 3, 5, 7, 9]


In [None]:
# naive implementation
def sieve(n):
  """Return the set of primes smaller than n (naive version).

  Starts from every integer in [2, n) and, for each base in that range,
  discards all of its multiples 2*base, 3*base, ... that are below n.
  """
  candidates = set(range(2, n))
  for base in range(2, n):
    for multiple in range(2 * base, n, base):
      candidates.discard(multiple)
  return candidates

In [None]:
# efficient implementation
# n = sqrt(n) * sqrt(n)
def sieve(n):
  """Return the set of primes smaller than n (Sieve of Eratosthenes).

  Only bases with base * base < n need to be processed: every composite
  below n has a factor whose square does not exceed it. Returns an empty
  set when n <= 2.
  """
  nums = set(range(2, n))
  num = 2
  while num * num < n:
    if num in nums:  # num is still marked prime, so cross out its multiples
      # Multiples below num * num have a smaller factor and are already gone.
      for k in range(num * num, n, num):
        nums.discard(k)
    num += 1
  return nums

In [None]:
%%HTML
<iframe width="560" height="315" src="https://www.youtube.com/embed/gppsSfE80NY" title="YouTube video player" frameborder="0" allowfullscreen></iframe>
<iframe width="560" height="315" src="https://www.youtube.com/embed/zHtg3cJVPb4" title="YouTube video player" frameborder="0" allowfullscreen></iframe>

## Maps

In [None]:
%%HTML
<iframe width="560" height="315" src="https://www.youtube.com/embed/I5UMIL_bbXY" title="YouTube video player" frameborder="0" allowfullscreen></iframe>
<iframe width="560" height="315" src="https://www.youtube.com/embed/Gi5JXlcWzsg" title="YouTube video player" frameborder="0" allowfullscreen></iframe>

### Implementing the Map ADT

In [33]:
# using Python List, Not quite Efficient
# Hashing will be used in the next lecture to implement more efficiently
class dict():
  """Map ADT stored as a Python list of (key, value) tuples.

  Every operation scans the list linearly to find the key.
  """

  def __init__(self):
    """Create an empty map."""
    self._data = []

  def __len__(self):
    """Return the number of stored keys."""
    return len(self._data)

  def __setitem__(self, k, value):
    """Support `d[k] = value`: overwrite an existing key or append a new pair."""
    i = self._findkey(k)
    if i >= 0:
      self._data[i] = (k, value)  # key already stored at position i
    else:
      # list.append takes a single argument, so the pair goes in as a tuple.
      self._data.append((k, value))

  def _findkey(self, k):
    """Return the index of key `k` in the list, or -1 when it is absent."""
    for i, entry in enumerate(self._data):
      if k == entry[0]:
        return i
    return -1  # reached only after every entry has been checked

  def __contains__(self, k):
    """Support `k in d`."""
    return self._findkey(k) >= 0

  def __getitem__(self, k):
    """Support `d[k]`: return the value for `k`, raising KeyError if absent."""
    i = self._findkey(k)
    if i < 0:
      raise KeyError(k)
    return self._data[i][1]  # element 0 is the key, element 1 the value

  def get(self, k, v0 = None):
    """Return the value for `k`, or `v0` when the key is absent."""
    i = self._findkey(k)
    if i >= 0:
      return self._data[i][1]
    return v0

In [34]:
# 교수님 코드
class dict():
  """Map ADT stored as a Python list of (key, value) tuples.

  Every operation scans the list linearly to find the key. Iterating over
  the map (or calling keys()) yields the keys in insertion order.
  """

  def __init__(self):
    """Create an empty map."""
    self._data = []

  def __len__(self):
    """Return the number of stored keys."""
    return len(self._data)

  def __setitem__(self, k, value):
    """Support `d[k] = value`: overwrite an existing key or append a new pair."""
    i = self._findkey(k)
    if i >= 0:
      self._data[i] = (k, value)
    else:
      self._data.append((k, value))

  def _findkey(self, k):
    """Return the index of key `k` in the list, or -1 when it is absent."""
    for i in range(len(self._data)):
      if k == self._data[i][0]:
        return i
    return -1

  def __contains__(self, k):
    """Support `k in d`."""
    return self._findkey(k) >= 0

  # __getitem__ and get were left as 'ppp' exercise stubs (`pass`), which
  # made every lookup return None; they are implemented here.
  def __getitem__(self, k):
    """Support `d[k]`: return the value for `k`, raising KeyError if absent."""
    i = self._findkey(k)
    if i < 0:
      raise KeyError(k)
    return self._data[i][1]  # element 0 is the key, element 1 the value

  def get(self, k, v0=None):
    """Return the value for `k`, or `v0` when the key is absent."""
    i = self._findkey(k)
    if i < 0:
      return v0
    return self._data[i][1]

  def keys(self):
    """Return an iterator over the keys."""
    return _MapIterator(self._data)

  def __repr__(self):
    """Return a string of the form ListMap(key: value,key: value)."""
    body = ",".join(repr(k) + ": " + repr(v) for k, v in self._data)
    return "ListMap(" + body + ")"

  def __iter__(self):
    """Support `for k in d`: iterate over the keys."""
    return _MapIterator(self._data)

class _MapIterator():
  """Iterator over the first element (the key) of each entry in a list."""

  def __init__(self, d):
    """Start at the first entry of the list `d`."""
    self._d = d
    self._current = 0  # index of the next entry to hand out

  def __iter__(self):
    """An iterator is its own iterable."""
    return self

  def __next__(self):
    """Return the next key, or raise StopIteration once the list is exhausted."""
    position = self._current
    if position >= len(self._d):
      raise StopIteration
    key = self._d[position][0]
    self._current = position + 1
    return key

### Applications

#### Converting an mRNA sequence to a protein sequence

In [None]:
# Rosalind PROT
# Convert an mRNA sequence to the corresponding protein (amino-acid) string
#
# Example input: AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA
# Output: MAMAPRTEINSTRING

# Codon table: three-letter mRNA codon -> one-letter amino acid, or "Stop".
codon = { "UUU" : "F",    "CUU" : "L", "AUU" : "I", "GUU" : "V",
          "UUC" : "F",    "CUC" : "L", "AUC" : "I", "GUC" : "V",
          "UUA" : "L",    "CUA" : "L", "AUA" : "I", "GUA" : "V",
          "UUG" : "L",    "CUG" : "L", "AUG" : "M", "GUG" : "V",
          "UCU" : "S",    "CCU" : "P", "ACU" : "T", "GCU" : "A",
          "UCC" : "S",    "CCC" : "P", "ACC" : "T", "GCC" : "A",
          "UCA" : "S",    "CCA" : "P", "ACA" : "T", "GCA" : "A",
          "UCG" : "S",    "CCG" : "P", "ACG" : "T", "GCG" : "A",
          "UAU" : "Y",    "CAU" : "H", "AAU" : "N", "GAU" : "D",
          "UAC" : "Y",    "CAC" : "H", "AAC" : "N", "GAC" : "D",
          "UAA" : "Stop", "CAA" : "Q", "AAA" : "K", "GAA" : "E",
          "UAG" : "Stop", "CAG" : "Q", "AAG" : "K", "GAG" : "E",
          "UGU" : "C",    "CGU" : "R", "AGU" : "S", "GGU" : "G",
          "UGC" : "C",    "CGC" : "R", "AGC" : "S", "GGC" : "G",
          "UGA" : "Stop", "CGA" : "R", "AGA" : "R", "GGA" : "G",
          "UGG" : "W",    "CGG" : "R", "AGG" : "R", "GGG" : "G" }


def translate(rna):
  """Return the protein string encoded by the mRNA sequence `rna`.

  The sequence is read in non-overlapping codons from position 0. Output
  starts at an AUG codon and is switched off by a Stop codon; a later AUG
  switches it on again. A trailing fragment shorter than three letters is
  ignored. Raises KeyError for a codon that is not in the table.
  """
  proteins = []
  translating = False
  # The last full codon starts at len(rna) - 3, so it must be included.
  for i in range(0, len(rna) - 2, 3):
    cod = rna[i:i+3]
    p = codon[cod]
    if cod == "AUG":
      translating = True
    if p == "Stop":
      translating = False
    if translating:
      proteins.append(p)
  return "".join(proteins)


# Prompt only when run as a script or notebook cell, not on import.
if __name__ == "__main__":
  rna = input("mRNA sequence> ")
  print(translate(rna))

#### **'ppp' Exercise** [Amazon] Longest subarray having sum k

In [36]:
def long_subarr_k(arr, k):
  '''Given an array arr containing len(arr) integers and an integer k,
  long_subarr_k finds
    1) the length of the longest subarray
       with the sum of the elements equal to the given value k, and
    2) the list of elements summing to k.
  Returns the tuple (length, subarray), or 0 when no subarray sums to k.
  When several subarrays share the longest length, the one starting
  latest in arr is returned.
  >>> long_subarr_k([10, 5, 2, 7, 1, 9], 15)
  (4, [5, 2, 7, 1])
  >>> long_subarr_k([-1, 2, 3], 6)
  0
  >>> long_subarr_k([-5, 8, -14, 2, 4, 12], -5)
  (5, [-5, 8, -14, 2, 4])
  '''
  # Map: prefix sum -> earliest index at which that prefix sum occurs.
  # Prefix sum 0 "occurs" at index -1, i.e. before the first element.
  first_seen = {0: -1}
  prefix = 0
  max_len = 0
  best_start = 0

  # Single pass: arr[s+1 .. j] sums to k exactly when
  # prefix(j) - prefix(s) == k, and the earliest such s gives the longest
  # subarray ending at j.
  for j, value in enumerate(arr):
    prefix += value
    s = first_seen.get(prefix - k)
    if s is not None:
      num = j - s
      if num >= max_len:  # >= so that, on ties, the later subarray wins
        max_len = num
        best_start = s + 1
    first_seen.setdefault(prefix, j)  # keep only the earliest index

  if max_len == 0:
    return 0
  return (max_len, arr[best_start:best_start + max_len])

In [38]:
# Run the examples in long_subarr_k's docstring against this cell's globals;
# the third argument (verbose) is False, so only failing examples are printed.
doctest.run_docstring_examples(long_subarr_k, globals(), False, __name__)

In [None]:
%%HTML
<iframe width="560" height="315" src="https://www.youtube.com/embed/X_xCSkCoR3w" title="YouTube video player" frameborder="0" allowfullscreen></iframe>
<iframe width="560" height="315" src="https://www.youtube.com/embed/0v3ODG1grAw" title="YouTube video player" frameborder="0" allowfullscreen></iframe>