In [235]:
import sys
from typing import List, Tuple, Any, Dict, Callable

In [3]:
""" DFS Algorithm: recursive call
1) raise Python's recursion limit so that deep recursion does not hit RecursionError
2) select the start node
3) set the visit FLAG of the current node
4) iterate over the adjacency list of the current node
5) recursively call dfs on every neighbour that is not visited yet (FLAG == False)
"""

# Raise the recursion limit so that a deep graph does not trigger RecursionError.
sys.setrecursionlimit(10**6)
start_node = 1  # vertex 0 is an unused placeholder, so real vertices are numbered from 1

# Adjacency list: graph[v] lists the neighbours of vertex v.
# Declared with immutable tuples wherever possible.
graph = (
    (),
    (2, 3, 8),
    (1, 7),
    (1, 4, 5),
    (3, 5),
    (3, 4),
    (7,),
    (2, 6, 8),
    (1, 7),
)
visit = [False] * len(graph)  # visit[v] becomes True once vertex v has been reached


def dfs(src: int, visited: List[bool]) -> None:
    """Recursive depth-first traversal over the module-level ``graph``.

    Prints every vertex reachable from ``src`` in pre-order, separated by spaces.

    Args:
        src: vertex to visit now.
        visited: mutable flag list indexed by vertex; it is updated in place,
            so it must be a list (not a tuple).
    """
    visited[src] = True
    print(src, end=" ")
    for next_node in graph[src]:
        # Only descend into neighbours that have not been reached yet.
        if not visited[next_node]:
            dfs(next_node, visited)


dfs(start_node, visit)

1 2 7 6 8 3 4 5 

In [4]:
""" BFS Algorithm
1) init data structure: from collections import deque
2) select start node
3) change visit FLAG to start node
4) insert start node to deque
5) loop while the deque is not empty
    - popleft a node out of the deque
    - iterate over the adjacency list of the popped node
        - if a neighbour has not been visited
            - set the visit FLAG of that neighbour
            - insert that neighbour into the deque
"""
from collections import deque

visit = [False for _ in range(9)]  # fresh flags: the DFS cell already consumed its own list


def bfs(src: int, visited: List[bool]) -> None:
    """Breadth-first traversal over the module-level ``graph``.

    Prints every vertex reachable from ``src`` in the order it is dequeued,
    separated by spaces.

    Args:
        src: vertex to start from.
        visited: mutable flag list indexed by vertex; it is updated in place,
            so it must be a list (not a tuple).
    """
    visited[src] = True
    q = deque([src])
    while q:
        node = q.popleft()
        print(node, end=' ')
        for next_node in graph[node]:
            if not visited[next_node]:
                # Mark on enqueue (not on dequeue) so a vertex is never queued twice.
                visited[next_node] = True
                q.append(next_node)


bfs(1, visit)

1 2 3 8 7 4 5 6 

In [5]:
def longest_palindrome(inputs: str, verbose: bool = True) -> str:
    """ Expand-Around-Center Algorithm
    1) treat every position as the centre of a candidate palindrome
        - even length: centre between i and i+1
        - odd length: centre on i+1 (the window starts as i .. i+2)
    2) start at the left-most position of the string
    3) for each position:
        - expand both pointers outwards while the two end characters match
        - keep the longest palindrome seen so far (on ties, the earliest one wins)
    4) shortcuts:
        - the given word is already a palindrome
        - the given word has fewer than 2 characters

    Args:
        inputs: string to search.
        verbose: when True (the default) print the running best after every
            iteration; pass False to get only the return value.

    Returns:
        The longest palindromic substring of ``inputs``.
    """
    def expand(left: int, right: int) -> str:
        # Grow the window while it stays inside the string and both ends match.
        while left >= 0 and right < len(inputs) and inputs[left] == inputs[right]:
            left -= 1
            right += 1
        # The loop overshoots by one on each side, hence left+1 and an exclusive right.
        return inputs[left+1:right]

    if len(inputs) < 2 or inputs == inputs[::-1]:
        return inputs

    result = ''
    for i in range(len(inputs)-1):
        result = max(
            result,
            expand(i, i+1),
            expand(i, i+2),
            key=len
        )
        if verbose:
            print(f"num of iter: {i}")
            print(f"current longest state: {result}")
    return result

word = 'babba'
longest_palindrome(word)

num of iter: 0
current longest state: bab
num of iter: 1
current longest state: bab
num of iter: 2
current longest state: abba
num of iter: 3
current longest state: abba


'abba'

In [6]:
def remove_target_text(inputs: str, target: str, empty_result: str = "FRULA") -> str:
    """ Stack-based removal of every occurrence of ``target`` from ``inputs``.

    Characters are pushed one by one; whenever the top of the stack spells
    ``target`` it is popped off, so occurrences that only appear after an
    earlier removal are removed as well.

    Args:
        inputs: text to scan.
        target: text to remove.
        empty_result: value returned when nothing is left after removal
            (defaults to "FRULA").

    Returns:
        The remaining text, or ``empty_result`` if it is empty.
    """
    if not target:
        # An empty target never matches anything, so the text is returned as is.
        return inputs if inputs else empty_result

    pattern = list(target)
    size = len(pattern)
    last = pattern[-1]
    stack = []
    for char in inputs:
        stack.append(char)
        # Cheap last-character test first; compare the full tail only when it can match.
        if char == last and stack[-size:] == pattern:
            del stack[-size:]
    return ''.join(stack) if stack else empty_result


text = 'mirkovC4nizCC44'
bomb = 'C4'
remove_target_text(text, bomb)

'mirkovniz'

In [7]:
""" Dijkstra Algorithm with heapq
"""

import heapq

def dijkstra(src: int, distance: List[int], adjacency: Any = None) -> None:
    """ Dijkstra's algorithm: shortest cost from ``src`` to every other node.

    1) start from ``src``; ``distance[src]`` must already hold the start cost (normally 0)
       and every other entry a value larger than any real path cost
    2) repeatedly pop the cheapest pending (cost, node) pair from a heapq min-heap
       (a linear scan for the minimum would be too slow under a time limit)
    3) skip pairs that are stale because a cheaper path to that node was found meanwhile
    4) relax every outgoing edge and push the improved costs back onto the heap
    5) stop when the heap is empty

    Args:
        src: start node.
        distance: shortest-cost table indexed by node; updated in place.
        adjacency: weighted adjacency structure where ``adjacency[node]`` yields
            edges whose element 0 is the edge cost and element 1 the target node.
            When omitted, the module-level ``graph`` is used.
    """
    if adjacency is None:
        adjacency = graph  # backward-compatible default: the notebook-level graph

    h = [(distance[src], src)]
    while h:
        min_cost, node = heapq.heappop(h)
        if min_cost > distance[node]:
            # Stale heap entry: a cheaper path to `node` has already been recorded.
            continue
        for edge in adjacency[node]:
            curr_cost, curr_node = edge[0], edge[1]
            cost = min_cost + curr_cost
            if cost < distance[curr_node]:
                distance[curr_node] = cost
                heapq.heappush(h, (cost, curr_node))

In [113]:
""" collections review
1) Counter
2) deque
3) default dict
4) namedtuple
"""

from collections import Counter, deque
from collections import defaultdict, namedtuple

# 1) Counter: maps each element of an iterable to how many times it occurs
test = 'aabcdacdbbbbaaaadddcccbcbcbcbcbcbbcbcbcbcbcdaadaabadbcdbcdacdbacdbacdbcadacbbcabcadb'
test_2 = 'aaaaabbbbcccccdddd'
# test = ['red', 'red', 'red', 'red', 'yellow', 'blue', 'blue', 'blue', 'green', 'green']
counter, counter_2 = Counter(test), Counter(test_2)

# The repr lists the entries from the most common to the least common.
print(f"print counter dict: {counter}", end='\n\n')
print(f"print counter dict: {counter_2}", end='\n\n')

# keys()/values() behave like the plain dict views.
print(f"print counter dict key: {counter.keys()}", end='\n\n')
print(f"print counter dict value: {counter.values()}", end='\n\n')

# most_common(n) returns the n highest (element, count) pairs.
print(f"print counter most_common: {counter.most_common(2)}")

# elements() repeats every element as many times as its count.
print(f"print sorted test string: {''.join(sorted(counter.elements(), reverse=True))}")

print(f"print subtract two counter object: {counter - counter_2}")  # arithmetic: counts are subtracted per element
print(f"print OR object: {counter | counter_2}")  # union: per-element maximum of the two counts
print(f"print AND two counter object: {counter & counter_2}")  # intersection: per-element minimum of the two counts

# clear() empties the counter in place so the same object can be reused.
counter_2.clear()
print(f"After applying clear to Counter: {counter_2}")  # prints an empty Counter()

print counter dict: Counter({'b': 26, 'c': 24, 'a': 19, 'd': 15})

print counter dict: Counter({'a': 5, 'c': 5, 'b': 4, 'd': 4})

print counter dict key: dict_keys(['a', 'b', 'c', 'd'])

print counter dict value: dict_values([19, 26, 24, 15])

print counter most_common: [('b', 26), ('c', 24)]
print sorted test string: dddddddddddddddccccccccccccccccccccccccbbbbbbbbbbbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaa
print subtract two counter object: Counter({'b': 22, 'c': 19, 'a': 14, 'd': 11})
print OR object: Counter({'b': 26, 'c': 24, 'a': 19, 'd': 15})
print AND two counter object: Counter({'a': 5, 'c': 5, 'b': 4, 'd': 4})
After applying clear to Counter: Counter()


In [114]:
""" collections.deque
deque does not support the slicing operator, so to emulate deque slicing, apply rotate()
to bring a target element to the left side of the deque. 
Remove old entries with popleft(), add new entries with extend(), and then reverse the rotation. 
With minor variations on that approach, it is easy to implement 
Forth style stack manipulations such as dup, drop, swap, over, pick, rot, and roll.
"""

# Start from a deque holding a single element.
q = deque(['src'])
print(f"initialize deque data structure: {q}", end='\n\n')

# append() adds its argument as ONE element, so the whole list becomes a single item on the right.
q.append(list(range(5)))
print(f"append right deque: {q}", end='\n\n')

# appendleft() does the same on the left side.
q.appendleft(list(range(-5, 1, 1)))
print(f"append left deque: {q}", end='\n\n')

# extend() iterates over its argument and adds each item on the right.
q.extend([6])
print(f"extend right deque: {q}", end='\n\n')

# extendleft() iterates over its argument and adds each item on the left.
q.extendleft([-1])
print(f"extend left deque: {q}", end='\n\n')

# pop() removes the right-most item (its return value is discarded here).
q.pop()
print(f"pop right deque: {q}", end='\n\n')

# popleft() removes the left-most item.
q.popleft()
print(f"pop left deque: {q}", end='\n\n')

# rotate(2) moves the two right-most items to the left end.
q.rotate(2)
print(f"rotate deque clockwise: {q}", end='\n\n')

# rotate(-2) moves them back, undoing the previous rotation.
q.rotate(-2)
print(f"rotate deque reverse clockwise: {q}", end='\n\n')

# reverse() reverses the deque in place.
q.reverse()
print(f"reverse deque: {q}", end='\n\n')

# clear() removes every item so the same object can be reused.
q.clear()
print(f"clear deque for recycling object: {q}", end='\n\n')

initialize deque data structure: deque(['src'])

append right deque: deque(['src', [0, 1, 2, 3, 4]])

append left deque: deque([[-5, -4, -3, -2, -1, 0], 'src', [0, 1, 2, 3, 4]])

extend right deque: deque([[-5, -4, -3, -2, -1, 0], 'src', [0, 1, 2, 3, 4], 6])

extend left deque: deque([-1, [-5, -4, -3, -2, -1, 0], 'src', [0, 1, 2, 3, 4], 6])

pop right deque: deque([-1, [-5, -4, -3, -2, -1, 0], 'src', [0, 1, 2, 3, 4]])

pop left deque: deque([[-5, -4, -3, -2, -1, 0], 'src', [0, 1, 2, 3, 4]])

rotate deque clockwise: deque(['src', [0, 1, 2, 3, 4], [-5, -4, -3, -2, -1, 0]])

rotate deque reverse clockwise: deque([[-5, -4, -3, -2, -1, 0], 'src', [0, 1, 2, 3, 4]])

reverse deque: deque([[0, 1, 2, 3, 4], 'src', [-5, -4, -3, -2, -1, 0]])

clear deque for recycling object: deque([])


In [115]:
""" collections.defaultdict
defaultdict calls a default factory (here `list`) to create the value for any missing key, so every value starts out with the same type
"""

# A missing key is created on first access with list() as its value.
d = defaultdict(list)
s = [('yellow', 3), ('yellow', 1), ('blue', 2), ('red', 20), ('blue', 4), ('red', 1)]

# Group the numbers by colour; no "key in d" check is needed before append().
for k, v in s:
    d[k].append(v)

print(f"print default dict: {d.items()} ")
# NOTE: key=lambda x: x[0] orders the key strings by their first character only.
print(f"print default dict: {sorted(d.keys(), key=lambda x: x[0])} ")
# Orders the value lists by their first element.
print(f"print default dict: {sorted(d.values(), key=lambda x: x[0])} ")

print default dict: dict_items([('yellow', [3, 1]), ('blue', [2, 4]), ('red', [20, 1])]) 
print default dict: ['blue', 'red', 'yellow'] 
print default dict: [[2, 4], [3, 1], [20, 1]] 


In [134]:
""" collections.nametuple
namedtuple assigns a field name to each positional index of a tuple
namedtuple(
    'name',
    ['arg1', 'arg2', 'arg3', ... 'argN']
)
"""

# Create a tuple subclass named Rectangle with the fields x, y and z.
Rectangle = namedtuple('Rectangle', ['x', 'y', 'z'])
r_point = Rectangle(
    x=11,
    y=22,
    z=33,
)

print(f"initialize named tuple: {r_point}", end='\n\n')
# Iterating yields the field values in declaration order.
print(f"named tuple is iterable object: {[i for i in r_point]}", end='\n\n')

test = [1, 2, 3]
# _make() builds an instance from an iterable of values.
print(f"convert pure list to named tuple: {Rectangle._make(test)}", end='\n\n')
# _asdict() returns a field-name -> value mapping.
print(f"convert named tuple to pure dict: {r_point._asdict()}", end='\n\n')
# _replace() returns a NEW instance with the given fields changed; r_point itself is untouched.
print(f"replace some values in named tuple: {r_point._replace(x=111)}")

initialize named tuple: Rectangle(x=11, y=22, z=33)

named tuple is iterable object: [11, 22, 33]

convert pure list to named tuple: Rectangle(x=1, y=2, z=3)

convert named tuple to pure dict: {'x': 11, 'y': 22, 'z': 33}

replace some values in named tuple: Rectangle(x=111, y=22, z=33)


In [ ]:
""" collections.OrderDict
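Now that the built-in dict remembers the insertion order of its items, this class is rarely needed.
It is still useful for move_to_end() and for order-sensitive equality comparison.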
"""

In [174]:
""" heapq review
heapq is used to find the global min/max element or to implement a priority queue
a heap does not guarantee that all of its elements are stored in sorted order
it only guarantees that the smallest element sits at index 0 (heapq is a min-heap; negate the values for max-heap behaviour)

if you need every element in sorted order,
pop them one by one with heappop() into another list
each heappop() restores the heap invariant, so the next smallest element always comes out next
1) heapq
    - max heapsort
    - min heapsort
    - priority queue
2) heapq basic operator
"""

""" 1-1) max heapsort """
import random

# Build a max-heap on top of heapq's min-heap by storing every value negated.
h = []
for _ in range(10):
    heapq.heappush(h, -random.randint(0, 100))

print(f"max heap result: {h}", end='\n\n')

# Drain the heap; undoing the negation yields the values in descending order.
result = []
while h:
    result.append(-heapq.heappop(h))

print(f"max heapsort result: {result}", end='\n\n')

max heap result: [-99, -98, -89, -95, -43, -24, -73, -56, -3, -19]

max heapsort result: [99, 98, 95, 89, 73, 56, 43, 24, 19, 3]


In [173]:
""" 1-2) min heap """
# Push ten random values; heapq keeps the smallest one at index 0.
h = []
for _ in range(10):
    heapq.heappush(h, random.randint(0, 100))

print(f"min heap result: {h}")

# Popping until the heap is empty yields the values in ascending order.
result = [heapq.heappop(h) for _ in range(len(h))]

print(f"min heapsort result: {result}")

min heap result: [1, 3, 9, 18, 76, 73, 16, 79, 69, 86]
min heapsort result: [1, 3, 9, 16, 18, 69, 73, 76, 79, 86]


In [177]:
""" 1-3) priority queue
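push tuples whose first element is the priority of the object; tuples are compared element by element

Q1. Sort stability: how can two tasks with equal priority be returned in the order they were originally added?
If the priorities are equal and the tasks have no default comparison order, the tuple comparison of the (priority, task) pair fails.
As long as the following elements are comparable, the comparison keeps descending to later positions until the tie is broken (the heapq docs suggest an insertion counter: (priority, count, task)).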
"""
# Each entry is a (priority, value) tuple; the heap orders entries by tuple comparison.
h = []
for _ in range(10):
    heapq.heappush(h, (random.randint(0, 10), random.randint(0, 10)))

print(f"min heap result: {h}")

# Entries come out by ascending priority; equal priorities are ordered by the second element.
result = []
while h:
    result.append(heapq.heappop(h))

print(f"min heapsort result: {result}")

min heap result: [(1, 6), (4, 0), (3, 9), (5, 3), (6, 7), (9, 1), (7, 9), (7, 8), (5, 6), (8, 0)]
min heapsort result: [(1, 6), (3, 9), (4, 0), (5, 3), (5, 6), (6, 7), (7, 8), (7, 9), (8, 0), (9, 1)]


In [181]:
""" 2) heapq basic operator
- heappush(h: List, item) => push element and then heap sorting for min/max element in heap
- heappop(h: List) => pop min/max element in heap
- heapify(x: List) => make pure list to heap
- heapreplace(h:List, item) => heappop() element and then heappush() item
- heappushpop(h: List, item) => heappush() item and then heappop()
- heapq.nlargest(n, iterable, key=None)
- heapq.nsmallest(n, iterable, key=None)
"""

# `result` is not defined in this cell; it is the list of tuples left by the
# priority-queue cell above, so that cell has to be run first.
# nlargest/nsmallest return the 3 biggest / smallest entries, already ordered.
print(f"(max) Top-3 heapsort result: {heapq.nlargest(3, result)}", end='\n\n')
print(f"(min) Top-3 heapsort result: {heapq.nsmallest(3, result)}")

(max) Top-3 heapsort result: [(9, 1), (8, 0), (7, 9)]

(min) Top-3 heapsort result: [(1, 6), (3, 9), (4, 0)]


In [240]:
""" bisect review
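The binary-search module assumes an already-sorted iterable as its input; use it when you want to keep a list in sorted order.
While solving problems you often insert and delete items and then have to sort again, and that costs more time than you might expect.

1) bisect_left(a: List, x: Any):
Locate the insertion point for x in a to maintain sorted order. If x is already present in a, the insertion point is before (to the left of) the existing entries.
The return value is suitable for use as the first parameter to list.insert(), assuming that a is already sorted.
A good way to quickly find where to insert an element without breaking the sort order.

With duplicate elements, it returns the position to the left of the first (lowest-index) equal element.

2) bisect_right(a: List, x: Any):

With duplicate elements, it returns the position to the right of the last (highest-index) equal element.
=> This might be useful to apply to SpanBERT later.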
"""
from bisect import bisect_left, bisect_right

# Sorted input with a run of three 3s at indices 2..4.
test = [1, 2, 3, 3, 3, 4, 4, 8, 9]

# bisect_left returns the index before the run (2); bisect_right the index just past it (5).
print(f"bisect left method: {bisect_left(test, 3)}")
print(f"bisect right method: {bisect_right(test, 3)}")

bisect left method: 2
bisect right method: 5


In [15]:
""" itertools review
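1) permutations
2) combinations
3) permutations with repetition (product)
4) combinations with repetition (combinations_with_replacement)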
"""

from itertools import permutations, combinations  # 순열, 조합
from itertools import product, combinations_with_replacement  # 중복 순열, 중복 조합


# Five items (0..4); every result below picks 2 of them.
data = list(range(5))
# p: ordered pairs without repetition, c: unordered pairs without repetition
p, c = list(permutations(data, 2)), list(combinations(data, 2))
# o_p: ordered pairs with repetition, o_c: unordered pairs with repetition
o_p, o_c = list(product(data, repeat=2)), list(combinations_with_replacement(data, r=2))
# Each line prints the generated pairs followed by how many there are.
print(f"permutation result: {p}, {len(p)}", end='\n\n')
print(f"combination result: {c}, {len(c)}", end='\n\n')
print(f"over-lapping permutation result: {o_p}, {len(o_p)}", end='\n\n')
print(f"over-lapping combination result: {o_c}, {len(o_c)}", end='\n\n')

permutation result: [(0, 1), (0, 2), (0, 3), (0, 4), (1, 0), (1, 2), (1, 3), (1, 4), (2, 0), (2, 1), (2, 3), (2, 4), (3, 0), (3, 1), (3, 2), (3, 4), (4, 0), (4, 1), (4, 2), (4, 3)], 20

combination result: [(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)], 10

over-lapping permutation result: [(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (1, 0), (1, 1), (1, 2), (1, 3), (1, 4), (2, 0), (2, 1), (2, 2), (2, 3), (2, 4), (3, 0), (3, 1), (3, 2), (3, 3), (3, 4), (4, 0), (4, 1), (4, 2), (4, 3), (4, 4)], 25

over-lapping combination result: [(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4), (2, 2), (2, 3), (2, 4), (3, 3), (3, 4), (4, 4)], 15


In [213]:
# bytes_size = inputs.input_ids.element_size() * inputs.input_ids.numel()
# print(bytes_size)

44


In [245]:
def count_by_range(l: List, left_value: Any, right_value: Any) -> int:
    """ Return how many elements of a sorted list lie within [left_value, right_value].

    Both boundaries are inclusive. The list must already be sorted, because the
    positions are located with binary search.

    Args:
        l: sorted list to search
        left_value: left (lower) boundary value, inclusive
        right_value: right (upper) boundary value, inclusive

    Returns:
        Number of elements x with left_value <= x <= right_value;
        0 when the range is empty or inverted (left_value > right_value).
    """
    upper = bisect_right(l, right_value)  # index just past the last element <= right_value
    lower = bisect_left(l, left_value)    # index of the first element >= left_value
    # An inverted range would make the difference negative; a count is never below 0.
    return max(0, upper - lower)

list_a = [1, 2, 3, 3, 3, 3, 4, 4, 8, 9]
count_by_range(list_a, 0, 3)

6

In [272]:
""" Mutable Object in pure python review
Mutable Object can be changed in-place, if you want to change mutable object, you don't need to re-assign new object, this behavior will be occurred to change mutable object in same memory space
but, you must be careful to use mutable object, because this object can be changed in-place
1) List
2) Dict
"""
# 1) slice assignment on a list (replaces a range of items in place)

test = [1,2,3,4,5,6,7,8]
test[0:3] = [0, 0, 0, 0]  # a plain slice may be replaced by MORE items: 3 out, 4 in, the list grows
print(f"replace method in mutable object: {test}")

test[0:3] = '1234'  # any iterable works; a string is split into its characters
print(f"replace method in mutable object: {test}")

test[0:3] = 't'  # ... or by FEWER items: 3 out, 1 in, the list shrinks
print(f"replace method in mutable object: {test}")

# The explicit step of 1 still replaces the whole list here (the output is ['t']).
# Only extended slices with a step other than 1 require a replacement of equal length.
test[::1] = 't'
print(f"replace method in mutable object: {test}")

replace method in mutable object: [0, 0, 0, 0, 4, 5, 6, 7, 8]
replace method in mutable object: ['1', '2', '3', '4', 0, 4, 5, 6, 7, 8]
replace method in mutable object: ['t', '4', 0, 4, 5, 6, 7, 8]
replace method in mutable object: ['t']


In [ ]:
""" Immutable Object in pure python review
An immutable object cannot be changed in place; to "change" it you must build a new object and re-assign the name, so the new value lives in a different memory location.

1) Tuple
The constructor builds a tuple whose items are the same and in the same order as the iterable's items.
The iterable may be a sequence, a container that supports iteration, or an iterator object.
If the iterable is already a tuple, it is returned unchanged.
For example, tuple('abc') returns ('a', 'b', 'c') and tuple([1, 2, 3]) returns (1, 2, 3).
If no argument is given, the constructor creates a new empty tuple, ().

2) frozenset (note: the plain set type is mutable)
3) int, float, str, bool ...
"""

In [289]:
""" str object in pure python review
str.capitalize() => upper-case the first character of the string and lower-case the rest
str.upper() => upper-case all characters in string
str.lower() => lower-case all characters in string
str.count() => count occurrences of a target substring in string
str.find() => return the lowest index of a target substring in string, or -1 if it is absent
str.startswith() => check if string starts with target prefix
str.endswith() => check if string ends with target suffix
str.find() vs `in` => find() gives the position; to only check whether the whole string contains the target substring, use the in operator

Methods whose names start with `is` (isdigit(), isalpha(), ...) are all for validating the content of a string
"""

# Rebinds the module-level name `text`, which an earlier cell also uses.
text = "abcdeeeeee"
# Last expression of the cell, so the notebook displays the boolean result.
text.startswith('a')

True