# 2021-04-29 Daily Practice

- [x] Practice: algorithms and data structures
- [x] Learn: Data Science Design Manual
- [x] Build: Savor

---

## Practice: LRU Cache

> [146 LRU Cache on LeetCode](https://leetcode.com/problems/lru-cache/)

Design a data structure that follows the constraints of a Least Recently Used (LRU) cache.

Implement the LRUCache class:

- `LRUCache(int capacity)` Initialize the LRU cache with positive size capacity.
- `int get(int key)` Return the value of the key if the key exists, otherwise return -1.
- `void put(int key, int value)` Update the value of the key if the key exists. Otherwise, add the key-value pair to the cache. If the number of keys exceeds the capacity from this operation, evict the least recently used key.

In [None]:
class LRUCache:
    """LRU cache backed by a dict (values) and a deque (recency order)."""

    def __init__(self, capacity: int):
        """Create an empty cache that holds at most `capacity` keys."""
        from collections import deque

        # Two structures share the work: the dict gives direct access to a
        # value by key, the deque records recency (index 0 = most recent,
        # right end = least recent).
        self.key_values = {}
        self.ordered_keys = deque()
        self.capacity = capacity

    def _promote(self, key: int) -> None:
        """Mark an already-tracked `key` as the most recently used."""
        recency = self.ordered_keys
        if recency[0] == key:  # Already at the front; nothing to move
            return
        recency.remove(key)
        recency.appendleft(key)

    def get(self, key: int) -> int:
        """Look up `key`; return its value, or -1 when it is not cached.

        A successful lookup also makes `key` the most recently used entry.
        """
        stored = self.key_values.get(key)  # None signals "not cached"
        if stored is None:
            return -1
        self._promote(key)
        return stored

    def put(self, key: int, value: int) -> None:
        """Store `value` under `key` and make `key` the most recently used.

        An existing key just has its value replaced. A new key is inserted,
        and if the cache is already holding `capacity` keys, the least
        recently used one is evicted first.
        """
        if key in self.key_values:
            self.key_values[key] = value
            self._promote(key)
            return

        if len(self.ordered_keys) == self.capacity:
            # The right end of the deque is the least recently used key
            evicted = self.ordered_keys.pop()
            del self.key_values[evicted]
        self.ordered_keys.appendleft(key)
        self.key_values[key] = value

In [30]:
# TODO: implement doubly-linked list and LRU cache using DLL as storage
class ListNode:
    """Node of a doubly-linked list that carries a (key, value) tuple."""

    def __init__(self, key_val: tuple, prev=None, next=None):
        # Neighbour links default to None, i.e. an unattached node
        self.prev = prev
        self.next = next
        self.key_val = key_val

    def __repr__(self) -> str:
        """Render the node as its (key, value) tuple."""
        return f"{self.key_val}"

    def __str__(self) -> str:
        """Same text as `__repr__`."""
        return self.__repr__()


class DoublyLinkedList:
    """Doubly-linked list whose nodes each hold a (key, value) tuple.

    The head is the left end and the tail is the right end. Nodes are
    expected to expose `key_val`, `prev` and `next` attributes.
    """

    def __init__(self):
        # Initiate head, tail, and length
        self.head = None
        self.tail = None
        self.length = 0

    def __len__(self) -> int:
        return self.length

    def __str__(self) -> str:
        """Render the nodes from head to tail, joined by ' -> '."""
        parts = []
        node = self.head
        while node is not None:
            parts.append(str(node))
            node = node.next
        return " -> ".join(parts)

    def __repr__(self) -> str:
        return self.__str__()

    def _find(self, key: int):
        """Return the first node (scanning from the head) whose key matches, or None."""
        node = self.head
        while node is not None:
            if node.key_val[0] == key:
                return node
            node = node.next
        return None

    def _unlink(self, node) -> None:
        """Detach `node` from the list, repairing head/tail and the length."""
        if node.prev is None:  # Node is the head
            self.head = node.next
        else:
            node.prev.next = node.next
        if node.next is None:  # Node is the tail
            self.tail = node.prev
        else:
            node.next.prev = node.prev
        node.prev = None
        node.next = None
        self.length -= 1

    def append_left(self, key_val: tuple) -> None:
        """Appends tuple(key-value) to left side (head) of list."""
        new_node = ListNode(key_val, next=self.head)
        if self.head is None:  # Empty list: new node is also the tail
            self.tail = new_node
        else:
            self.head.prev = new_node
        self.head = new_node
        self.length += 1

    def append_right(self, key_val: tuple) -> None:
        """Appends tuple(key-value) to right side (tail) of list."""
        new_node = ListNode(key_val, prev=self.tail)
        if self.tail is None:  # Empty list: new node is also the head
            self.head = new_node
        else:
            self.tail.next = new_node
        self.tail = new_node
        self.length += 1

    def pop_left(self) -> tuple:
        """Remove the head node and return its (key, value) tuple.

        Returns None when the list is empty.
        """
        node = self.head
        if node is None:
            return None
        self._unlink(node)
        return node.key_val

    def pop_right(self) -> tuple:
        """Remove the tail node and return its (key, value) tuple.

        Returns None when the list is empty.
        """
        node = self.tail
        if node is None:
            return None
        self._unlink(node)
        return node.key_val

    def get(self, key: int) -> int:
        """Retrieve value at key. Returns None if key is not found."""
        node = self._find(key)
        return None if node is None else node.key_val[1]

    def pop_by_key(self, key: int) -> tuple:
        """Remove the first node holding `key` and return its (key, value) tuple.

        Returns None if the key is not found.
        """
        node = self._find(key)
        if node is None:
            return None
        self._unlink(node)
        return node.key_val

    def update_or_append_left(self, key_val: tuple) -> None:
        """Store `key_val` at the head, replacing any node with the same key.

        Tuples are immutable, so an existing entry is replaced rather than
        edited in place; either way the pair ends up as the head node.
        """
        existing = self._find(key_val[0])
        if existing is not None:
            self._unlink(existing)
        # append_left does the length bookkeeping; no extra increment here
        self.append_left(key_val)


class LRUCache:
    """LRU cache that keeps (key, value) tuples in a DoublyLinkedList.

    The head of the list is the most recently used entry and the tail is
    the least recently used one.
    """

    def __init__(self, capacity: int):
        """Initialize the LRU cache with positive size capacity."""
        self.capacity = capacity
        self.storage = DoublyLinkedList()

    def get(self, key: int) -> int:
        """Return the value of the key if the key exists, otherwise return -1."""
        key_val = self.storage.pop_by_key(key)  # Try to get key-value
        if key_val is None:
            return -1
        # Re-insert at the head so the key becomes the most recently used
        self.storage.append_left(key_val)
        return key_val[1]

    def put(self, key: int, value: int) -> None:
        """Update the value of the key if the key exists. Otherwise, add the key-value pair to the cache.
        If the number of keys exceeds the capacity from this operation, evict the least recently used key.
        """
        # Remove any existing entry first: updating a cached key must never
        # evict a different key, even when the cache is full.
        already_cached = self.storage.pop_by_key(key) is not None
        if not already_cached and len(self.storage) >= self.capacity:
            self.storage.pop_right()  # Tail is the least recently used
        self.storage.append_left((key, value))


In [34]:
# Build a small demo list; append_left adds at the head, append_right at the tail
dll = DoublyLinkedList()
dll.append_left((1, 1))
dll.append_left((2, 2))  # Latest left-append becomes the new head
dll.append_right((3, 3))
dll.append_right((4, 4))
dll.append_right((4, 5))  # Key 4 again: the append methods do not check for duplicate keys


In [35]:
dll  # Echo the list; its __repr__ joins the nodes from head to tail with " -> "

(2, 2) -> (1, 1) -> (3, 3) -> (4, 4) -> (4, 5)

In [None]:
# TODO: re-implement DLL using ListNode.key and ListNode.value

In [46]:
class ListNode:
    """Node of a doubly-linked list that stores a key and its value separately."""

    def __init__(self, key: int, val: int, prev=None, next=None):
        # Neighbour links default to None, i.e. an unattached node
        self.prev = prev
        self.next = next
        self.key = key
        self.val = val

    def __repr__(self) -> str:
        """Render the node as '(key: val)'."""
        return "(" + f"{self.key}: {self.val}" + ")"

    def __str__(self) -> str:
        """Same text as `__repr__`."""
        return self.__repr__()


class DoublyLinkedList:
    """Doubly-linked list whose nodes each hold a key and a value.

    The head is the left end and the tail is the right end. Nodes are
    expected to expose `key`, `val`, `prev` and `next` attributes.
    """

    def __init__(self):
        # Initiate head, tail, and length
        self.head = None
        self.tail = None
        self.length = 0

    def __len__(self) -> int:
        return self.length

    def __str__(self) -> str:
        """Render the nodes from head to tail, joined by ' -> '."""
        parts = []
        node = self.head
        while node is not None:
            parts.append(str(node))
            node = node.next
        return " -> ".join(parts)

    def __repr__(self) -> str:
        return self.__str__()

    def _find(self, key: int):
        """Return the first node (scanning from the head) whose key matches, or None."""
        node = self.head
        while node is not None:
            if node.key == key:
                return node
            node = node.next
        return None

    def _unlink(self, node) -> None:
        """Detach `node` from the list, repairing head/tail and the length."""
        if node.prev is None:  # Node is the head
            self.head = node.next
        else:
            node.prev.next = node.next
        if node.next is None:  # Node is the tail
            self.tail = node.prev
        else:
            node.next.prev = node.prev
        node.prev = None
        node.next = None
        self.length -= 1

    def append_left(self, key: int, val: int) -> None:
        """Adds a new key/value node at the left side (head) of the list."""
        new_node = ListNode(key, val, next=self.head)
        if self.head is None:  # Empty list: new node is also the tail
            self.tail = new_node
        else:
            self.head.prev = new_node
        self.head = new_node
        self.length += 1

    def append_right(self, key: int, val: int) -> None:
        """Adds a new key/value node at the right side (tail) of the list."""
        new_node = ListNode(key, val, prev=self.tail)
        if self.tail is None:  # Empty list: new node is also the head
            self.head = new_node
        else:
            self.tail.next = new_node
        self.tail = new_node
        self.length += 1

    def pop_left(self) -> int:
        """Remove the head node and return its value.

        Returns None when the list is empty.
        """
        node = self.head
        if node is None:
            return None
        self._unlink(node)
        return node.val

    def pop_right(self) -> int:
        """Remove the tail node and return its value.

        Returns None when the list is empty.
        """
        node = self.tail
        if node is None:
            return None
        self._unlink(node)
        return node.val

    def get(self, key: int) -> int:
        """Retrieve value at key. Returns None if key is not found."""
        node = self._find(key)
        return None if node is None else node.val

    def pop_by_key(self, key: int) -> int:
        """Remove the first node holding `key` and return its value.

        Returns None if the key is not found.
        """
        node = self._find(key)
        if node is None:
            return None
        self._unlink(node)
        return node.val


class LRUCache:
    """LRU cache that keeps its entries in a DoublyLinkedList.

    The head of the list is the most recently used entry and the tail is
    the least recently used one.
    """

    def __init__(self, capacity: int):
        """Initialize the LRU cache with positive size capacity."""
        self.capacity = capacity
        self.storage = DoublyLinkedList()

    def get(self, key: int) -> int:
        """Return the value of the key if the key exists, otherwise return -1."""
        val = self.storage.pop_by_key(key)  # None means the key is not cached
        if val is None:
            return -1
        # Re-insert at the head so the key becomes the most recently used
        self.storage.append_left(key, val)
        return val

    def put(self, key: int, value: int) -> None:
        """Update the value of the key if the key exists. Otherwise, add the key-value pair to the cache.
        If the number of keys exceeds the capacity from this operation, evict the least recently used key.
        """
        # Remove any existing entry first: updating a cached key must never
        # evict a different key, even when the cache is full.
        already_cached = self.storage.pop_by_key(key) is not None
        if not already_cached and len(self.storage) >= self.capacity:
            self.storage.pop_right()  # Tail is the least recently used
        self.storage.append_left(key, value)

In [47]:
# Build a small demo list; append_left adds at the head, append_right at the tail
dll = DoublyLinkedList()
dll.append_left(1, 1)
dll.append_left(2, 2)  # Latest left-append becomes the new head
dll.append_right(3, 3)
dll.append_right(4, 4)
dll.append_right(4, 5)  # Key 4 again: the append methods do not check for duplicate keys
dll  # Echo the list; its __repr__ joins the nodes from head to tail with " -> "

(2: 2) -> (1: 1) -> (3: 3) -> (4: 4) -> (4: 5)

In [8]:
def tester(x=None):
    """Return 0 for a truthy `x`, and 1 otherwise (including the default None)."""
    if x:
        return 0
    return 1
tester()

1

---

## Learn: Data Science Design Manual

Chapter 2: Math Prelims

Today, I worked through 2.1 Probability and 2.2 Descriptive Statistics. Next up is 2.3 Correlation Analysis.

Notes are in a separate notebook dedicated to the book.

---

## Build: Savor



First, I finished the function to get the data and load it into a dataframe:

In [None]:
def get_data_for_date_range(conn: Airtable, start: datetime, end: datetime) -> pd.DataFrame:
    """Retrieve Airtable data within the specified time range.

    Records are requested sorted by `time_in`, newest first, and scanned until
    one at or before `start` is seen. Only records with
    `start < time_in < end` are kept.

    Assumes that datetimes passed are in UTC, so convert to UTC beforehand.
    They must be timezone-aware, because `time_in` is parsed with an explicit
    "+00:00" offset and is compared against them.

    Args:
        conn: Airtable connection; its `get_iter` must yield pages of records.
        start: Exclusive lower bound of the range.
        end: Exclusive upper bound of the range.

    Returns:
        DataFrame with one row per matching record: the record's fields plus
        an "id" column holding the record id.
    """
    from itertools import chain

    # Page size of 50 = roughly # records in average day
    pages = conn.get_iter(sort=["-time_in"], page_size=50)
    records = []
    # chain.from_iterable is lazy, so breaking out of the loop also stops
    # further pages from being pulled from the iterator.
    for record in chain.from_iterable(pages):
        fields = record["fields"]
        # Airtable saves records in UTC.
        # NOTE(review): assumes time_in ends in a 5-character suffix such as
        # ".000Z" that has to be swapped for an explicit offset - confirm.
        time_in = datetime.fromisoformat(fields["time_in"][:-5] + "+00:00")
        if time_in >= end:
            # Too new. A record exactly at `end` is skipped as well; it must
            # not end the scan, because older records may still be in range.
            continue
        if time_in <= start:
            break  # Sorted newest-first: every remaining record is older
        # Flatten id and fields into a new dict, leaving the record untouched
        records.append({**fields, "id": record["id"]})
    # Load list of records into dataframe and return
    return pd.DataFrame.from_records(records)

Then I wrote and manually tested (no automated tests yet) a function for expanding the relational columns:

In [None]:
def expand_list_cols(df: pd.DataFrame, cols: List[str]) -> pd.DataFrame:
    """Spread each array-valued column in `cols` over numbered columns.

    Every listed column is replaced by columns named "<col>_<n>", one per
    label produced by expanding its cells with `pd.Series`. The expanded
    columns are placed after the remaining original columns, in the order
    given by `cols`. The input DataFrame is left unmodified.
    """
    remaining = df.copy()  # Work on a copy so the caller's frame is untouched
    expanded_frames = []
    for name in cols:
        # Take the column out of the working copy, then widen its cells
        column = remaining.pop(name)
        widened = column.apply(pd.Series)
        # TODO: if only one column, don't suffix with number
        widened = widened.rename(
            columns=lambda label, prefix=name: f"{prefix}_{label}"
        )
        expanded_frames.append(widened)
    # Remaining original columns first, then the expanded ones side by side
    return pd.concat([remaining, *expanded_frames], axis=1)
# === === === === === ===
# Expand all relation columns
# Names of the columns handed to expand_list_cols below
relations = [
    "mental",
    "physical",
    "tag",
    "subloc",
    "moment_log",
    "who",
    "dose",
    "session",
    "project_location",
]

# NOTE(review): df1_engage is not defined in this part of the notebook -
# presumably the frame loaded by get_data_for_date_range; confirm.
df2_engage = expand_list_cols(df1_engage, relations)
df2_engage.head()  # Preview the first rows of the expanded frame