### Data Structure used in problems 

In [25]:
class LinkedListNode:

    def __init__(self, data):
        self.data = data
        self.next = None

class Stack:

    def __init__(self):
        self.num_elements = 0
        self.head = None

    def push(self, data):
        new_node = LinkedListNode(data)
        if self.head is None:
            self.head = new_node
        else:
            new_node.next = self.head
            self.head = new_node
        self.num_elements += 1

    def pop(self):
        if self.is_empty():
            return None
        temp = self.head.data
        self.head = self.head.next
        self.num_elements -= 1
        return temp

    def top(self):
        if self.head is None:
            return None
        return self.head.data

    def size(self):
        return self.num_elements

    def is_empty(self):
        return self.num_elements == 0

## Problem 1: LRU Cache


### Least Recently Used Cache

We have briefly discussed caching as part of a practice problem while studying hash maps.

The lookup operation (i.e., get()) and put() / set() is supposed to be fast for a cache memory.

While doing the get() operation, if the entry is found in the cache, it is known as a cache hit. If, however, the entry is not found, it is known as a cache miss.

When designing a cache, we also place an upper bound on the size of the cache. If the cache is full and we want to add a new entry to the cache, we use some criteria to remove an element. After removing an element, we use the put() operation to insert the new element. The remove operation should also be fast.

For our first problem, the goal will be to design a data structure known as a Least Recently Used (LRU) cache. An LRU cache is a type of cache in which we remove the least recently used entry when the cache memory reaches its limit. For the current problem, consider both get and set operations as an use operation.

Your job is to use an appropriate data structure(s) to implement the cache.

In case of a cache hit, your get() operation should return the appropriate value.
- In case of a cache miss, your get() should return -1.
- While putting an element in the cache, your put() / set() operation must insert the element. If the cache is full, you must write code that removes the least recently used entry first and then insert the element.
- All operations must take O(1) time.

For the current problem, you can consider the size of cache = 5.

Here is some boiler plate code and some example test cases to get you started on this problem:

In [7]:
class LRU_Node:
    def __init__(self, key, value):
        self.key = key
        self.value = value
        self.prev = None
        self.next = None
            

class LRU_Cache:
    def __init__(self, capacity = 5):
        ''' #Initialize LRU_Cache node  '''
        
        self.hmap = dict()
        self.capacity = capacity
        self.num_entries = 0
        self.head = None
        self.tail = None
    
        
    # get(key)    
    def get(self, key):
        
        if self.capacity == 0:
            print("Warning: LRU capacity is 0")

        # Check for key in hash map
        if self.hmap.get(key) is not None:
            node = self.hmap[key]
            
            # Update tail pointer if item removed is tail node
            if node == self.tail:
                self.tail = node.next

                
            # Move updated node to front of queue    
            self.enQueue(node)
            return node.value

        return -1

    # set(key,value)
    def set(self, key, value):
        
        if self.capacity == 0:
            print("Warning: LRU capacity is 0")
            return
        
        # Check if key is in map, update or append new node to the front of the queue
        if self.hmap.get(key) is not None:
            node = self.hmap[key]
            node.value = value
            node.key = key
        else:
            # check if LRU is full, remove least used node (tail) from queue
            node = LRU_Node(key, value)
            if self.num_entries == self.capacity:
                self.removeTail()
                self.num_entries -= 1
            self.hmap[key] = node
            self.num_entries += 1
            
        # add new node to the head of the list
        self.enQueue(node)
        
    # removeTail()
    def removeTail(self):

        # Check if tail node exists, remove node from queue and hash map
        # Update num_entries to reflect number of items in queue
        if self.tail:
            next_node = self.tail.next
            del self.hmap[self.tail.key]
            self.tail = next_node

    # enQueue(node)
    def enQueue(self, node):

        # Append node to front of the list, and set tail node to head if new node
        # Update num_entries
        if self.head:
            node_tmp = self.head
            self.head = node
            node.prev = node_tmp
            node_tmp.next = node
        else:
            self.head = node
            self.tail = self.head    


if __name__ == "__main__":
    # Test Case #1: Boilerplate 
    our_cache = LRU_Cache(5)

    our_cache.set(1, 1);
    our_cache.set(2, 2);
    our_cache.set(3, 3);
    our_cache.set(4, 4);

    print(our_cache.get(1))  # returns 1
    print(our_cache.get(2))  # returns 2
    print(our_cache.get(9))  # returns -1 because 9 is not present in the cache

    our_cache.set(5, 5) 
    our_cache.set(6, 6)

    print(our_cache.get(3))  # returns -1 because the cache reached it's
                             # capacity and 3 was the least recently used entry

    # Test Case #2: Warning issued for Zero Capacity LRU
    our_cache = LRU_Cache(0)

    our_cache.set(1, 1)
    # returns warning

    print(our_cache.get(1))
    # should return -1

    # Test Case 3 - updating existing key
    our_cache = LRU_Cache(2)

    our_cache.set(1, 1)
    our_cache.set(2, 2)
    our_cache.set(1, 10)

    print(our_cache.get(1))
    # returns 10

    print(our_cache.get(2))
    # returns 2

1
2
-1
-1
-1
10
2


## Problem 2: File Recursion

Finding Files

In [21]:
import os, fnmatch
import re

def find_files(suffix, path):
    """
    Find all files beneath path with file name suffix.
    Note that a path may contain further subdirectories
    and those subdirectories may also contain further subdirectories.
    There are no limit to the depth of the subdirectories can be.
    Args:
      suffix(str): suffix if the file name to be found
      path(str): path of the file system
    Returns:
       a list of paths
    """

    cfiles_dir = []
    path = os.getcwd() + "/" + path
  
    extension = re.findall(r'(\w+\.\w+$)', path)
    #print(extension)
    if len(extension) > 0:
        print("Invalid path: {}".format(extension))
        return None
              
    # If path does not exist, return False    
    if not os.path.exists(path):
        print("Path: {} does not exist!".format(path))
        return None 

    for root, dirs, files in os.walk(path):
        for file_name in files:
            if fnmatch.fnmatch(file_name, suffix):
                if root not in cfiles_dir:
                    cfiles_dir.append(root)

    if len(cfiles_dir) == 0:
        print("File(s): {} not found!".format(suffix))
        return None
    else:   
        return cfiles_dir
    
if __name__ == "__main__":
    path = "testdir"
    print(f"Path {path} === ", find_files("*.c", path), end='\n\n')

    path = "testdir/subdir3"
    print(f"Path {path} === ", find_files("*.c", path), end='\n\n')

    path = "testdir/subdir4"
    print(f"Path {path} === ", find_files("*.c", path), end='\n\n')

    path = "testdir/test.h"
    print(f"Path {path} === ", find_files("*.c", path), end='\n\n')

Path testdir ===  ['/Users/mukesh/Projects/learning/udacity/dsand/projects/P1/testdir', '/Users/mukesh/Projects/learning/udacity/dsand/projects/P1/testdir/subdir3/subsubdir1', '/Users/mukesh/Projects/learning/udacity/dsand/projects/P1/testdir/subdir5', '/Users/mukesh/Projects/learning/udacity/dsand/projects/P1/testdir/subdir1']

Path testdir/subdir3 ===  ['/Users/mukesh/Projects/learning/udacity/dsand/projects/P1/testdir/subdir3/subsubdir1']

File(s): *.c not found!
Path testdir/subdir4 ===  None

Invalid path: ['test.h']
Path testdir/test.h ===  None



## Problem 3: Huffman Coding

In [27]:
import heapq as h
import sys

debug = 0
class HeapNode:
    def __init__(self, char, frequency):
        self.char = char
        self.frequency = frequency
        self.right = None
        self.left = None
        
    def __lt__(self, other):
        return self.frequency < other.frequency
        
    def __str__(self, level=0):
        ret = "\t"*level+repr(self.char)+"\n"
        return ret

    def __repr__(self):
        return self.frequency
    
        
class HuffmanCode:
    def __init__(self, text):
        self.text = text
        self.huff_tree = []
        self.codes = dict()
        self.decodes = dict()
        
    
    def createFrequencyDict(self):

        # Create Hash Counter for character frequency, handles spaces
        freq_dict = dict()
        for char in self.text:
            if char not in freq_dict.keys():
                freq_dict[char] = 1
            else:
                freq_dict[char] += 1
                    
        return freq_dict

    def createTree(self, freq_dict):

        for char in freq_dict.keys():
            char_node = HeapNode(char, freq_dict[char])
            h.heappush(self.huff_tree, char_node)

        # merging nodes in Heap    
        while len(self.huff_tree) > 1:
            node1 = h.heappop(self.huff_tree)
            node2 = h.heappop(self.huff_tree)

            # Creating new node with "empty" character value, but combining frequency weights
            # Set pointer to child nodes, left child < right child
            merged_node = HeapNode(None, node1.frequency + node2.frequency)
            merged_node.left = node1
            merged_node.right = node2

            # Push merged_node back into the heap
            h.heappush(self.huff_tree, merged_node)


    def createCodeDict_helper(self, root, current_code):
        if root == None:
            return
        
        if root.char != None:
            if len(current_code) > 0:
                self.codes[root.char] = current_code
                self.decodes[current_code] = root.char
            else:
                current_code = '0'
                self.codes[root.char] = current_code
                self.decodes[current_code] = root.char
            return
        
        self.createCodeDict_helper(root.left, current_code + "0")
        self.createCodeDict_helper(root.right, current_code + "1")
        
        
    def createCodeDict(self):
        root = h.heappop(self.huff_tree)
        current_code = ""
        self.createCodeDict_helper(root, current_code)


    def encodeText(self, text):
        encoded_text = ""

        for character in text:
            encoded_text += self.codes[character]
    
        return encoded_text


    def decodeText(self, encoded_text):
        current_code = ""
        decoded_text = ""

        if encoded_text != -1:
            for bit in encoded_text:
                current_code += bit
                if(current_code in self.decodes):
                    character = self.decodes[current_code]
                    decoded_text += character
                    current_code = ""
        else:
            decoded_text = -1
            
        return decoded_text



def huffman_encoding(data):
    huff_code = -1
    hcode = None
    
    if len(data) > 0:
        hcode = HuffmanCode(data)
        frequency_dict = hcode.createFrequencyDict()
        hcode.createTree(frequency_dict)
        hcode.createCodeDict()
        huff_code = hcode.encodeText(data)
    
    else:
        print("Input data is empty string, returning {}!".format(huff_code))
    
    return huff_code, hcode

def huffman_decoding(data,tree):
    text = -1

    if data != -1:
        text = tree.decodeText(data)

    return text


if __name__ == "__main__":
    codes = {}

    a_great_sentence = "The bird is the word"

    print ("The size of the data is: {}\n".format(sys.getsizeof(a_great_sentence)))
    print ("The content of the data is: {}\n".format(a_great_sentence))

    encoded_data, tree = huffman_encoding(a_great_sentence)

    print ("The size of the encoded data is: {}\n".format(sys.getsizeof(int(encoded_data, base=2))))
    print ("The content of the encoded data is: {}\n".format(encoded_data))

    decoded_data = huffman_decoding(encoded_data, tree)

    print ("The size of the decoded data is: {}\n".format(sys.getsizeof(decoded_data)))
    print ("The content of the encoded data is: {}\n".format(decoded_data))
    


The size of the data is: 69

The content of the data is: The bird is the word

The size of the encoded data is: 36

The content of the encoded data is: 1000111111100100001101110000101110110110100011111111001101010011100001

The size of the decoded data is: 69

The content of the encoded data is: The bird is the word



## Problem 4: Active Directory

In Windows Active Directory, a group can consist of user(s) and group(s) themselves. We can construct this hierarchy as such. Where User is represented by str representing their ids.

In [28]:
class Group(object):
    def __init__(self, _name):
        self.name = _name
        self.groups = []      # list of class Groups
        self.users = []       # list of string user

    def add_group(self, group):
        if debug: print("[[add_group]]: {}".format(group))
        self.groups.append(group)

    def add_user(self,user):
        if debug: print("[[add_user]]: {}".format(user))
        self.users.append(user)

    def get_groups(self):
        return self.groups

    def get_users(self):
        return self.users

    def get_name(self):
        return self.name

    
def is_user_in_group(user, group, grp_queue = None):

    group_queue = []

    # Match user to group's name
    if user == group.get_name():
        # print("user matches group name")
        return True
    
    # Match user to get_users
    elif user in group.get_users():
        # print("user is in group's user list")
        return True
    
    # if Group's group list not empty, recurse 
    if len(group.get_groups()) == 0:
        # print("group's group list is empty, return False")
        return False
    
    else:
        
        # print("group.name = {}".format(group.get_name()))
        # print("Descend into Group's group at a time, group_list: {}".format(group.get_groups()))
        if grp_queue is None:
            grp_queue = group.get_groups()
            return is_user_in_group(user, group, grp_queue)

        else:
            grp_queue += group.get_groups()  
            return is_user_in_group(user, grp_queue.pop(0), grp_queue)

if __name__ == "__main__":                  
    parent = Group("parent")
    child = Group("child")
    sub_child = Group("subchild")

    sub_child_user = "sub_child_user"
    sub_child.add_user(sub_child_user)

    child.add_group(sub_child)
    parent.add_group(child)

## Problem 5: Blockchain

A Blockchain is a sequential chain of records, similar to a linked list. Each block contains some information and how it is connected related to the other blocks in the chain. Each block contains a cryptographic hash of the previous block, a timestamp, and transaction data. For our blockchain we will be using a SHA-256 hash, the Greenwich Mean Time when the block was created, and text strings as the data.

Use your knowledge of linked lists and hashing to create a blockchain implementation.


We can break the blockchain down into three main parts.

First is the information hash:

In [34]:
import hashlib

class Block:

    def __init__(self, timestamp, data, previous_hash):
        self.timestamp = timestamp
        self.data = data
        self.prev = None
        self.previous_hash = previous_hash
        self.hash = self.calc_hash(self.data)

    def calc_hash(self, string):
        sha = hashlib.sha256()

        self.string = string
        hash_str = self.string.encode('utf-8')
        sha.update(hash_str)

        return sha.hexdigest()
  

    def _print(self):
        print("Printing: timestamp, data, previous_hash, hash_code")
        print(self.timestamp)
        print(self.data)
        print(self.previous_hash)
        print(self.hash)

class BlockChain:

    def __init__(self):
        self.head = None
        self.tail = None     
            
          
    def append(self, timestamp, data, previous_hash):
            
        if self.head is None:
            #print("head is none, instantiate Block()")
            self.head = Block(timestamp, data, previous_hash)
            #self.head._print()
            self.tail = self.head
        else:
            #print("head exists, instantiate new node to  Block()")
            new_node = Block(timestamp, data, previous_hash)
            #new_node._print()
            self.head.prev = new_node
            #self.tail = self.head
            #print("new_node: {}".format(new_node))
            #print("self.head.prev: {}".format(self.head.prev))
            #print("self.tail: {}".format(self.tail))
            #print("before update, self.head: {}".format(self.head))
            self.head = new_node
            #print("after update, self.head: {}".format(self.head))
            

    def print(self, index=0):
        #print("[[print]]")
        if index == 0: 
            if self.tail is not None:
                self.tail._print()
        else:
         #   print("[[Passed arg]] index: {}".format(index))
            node = self.tail
            idx = 0
            while node != None:
                #print("idx: {}".format(idx))
                if idx == index:
                    node._print()
                    return
                node = node.prev
                idx +=1
                #print("increment idx: {}".format(idx))
                
if __name__ == "__main__":                
    # Test Case 1                
    b1 = BlockChain()
    b1.append('0612_1950', "N1", None)
    b1.print()

    # Test Case 2
    print("***")
    b1.append('0612_2042', "N2", "b1")
    b1.print(1)

    # Test Case 3
    print("***")
    b1.append('0623_2331', "N3", "b2")
    b1.print(2)

    # Test Case 4: printing empty block node
    print("***")
    b1.append('', "", "")
    b1.print(3)

Printing: timestamp, data, previous_hash, hash_code
0612_1950
N1
None
b79f477f6d435116155b1121455748240bb5e7c81f7043519980df21a8167ca1
***
Printing: timestamp, data, previous_hash, hash_code
0612_2042
N2
b1
2a5f06422d35ac8977cbf311a3178de243428e2c1ca836ed16477cac024360ec
***
Printing: timestamp, data, previous_hash, hash_code
0623_2331
N3
b2
2da09e1fac71f1257e0efcb158f2d71e7d11ccb7a7db83461ce7f4cade83e770
***
Printing: timestamp, data, previous_hash, hash_code



e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855


## Problem 6: Union and Intersection

In [38]:
class Node:
    def __init__(self, value):
        self.value = value
        self.next = None

    def __repr__(self):
        return str(self.value)

class LinkedList:
    def __init__(self):
        self.head = None
        self.size = 0
        
    def __str__(self):
        cur_head = self.head
        out_string = ""
        while cur_head:
            out_string += str(cur_head.value) + " -> "
            cur_head = cur_head.next

        return out_string

    def append(self, value):
        if self.head is None:
            self.head = Node(value)
            return

        node = self.head
        while node.next:
            node = node.next

        node.next = Node(value)
        self.size += 1
        
    def size(self):
        size = 0
        node = self.head
        while node:
            size += 1
            node = node.next

        return size


def convertLList_to_PySet(llist):
    pylist = []

    node = llist.head
    while node:
        pylist.append(node.value)
        node = node.next

    pyset = set(pylist)
    return pyset

    
def union(llist_1, llist_2):

    l1 = convertLList_to_PySet(llist_1)
    l2 = convertLList_to_PySet(llist_2)

    union = l1 | l2

    llist = LinkedList()

    for value in union:
        llist.append(value)
        
    return llist


def intersection(llist_1, llist_2):

    l1 = convertLList_to_PySet(llist_1)
    l2 = convertLList_to_PySet(llist_2)

    intersection = l1 & l2

    llist = LinkedList()
    for value in intersection:
        llist.append(value)

    return llist

def printList(llist):
    
    node = llist.head
    while node:
        print(node.value)
        node = node.next
        

if __name__ == "__main__":

    linked_list_1 = LinkedList()
    linked_list_2 = LinkedList()

    element_1 = [3,2,4,35,6,65,6,4,3,21]
    element_2 = [6,32,4,9,6,1,11,21,1]

    for i in element_1:
        linked_list_1.append(i)

    for i in element_2:
        linked_list_2.append(i)

    print (union(linked_list_1,linked_list_2))
    print (intersection(linked_list_1,linked_list_2))

    # Test case 2

    linked_list_3 = LinkedList()
    linked_list_4 = LinkedList()

    element_1 = [3,2,4,35,6,65,6,4,3,23]
    element_2 = [1,7,8,9,11,21,1]

    for i in element_1:
        linked_list_3.append(i)

    for i in element_2:
        linked_list_4.append(i)

    print (union(linked_list_3,linked_list_4))
    print (intersection(linked_list_3,linked_list_4))

32 -> 65 -> 2 -> 35 -> 3 -> 4 -> 6 -> 1 -> 9 -> 11 -> 21 -> 
4 -> 21 -> 6 -> 
65 -> 2 -> 35 -> 3 -> 4 -> 6 -> 1 -> 7 -> 8 -> 9 -> 11 -> 21 -> 23 -> 

