### String lookup

In [1]:
from nltk.corpus import words

# Keep only the corpus entries for which str.islower() is true.
word_list = list(filter(str.islower, words.words()))

In [2]:
def search_sorted(data, key):
    """
    Binary-search a sorted list for key.

    Arguments
    data -- (list) the elements to search within, already in ascending order
    key -- the key to search for

    Returns True when an element equal to key is found, False otherwise.
    """
    left, right = 0, len(data) - 1
    # Invariant: if key is present, its position lies in [left, right].
    while left <= right:
        middle = (left + right) // 2
        candidate = data[middle]
        if candidate == key:
            return True
        if key < candidate:
            # key can only be in the lower half
            right = middle - 1
        else:
            # key can only be in the upper half
            left = middle + 1
    return False


class BinaryLookup:
    '''Look up words in a single sorted list using binary search.'''

    def __init__(self, words):
        '''Store a sorted copy of words.

        Arguments
        words -- an iterable of words in no particular order; the object
                 passed in is left untouched
        '''
        self.words = sorted(words)

    def __contains__(self, word):
        '''Return True if word appears in self.words (binary search), else False.'''
        return search_sorted(self.words, word)

    def __sizeof__(self):
        '''Return the size in bytes of the self.words list object.

        The figure is shallow: it covers the list itself, not the strings
        the list refers to.
        '''
        # Imported locally so the method does not depend on another cell
        # having brought getsizeof into scope.
        from sys import getsizeof
        return getsizeof(self.words)
    

Instead of having a single ordered list, `AlphaLookup` keeps a list of 26 lists, where each list is a sorted list of the words that begin with one letter of the alphabet (you can assume that all words you are passed begin with a lowercase letter).

In [3]:
# Code point of "a"; subtracting it maps the letters "a".."z" to 0..25.
offset = ord("a")


class AlphaLookup:
    '''Look up words by first letter, then binary search within that letter's list.'''

    def __init__(self, words):
        '''Build self.lists as 26 sorted lists, one per first letter a-z.

        Arguments
        words -- an iterable of words, each beginning with a letter a-z

        Raises
        ValueError -- if a word is empty or does not begin with a-z; without
                      this check some first characters (e.g. "Z") give a
                      negative index that silently lands in the wrong list
        '''
        self.lists = [[] for _ in range(26)]
        for word in words:
            index = self._bucket_index(word)
            if index is None:
                raise ValueError(
                    "word must begin with a lowercase letter a-z: %r" % (word,)
                )
            self.lists[index].append(word)
        for bucket in self.lists:
            bucket.sort()

    @staticmethod
    def _bucket_index(word):
        '''Return the 0-25 index for word's first letter, or None if it has none.'''
        if not word:
            return None
        index = ord(word[0]) - offset
        return index if 0 <= index < 26 else None

    def __contains__(self, word):
        '''Return True if word is stored, else False.

        The list for the word's first letter is selected and then searched
        with binary search. A word that is empty or does not begin with a-z
        cannot have been stored, so it is reported as absent.
        '''
        index = self._bucket_index(word)
        if index is None:
            return False
        return search_sorted(self.lists[index], word)

    def __sizeof__(self):
        '''Return the memory in bytes used by self.lists and each list inside it.

        The figure is shallow: the word strings themselves are not counted.
        '''
        # Imported locally so the method does not depend on another cell
        # having brought getsizeof into scope.
        from sys import getsizeof
        return getsizeof(self.lists) + sum(getsizeof(bucket) for bucket in self.lists)

`Alpha2Lookup` keeps a separate sorted list for each possible combination of the first two letters. Be careful about one-letter words!

In [None]:
offset = ord("a")

def get_index(word):
    '''Return the list index for word, based on its first one or two letters.

    For lowercase a-z input the result lies in range(26 * 27):
    a one-letter word maps to 0..25, and any longer word maps to
    26 + 26 * (first letter) + (second letter), each letter counted from a = 0.
    '''
    if len(word) != 1:
        first = ord(word[0]) - offset
        second = ord(word[1]) - offset
        # The first 26 slots are reserved for the one-letter words.
        return 26 * (first + 1) + second
    return ord(word) - offset

# my code here
class Alpha2Lookup:
    '''Look up words by their first two letters, then binary search within that list.'''

    def __init__(self, words):
        '''Build self.lists with 27*26 slots, indexed by get_index(word).

        A slot stays None until the first word for it arrives, so only the
        letter combinations that actually occur get a list. Every list that
        was created is sorted.

        Arguments
        words -- an iterable of words
        '''
        self.lists = [None] * (27 * 26)
        for word in words:
            index = get_index(word)
            if self.lists[index] is None:
                self.lists[index] = []
            self.lists[index].append(word)
        for bucket in self.lists:
            if bucket:
                bucket.sort()

    def __contains__(self, word):
        '''Return True if word is stored, else False.

        The list for the word's first two letters is selected and then
        searched with binary search. An empty word, an index outside
        self.lists, or a slot that never received a word all mean the word
        is absent.
        '''
        if not word:
            return False
        index = get_index(word)
        if not 0 <= index < len(self.lists):
            return False
        bucket = self.lists[index]
        if bucket is None:
            # No stored word has this prefix; there is nothing to search.
            return False
        return search_sorted(bucket, word)

    def __sizeof__(self):
        '''Return the memory in bytes used by self.lists and each list inside it.

        Slots that are still None are skipped: they hold no list, so they
        add nothing beyond their entry in self.lists. The figure is shallow:
        the word strings themselves are not counted.
        '''
        # Imported locally so the method does not depend on another cell
        # having brought getsizeof into scope.
        from sys import getsizeof
        total = getsizeof(self.lists)
        for bucket in self.lists:
            if bucket is not None:
                total += getsizeof(bucket)
        return total
    


 HashLookup class

In [None]:
class HashLookup:
    '''Look up words using a hash map.'''

    def __init__(self, words):
        '''Create a hash map with as many buckets as there are words.

        An unused bucket is None, a bucket with one member holds just that
        word, and a bucket with more than one member holds a list of words.

        Arguments
        words -- a sized collection of hashable words (len() is taken of it)
        '''
        self.size = len(words)
        self.map = [None] * self.size
        for word in words:
            slot = self._get_hash(word)
            bucket = self.map[slot]
            if bucket is None:
                self.map[slot] = word
            elif isinstance(bucket, list):
                bucket.append(word)
            else:
                # Second word for this bucket: promote the single entry to a list.
                self.map[slot] = [bucket, word]

    def _get_hash(self, word):
        '''Return the index into self.map for word (requires self.size > 0).'''
        return hash(word) % self.size

    def __contains__(self, word):
        '''Return True if word is in the bucket its hash selects, else False.'''
        if not self.size:
            # No buckets exist, and hashing would divide by zero.
            return False
        bucket = self.map[self._get_hash(word)]
        if bucket is None:
            return False
        if isinstance(bucket, list):
            return word in bucket
        return bucket == word

    def __sizeof__(self):
        '''Return the bytes used by self.map plus every bucket that is a list.

        The figure is shallow: the word strings themselves are not counted.
        '''
        # Imported locally so the method does not depend on another cell
        # having brought getsizeof into scope.
        from sys import getsizeof
        total = getsizeof(self.map)
        for item in self.map:
            if isinstance(item, list):
                total += getsizeof(item)
        return total

    def collisions_info(self):
        '''Print collision statistics for the hash map.

        Prints the share of buckets holding more than one word (as a
        fraction of all buckets, not multiplied by 100) and the average
        number of words in those buckets. Either figure is printed as 0.0
        when its denominator would be zero.
        '''
        collisions = 0
        bucket_size = 0
        for bucket in self.map:
            if isinstance(bucket, list):
                collisions += 1
                bucket_size += len(bucket)
        print("percent collisions")
        print(collisions / self.size if self.size else 0.0)
        print("average size of collisions")
        print(bucket_size / collisions if collisions else 0.0)

Evaluate the amount of space each of your search methods uses.

In [None]:
# Build each lookup structure from the same word list and print the size
# that sys.getsizeof reports for it.
from sys import getsizeof

for lookup_class in [set, BinaryLookup, AlphaLookup, Alpha2Lookup, HashLookup]:
    print(lookup_class)
    lookup = lookup_class(word_list)
    print(getsizeof(lookup))

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from collections import defaultdict

Sierpinski Triangle

In [None]:
def sierpienski(x,y,size,n):
    '''
    Draws a Sierpinski triangle using recursion.

    Each call draws one triangle with draw_triangle(x, y, size) and then
    recurses on three half-size triangles: one shifted left by size/4, one
    shifted right by size/4, and one raised by half the height of an
    equilateral triangle of side `size`.

    Inputs:
    x, y -- coordinates handed to draw_triangle for the outermost triangle
            (presumably the midpoint of its base, given the offsets used
            below -- confirm against draw_triangle)
    size -- side length of the outermost triangle
    n -- recursion depth; nothing is drawn when n <= 0
    Output: None (the triangles are drawn as a side effect)
    '''
    # "<=" rather than "==" so a negative or fractional depth still terminates.
    if n <= 0:
        return
    draw_triangle(x, y, size)

    half = 0.5 * size
    shift = 0.25 * size
    # Half the height of an equilateral triangle with side `size`.
    rise = half * (np.sqrt(3) / 2)

    sierpienski(x - shift, y, half, n - 1)
    sierpienski(x + shift, y, half, n - 1)
    sierpienski(x, y + rise, half, n - 1)

Set implementation with BSTs

In [None]:
class TreeSet:
    """
    A set implementation based on an (unbalanced) binary search tree.

    Every node is itself a TreeSet. A node whose value is None is empty;
    smaller values live in the left subtree and larger values in the right.
    """

    def __init__(self):
        """
        Create and initialize an empty TreeSet object.
        """
        self.value = None
        self.left = None
        self.right = None

    def insert(self, value):
        """
        Insert the given value into the TreeSet.

        If the value is already present, the tree is left unchanged.
        The tree is walked with a loop rather than recursion so that a very
        deep tree (for example one built from sorted input) does not exceed
        the recursion limit.

        Arguments:
        value --  the value to insert
        """
        node = self
        while True:
            if node.value is None:
                node.value = value
                return
            if node.value == value:
                return
            if value < node.value:
                if node.left is None:
                    node.left = TreeSet()
                node = node.left
            elif value > node.value:
                if node.right is None:
                    node.right = TreeSet()
                node = node.right
            else:
                # Neither smaller, larger nor equal: there is no place for it.
                return

    def __contains__(self, value):
        """
        Check to see if the binary tree has a certain value.

        Arguments:
        value -- the value to search for within the tree

        Returns True if the value is found, False otherwise.
        """
        node = self
        while node is not None and node.value is not None:
            if value == node.value:
                return True
            node = node.left if value < node.value else node.right
        return False

    def __str__(self, s=""):
        """
        A crude way to print the tree. A better way would be to print the tree by depth.

        Values are listed node first, then its right subtree, then its left
        subtree, each followed by ", ". Values are converted with str(), so
        they do not have to be strings. An empty tree is reported as
        "(An empty tree)".

        Arguments:
        s -- text placed in front of the listing of a non-empty tree

        Note: __str__ is a special method, like __init__,__contains__, etc., that returns a string representation of an object.
        """
        if self.value is None:
            return "(An empty tree)"

        pieces = [s]
        pending = [self]
        while pending:
            node = pending.pop()
            if node.value is None:
                pieces.append("(An empty tree)")
                continue
            pieces.append(str(node.value) + ", ")
            # Push left first so the right subtree is popped, and listed, first.
            if node.left is not None:
                pending.append(node.left)
            if node.right is not None:
                pending.append(node.right)
        return "".join(pieces)