In [39]:
class Group(object):
    def __init__(self, _name):
        self.name = _name
        self.groups = []
        self.users = []

    def add_group(self, group):
        self.groups.append(group)

    def add_user(self, user):
        self.users.append(user)

    def get_groups(self):
        return self.groups

    def get_users(self):
        return self.users

    def get_name(self):
        return self.name


parent = Group("parent")
child = Group("child")
sub_child = Group("subchild")

sub_child_user = "sub_child_user"
sub_child.add_user(sub_child_user)

child.add_group(sub_child)
parent.add_group(child)

### Write a function that provides an efficient look up of whether the user is in a group.

#### Notes:
First proposal of Algorithm:
 * For each user in the group
 * Check for user memebership in that group, if true return True
 * Else for each sub-group, iterate (or recurse)
 
Given that sub-group could be a member of several other groups, we can potentially have a lot of repeated work. For example if there exist a group C that is a subgroup of A and B, with the algorithm above we would search it's users and sub-groups twice. 

* first optimzation is to ansure that we cache the search results and we don't 'visit' a group more than once.

* Second possible optimization would be to flatten construct a cache of users -> groups, if this is an operation that we perform refularly and needs to be optimized it can be an option. 

For now I'll implement the first algorithm with the first optimization

In [40]:
def is_user_in_group(user, group, cache=True):
    """
    Return True if user is in the group, False otherwise.

    Args:
      user(str): user name/id
      group(class:Group): group to check user membership against
    """
    if cache:
        visited = list()
        return recursive_user_search(user, group, visited)
    else:
        return recursive_user_search_no_cache(user, group)

def recursive_user_search(user, group, visited):
    if user in group.get_users():
        return True
    
    for group in group.get_groups():
        
        if group.get_name() not in visited:
            visited.append(group.get_name())
            return recursive_user_search(user, group, visited)
            
    return False

def recursive_user_search_no_cache(user, group):
    if user in group.get_users():
        return True
    
    for group in group.get_groups():
        return recursive_user_search_no_cache(user, group)
            
    return False

            

In [41]:
import random
root = Group('root')
lvl_1 = [Group('A_{}'.format(g)) for g in range(0, 100) ]
lvl_2 = [Group('B_{}'.format(g)) for g in range(0, 1000) ]
lvl_3 = [Group('C_{}'.format(g)) for g in range(0, 10000) ]
random.seed(5)

user_limit = 10000
quartile = user_limit//4
users = ['user_{}'.format(u) for u in range(0, user_limit)]

for group in lvl_1:
    user_indexes = [i for i in range(0, quartile)]
    for user in user_indexes:
        if random.random() <0.5:
            group.add_user(users[user])
    root.add_group(group)
    
for group_A in lvl_1:
    user_indexes = [i for i in range(quartile, quartile*2)]
    for user in user_indexes:
        if random.random() <0.4:
            group_A.add_user(users[user])
        
    for group_B in lvl_2:
        if random.random() < 0.4:
            group_A.add_group(group_B)
            
for group_B in lvl_2:
    
    user_indexes = [i for i in range(quartile*2, quartile*3)]
    for user in user_indexes:
        if random.random() <0.3:
            group_B.add_user(users[user])
        
    for group_C in lvl_3:
        if random.random() < 0.3:
            group_B.add_group(group_C)
            
for grup_C in lvl_3:
    user_indexes = [i for i in range(quartile*3, user_limit)]
    for user in user_indexes:
        if random.random() <0.2:
            group_C.add_user(users[user])

In [44]:
%timeit is_user_in_group('user_9560', root, cache=False)

52.7 µs ± 1.04 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [45]:
%timeit is_user_in_group('user_9560', root, cache=True)

54.9 µs ± 997 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


I would really like to explore this in a more systematic and rigurus way but sadly I need to catch-up to the graduation deadline