# <center>Project 1: Biodiversity</center>

### Q1. Species Richness

In [None]:
def get_species_richness(observed_list):
    '''
        Measures the species richness of a list of observed species.
        
        Arguments:
            observed_list (list): A list of bird species (duplicates allowed)
        
        Returns:
            (int, list): A 2-tuple containing the following info:
                           [0]: How many distinct species were observed
                           [1]: The distinct species, sorted in ascending
                                (alphabetical) order
    '''
    
    # de-duplicate first, then sort once and reuse the result for the count
    distinct_species = sorted(set(observed_list))
    
    return len(distinct_species), distinct_species

### Q2. Species Evenness

In [None]:
def get_species_evenness(observed_list):
    '''
        Determines the evenness of a given list of observed species.
        
        Evenness is computed as the reciprocal of Simpson's index, i.e.
        1 / sum(p_i ** 2), where p_i is the proportion of all observations
        that belong to species i.
        
        Arguments:
            observed_list (list): A list of bird species
        
        Returns:
            (int/float, list): A 2-tuple containing the following info:
                           [0]: The evenness of the list of species
                                (0 when nothing was observed)
                           [1]: A list of 2-tuples, sorted by species, where 
                                each element contains the following info:
                                    [0]: A bird species
                                    [1]: The frequency of appearance of that
                                         species
    '''
    
    # exit early if no species were observed (also avoids dividing by zero)
    if not observed_list:
        return (0, [])
    
    total_population = len(observed_list)
    
    # determine the frequency at which each bird species was observed
    species_frequencies = {}
    for bird in observed_list:
        species_frequencies[bird] = species_frequencies.get(bird, 0) + 1
    
    # calculate the Simpson's index from the proportion of each species
    simpsons_index = sum((freq / total_population) ** 2
                         for freq in species_frequencies.values())
        
    return 1 / simpsons_index, sorted(species_frequencies.items())

### Q3.  Comparing habitats

In [None]:
from hidden import get_species_richness, get_species_evenness

# Dispatch table mapping a diversity measure name to the function that
# computes it. Each function is called with a list of species, and element [0]
# of its result is taken as the diversity value.
DIVERSITY_FUNCTIONS = { "richness": get_species_richness,
                        "evenness": get_species_evenness }

def compare_diversity(observed_list, diversity_measure):
    '''
        Ranks habitats by their diversity according to a chosen diversity 
        measure.
        
        Arguments:
            observed_list (list)   : A list of observations, each of which is
                                     unpacked as a (species, habitat) pair
            diversity_measure (str): A key of DIVERSITY_FUNCTIONS selecting
                                     the measure to apply; an unknown key 
                                     raises KeyError once a habitat is scored
            
        Returns:
            list: A list of 2-tuples where each tuple contains the following
                  info:
                      [0]: Habitat name
                      [1]: Diversity of that habitat
                  The list is ordered from most diverse to least diverse
                  habitat; habitats with equal diversity are ordered by name.
    '''
    
    # group the sightings by habitat and score each habitat; only element [0]
    # of the measure's result (the diversity value itself) is kept
    scored_habitats = [
        (name, DIVERSITY_FUNCTIONS[diversity_measure](birds)[0])
        for name, birds in build_habitat(observed_list)
    ]
    
    # negating the score gives a descending order on diversity while the
    # habitat name still breaks ties in ascending order
    return sorted(scored_habitats, key=lambda pair: (-pair[1], pair[0]))
        
def build_habitat(observed_list):
    '''
        Groups the observed species by the habitat they were seen in.
        
        Arguments:
            observed_list (list): A list of observations, each of which is
                                  unpacked as a (species, habitat) pair
            
        Returns:
            dict_items: A view of (habitat, species_list) pairs where:
                    habitat     : Habitat name
                    species_list: List of the bird species observed in that
                                  habitat, in order of appearance (duplicates
                                  are kept)
    '''
    
    birds_by_habitat = {}
    for bird, place in observed_list:
        # create the habitat's list on first sight, then record the sighting
        birds_by_habitat.setdefault(place, []).append(bird)
                          
    return birds_by_habitat.items()

### Q4.  Optimising the study

In [None]:
def optimise_study(sample_data, unseen_species, consecutive_visits):
    '''
        Evaluates the effect that consecutive visits and unseen species 
        thresholds have on the accuracy of diversity estimates.
        
        Arguments:
            sample_data (list)      : A list where each element is a list of 
                                      bird species seen during a sequential 
                                      visit.
            unseen_species (int)    : The minimum number of previously unseen 
                                      species that must be observed before a 
                                      visit is deemed productive.
            consecutive_visits (int): The number of consecutive unproductive 
                                      visits, after the initial visit, that 
                                      must occur to trigger the stopping rule.
        Returns:
            tuple: A 2-tuple containing the following info:
                    [0]: The number of visits that will occur before the study 
                         has stopped
                    [1]: The proportion of the total bird species observed by 
                         the study at that point, compared to if all sampling 
                         visits contained in sample_data had been conducted.
                         This is 0.0 when no species appear anywhere in
                         sample_data (including when sample_data is empty).
    '''
    
    all_species = set()
    
    num_consec_unprod_visits = 0
    num_visits_made = 0
    num_species_seen = 0
    
    study_stopped = False
    
    for visit_num, observed_list in enumerate(sample_data):
        
        # identify the unique species observed during the current visit
        species_seen = set(observed_list)
        num_new_species_seen = len(species_seen - all_species)
        
        # every visit is folded in, even after the study has stopped, because
        # the full set of species is the denominator of the proportion
        all_species |= species_seen
        
        if study_stopped:
            continue
            
        # keep track of the number of species seen so far, as well as the
        # number of visits made
        num_species_seen = len(all_species)
        num_visits_made += 1
        
        # the first visit never counts as unproductive; any productive visit
        # resets the run of consecutive unproductive visits
        if visit_num > 0 and num_new_species_seen < unseen_species:
            num_consec_unprod_visits += 1
            if num_consec_unprod_visits == consecutive_visits:
                study_stopped = True
        else:
            num_consec_unprod_visits = 0
    
    # no species at all: the proportion is undefined, so report 0.0 instead
    # of dividing by zero
    if not all_species:
        return (num_visits_made, 0.0)
     
    return (num_visits_made, num_species_seen / len(all_species))

### BONUS:  Predicting Species in Unsampled Habitats

In [None]:
def infer_bird_species(environment, observations, region_list):
    '''
        Predicts the bird species likely to be observed in specified regions.
        
        A species' required factors are taken to be the factors common to
        every region in which it was observed; a species is predicted for a
        region when none of its required factors are missing there.
        
        Arguments:
            environment (list) : A list of lists of regions in the environment. 
                                 Each region is represented as a list 
                                 (of length n) of Boolean values indicating the 
                                 presence or absence of each of n environmental
                                 factors.
            observations (list): A list of lists of observations of each region 
                                 in the environment. Each region is represented 
                                 as a list (of variable length) of the bird 
                                 species observed in that region. 
            region_list (list) : A list of regions (indicated as (i, j) 
                                 tuples) where no sampling has been carried 
                                 out. Each of these regions will correspond to 
                                 an empty list in observations.
        
        Returns:
            list: A list containing sorted lists of predicted bird species for 
                  each of the regions in region_list.
    '''
    
    # determine the requirements for each species to survive; walking the two
    # grids in lockstep avoids indexing into an empty environment
    species_req_factors = {}
    for env_row, obs_row in zip(environment, observations):
        for region, sightings in zip(env_row, obs_row):
            
            # ignore regions that carry no environmental factor data
            # (unsampled regions have no sightings, so they contribute
            # nothing to the loop below either)
            if not region:
                continue
                
            for species in sightings:
                
                if species not in species_req_factors:
                    # initially assume that the species requires all the 
                    # factors to survive in a region
                    species_req_factors[species] = [1] * len(region)
                    
                # narrow the requirements using the current region's factors
                update_reqs(species_req_factors[species], region)

    # predict the species likely to be observed in the specified regions
    predictions = []
    for row, col in region_list:
        predicted_species = [
            species
            for species, req_factors in species_req_factors.items()
            if is_likely_to_be_observed(req_factors, environment[row][col])
        ]
        predictions.append(sorted(predicted_species))
    
    return predictions

def update_reqs(req_factors, curr_factors):
    '''
        Narrows, in place, the factors a species is believed to require.
        
        Arguments:
            req_factors (list):  A list of Boolean values indicating the 
                                 presence or absence of each of n environmental
                                 factors that have been deemed to be necessary
                                 for a species survival in a region. This list
                                 is modified by the call.
            curr_factors (list): A list of Boolean values indicating the 
                                 presence or absence of each of n environmental
                                 factors in the current region being evaluated.
        
        Returns:
            None: The result is written back into req_factors.
    '''
    
    # a factor stays required only while every region the species has been
    # seen in so far has it
    # (Intuition: if the species was seen in a region lacking a factor, then
    # that factor isn't pertinent to the species' survival)
    for idx, still_required in enumerate(req_factors):
        req_factors[idx] = still_required and curr_factors[idx]

def is_likely_to_be_observed(req_factors, curr_factors):
    '''
        Decides whether a region can support a species, given the factors the
        species requires.
        
        Arguments:
            req_factors (list):  A list of Boolean values indicating the 
                                 presence or absence of each of n environmental
                                 factors that have been deemed to be necessary
                                 for a species survival in a region.
            curr_factors (list): A list of Boolean values indicating the 
                                 presence or absence of each of n environmental
                                 factors in the current region being evaluated.
        
        Returns:
            bool: False if at least one required factor is absent from the
                  region, True otherwise.
    '''
    
    # the species is unlikely to be observed as soon as one required factor is
    # missing; the region is only consulted for factors that are required
    missing_requirement = any(
        needed and not curr_factors[idx]
        for idx, needed in enumerate(req_factors)
    )
        
    return not missing_requirement