### Metric Calculations
First we need to open a connection to the proper ES instance. We use an external module to load credentials from a file that will not be shared. If you want to run this, please use your own credentials: put them in a file named '.settings' (in the same directory as this notebook), following the example file 'settings.sample'.

To work with **survey results**, this notebook expects to find the following files:
* **UUID's**: ../data/uuids.csv
* **SURVEY**: ../data/survey-fake.csv

**If you need to use a different survey file, just change its path in the second code cell below this one.**

In [1]:
import pandas

import util as ut

from util import ESConnection

# Connection object handed to ut.create_search by every query in this notebook.
# Per the notes at the top, credentials come from the local '.settings' file.
es_conn = ESConnection()

In [2]:
# Build the survey dataframe from the survey answers file and the
# (response id, uuid) correspondence file.

############# MODIFY YOUR SURVEY CSV FILE HERE ######################################
survey_df = ut.load_survey_df(
    survey_filepath='../data/survey-fake.csv',
    uuids_filepath='../data/uuids.csv',
)
#####################################################################################

# Quick sanity check: total rows loaded vs. number of distinct uuids among them.
print('Identities with UUID found in survey file: ', len(survey_df))
print('Unique identities found: ', len(survey_df['uuid'].unique()))
print()

def add_common_filters(source, s):
    """Apply the survey filters to search `s`.

    Delegates to ut.add_survey_filters, passing the module-level `survey_df`.
    `source` is accepted but not used by this function.
    """
    survey_filtered = ut.add_survey_filters(s, survey_df)
    return survey_filtered

def add_project_filter(s, project_name):
    """Narrow search `s` to a single project.

    The special name 'All' means "no restriction": `s` is returned untouched.
    For any other name, a 'term' filter on the `project` field is added and
    the resulting search is returned.
    """
    if project_name == 'All':
        return s
    return s.filter('term', project=project_name)

def get_active_authors(source, project_name, date_field, date_filter=None):
    """Count distinct non-employee authors of `source` for a project.

    :param source: handed to ut.create_search together with the ES connection
    :param project_name: project to restrict to; 'All' adds no project filter
    :param date_field: not used inside this function
    :param date_filter: optional dict expanded as keyword arguments of a
        'range' filter; None leaves the search without a date restriction
    :returns: value of the 'cardinality' aggregation over 'author_uuid'
    """
    search = add_project_filter(ut.create_search(es_conn, source), project_name)

    # Leave employees out of the count
    search = search.exclude('terms', author_org_name=['Mozilla Staff', 'Code Sheriff'])

    if date_filter is not None:
        search = search.filter('range', **date_filter)

    # Distinct authors, identified by uuid.
    # NOTE(review): Elasticsearch documents 40000 as the maximum effective
    # precision_threshold, so larger values behave like 40000 -- confirm
    # against the cluster version in use.
    search.aggs.metric('uuid', 'cardinality', field='author_uuid', precision_threshold=1000000)

    aggregations = search.execute().to_dict()['aggregations']
    return aggregations['uuid']['value']

def get_active_survey(source, project_name, date_field, date_filter=None):
    """Count distinct authors of `source` for a project, with survey filters applied.

    :param source: handed to ut.create_search and to add_common_filters
    :param project_name: project to restrict to; 'All' adds no project filter
    :param date_field: not used inside this function
    :param date_filter: optional dict expanded as keyword arguments of a
        'range' filter; None leaves the search without a date restriction
    :returns: value of the 'cardinality' aggregation over 'author_uuid'
    """
    search = add_project_filter(ut.create_search(es_conn, source), project_name)

    # Apply the survey filters
    search = add_common_filters(source, search)

    if date_filter is not None:
        search = search.filter('range', **date_filter)

    # Distinct authors, identified by uuid.
    # NOTE(review): Elasticsearch documents 40000 as the maximum effective
    # precision_threshold, so larger values behave like 40000 -- confirm
    # against the cluster version in use.
    search.aggs.metric('uuid', 'cardinality', field='author_uuid', precision_threshold=1000000)

    aggregations = search.execute().to_dict()['aggregations']
    return aggregations['uuid']['value']

# Data sources to query and the projects reported for each of them.
# 'All' is the name add_project_filter treats as "no project restriction".
sources = ['git', 'bugzilla', 'github_issues', 'discourse', 'mbox']
projects = ['Gecko', 'Rust', 'Servo', 'Firefox', 'WebVR', 'Firefox OS (FxOS / B2G)', 'All']

# One row per (source, project): "( survey / all )" author counts plus the
# coverage percentage, first for the last year and then for the whole history.
coverage_df = pandas.DataFrame(columns=['source', 'project', 'authors(last year)', 'coverage(last year)',
                                        'authors', 'coverage'])

for source in sources:
    # bugzilla is filtered on 'creation_ts'; every other source on 'grimoire_creation_date'
    date_field = 'creation_ts' if source == 'bugzilla' else 'grimoire_creation_date'
    date_filter = {date_field: {'gte': 'now-1y', 'lt': 'now'}}

    for project_name in projects:

        # Authors active during the last year: all of them vs. those counted with survey filters
        all_last_year = get_active_authors(source, project_name, date_field, date_filter)
        survey_last_year = get_active_survey(source, project_name, date_field, date_filter)

        if all_last_year > 0:
            coverage_last_year = (survey_last_year/all_last_year)*100
            print('[', source, '] 1 YEAR', project_name, ': (', survey_last_year, '/', all_last_year, ')',
                  round(coverage_last_year, 2))
        else:
            # Nothing to divide by: report zero coverage
            coverage_last_year = 0
            print('[', source, '] No authors found for', project_name)

        # Same computation without the date restriction (whole history)
        all_nonemp = get_active_authors(source, project_name, date_field)
        survey = get_active_survey(source, project_name, date_field)

        if all_nonemp > 0:
            coverage_total = (survey/all_nonemp)*100
            print('[', source, '] TOTAL ', project_name, ': (', survey, '/', all_nonemp, ')',
                  round(coverage_total, 2))
        else:
            # Nothing to divide by: report zero coverage
            coverage_total = 0
            print('[', source, '] No authors found for', project_name)

        # Append the row at the next integer label
        coverage_df.loc[len(coverage_df)] = [
            source,
            project_name,
            '( ' + str(survey_last_year) + ' / ' + str(all_last_year) + ' )',
            round(coverage_last_year, 2),
            '( ' + str(survey) + ' / ' + str(all_nonemp) + ' )',
            round(coverage_total, 2),
        ]

    # Blank line between sources in the printed log
    print()

ut.print_table(coverage_df)

Identities with UUID found in survey file:  327
Unique identities found:  327

[ git ] 1 YEAR Gecko : ( 22 / 797 ) 2.76
[ git ] TOTAL  Gecko : ( 41 / 4245 ) 0.97
[ git ] 1 YEAR Rust : ( 18 / 729 ) 2.47
[ git ] TOTAL  Rust : ( 25 / 2400 ) 1.04
[ git ] 1 YEAR Servo : ( 15 / 353 ) 4.25
[ git ] TOTAL  Servo : ( 22 / 1282 ) 1.72
[ git ] 1 YEAR Firefox : ( 5 / 209 ) 2.39
[ git ] TOTAL  Firefox : ( 16 / 1133 ) 1.41
[ git ] 1 YEAR WebVR : ( 6 / 162 ) 3.7
[ git ] TOTAL  WebVR : ( 6 / 221 ) 2.71
[ git ] 1 YEAR Firefox OS (FxOS / B2G) : ( 0 / 1 ) 0.0
[ git ] TOTAL  Firefox OS (FxOS / B2G) : ( 9 / 92 ) 9.78
[ git ] 1 YEAR All : ( 110 / 3142 ) 3.5
[ git ] TOTAL  All : ( 159 / 13351 ) 1.19

[ bugzilla ] 1 YEAR Gecko : ( 30 / 3721 ) 0.81
[ bugzilla ] TOTAL  Gecko : ( 97 / 62212 ) 0.16
[ bugzilla ] No authors found for Rust
[ bugzilla ] No authors found for Rust
[ bugzilla ] No authors found for Servo
[ bugzilla ] No authors found for Servo
[ bugzilla ] 1 YEAR Firefox : ( 24 / 2384 ) 1.01
[ bugzilla ]