In [1]:
from datetime import datetime
import pandas

import os

import plotly as plotly
import plotly.graph_objs as go

import util as ut

from util import ESConnection
from elasticsearch_dsl import Search

es_conn = ESConnection()

# Project/repository grouping is read from the REVIEWED SPREADSHEET
projects = ut.read_projects("data/Contributors and Communities Analysis - Project grouping.xlsx")

# Both settings can be overridden through environment variables:
#   PROJECT  - project to restrict the analysis to ('All' applies no project filter)
#   MAX_TIME - how many yearly snapshots to compute
project_name = os.environ.get('PROJECT', 'All')
max_time = os.environ.get('MAX_TIME', '10')

# Year offsets, starting at 0, used by every metric below. Each offset i is
# turned into an upper bound 'now-<i>y/y' combined with `lt` in the queries,
# so offset 0 keeps only commits made before the current year started.
analyzed_range = range(int(max_time))

In [2]:
def create_search(source):
    """Return a new, unfiltered ``Search`` over index ``source`` bound to ``es_conn``.

    No bot, merge or date filtering is applied here; callers add their own
    filters on the returned object.
    """
    # TODO: Add bot and merges filtering.
    return Search(using=es_conn, index=source)

In [3]:
def _author_records(result, author_bucket_field):
    """Build one record (dict) per author bucket found in ``result``.

    Each bucket must carry a ``first`` top-hits entry (its sort value is read
    as milliseconds since the epoch) and a ``last_commit`` metric (also read as
    milliseconds since the epoch).
    """
    records = []
    for bucket_author in result['aggregations'][author_bucket_field]['buckets']:
        first = bucket_author['first']['hits']['hits'][0]
        # Both values are divided by 1000 to be read as Unix timestamps (seconds)
        first_commit = first['sort'][0] / 1000
        last_commit = bucket_author['last_commit']['value'] / 1000
        records.append({
            'first_commit': datetime.utcfromtimestamp(first_commit),
            'last_commit': datetime.utcfromtimestamp(last_commit),
            'author': bucket_author['key'],
            'org': first['_source']['author_org_name'],
            'repo_name': first['_source']['repo_name']
        })
    return records


def _records_to_authors_df(records):
    """Turn author records into a dataframe sorted by ``first_commit``, newest first.

    With no records, an empty dataframe that still has the expected (typed)
    columns is returned: sorting a column-less frame by 'first_commit' would
    otherwise raise KeyError.
    """
    if not records:
        return pandas.DataFrame({
            'first_commit': pandas.Series(dtype='datetime64[ns]'),
            'last_commit': pandas.Series(dtype='datetime64[ns]'),
            'author': pandas.Series(dtype='object'),
            'org': pandas.Series(dtype='object'),
            'repo_name': pandas.Series(dtype='object')
        }, columns=['first_commit', 'last_commit', 'author', 'org', 'repo_name'])

    authors_df = pandas.DataFrame.from_records(records)
    authors_df.sort_values(by='first_commit', ascending=False,
                           inplace=True)
    return authors_df


def get_authors_df(result, author_bucket_field):
    """Returns a dataframe with first and last commit of every author.

    :param result: search response (or plain dict) holding the author buckets
        under ``result['aggregations'][author_bucket_field]['buckets']``
    :param author_bucket_field: name of the aggregation holding author buckets
    :returns: dataframe with columns ``first_commit``, ``last_commit``,
        ``author``, ``org`` and ``repo_name``, sorted by ``first_commit`` in
        descending order; empty (but with those columns) if there are no buckets
    """
    return _records_to_authors_df(_author_records(result, author_bucket_field))


def get_active_authors_df(result, author_bucket_field, year):
    """Returns a dataframe with first and last commit of those authors
    whose last commit was made within a given year.

    :param result: search response (or plain dict) holding the author buckets
        under ``result['aggregations'][author_bucket_field]['buckets']``
    :param author_bucket_field: name of the aggregation holding author buckets
    :param year: year (int) the last commit must belong to
    :returns: same layout as ``get_authors_df``; empty (but with the expected
        columns) when no author had the last commit in ``year``
    """
    active = [record for record in _author_records(result, author_bucket_field)
              if record['last_commit'].year == year]
    return _records_to_authors_df(active)

In [4]:
def print_horizontal_bar_chart(df, experience_field, title, min_range = 0):
    """Plot, as horizontal bars, how many rows of ``df`` hold each integer
    value of ``experience_field``.

    :param df: dataframe with one row per contributor
    :param experience_field: name of the numeric column to group rows by
    :param title: chart title
    :param min_range: lowest value shown on the Y axis; the highest one is the
        maximum found in the column
    :returns: None. The chart is rendered inline; when the column has no
        values a short notice is printed instead of a chart.
    """
    max_experience = df[experience_field].max()
    if pandas.isnull(max_experience):
        # Empty frame (or only missing values): there is no upper bound to
        # build the axis from and int(NaN) would raise ValueError.
        print('No data to plot for ' + title)
        return

    plotly.offline.init_notebook_mode(connected=True)

    experience = list(range(min_range, int(max_experience) + 1))

    # Number of rows matching each value, in the same order as `experience`
    values = df[experience_field]
    people_count = [int((values == exp).sum()) for exp in experience]

    data = [go.Bar(
            x=people_count,
            y=experience,
            orientation = 'h'
    )]

    layout = go.Layout(
        barmode='group',
        title= title
    )

    fig = go.Figure(data=data, layout=layout)
    plotly.offline.iplot(fig, filename='horizontal-bar')

In [5]:
def add_general_date_filters(s):
    """Return ``s`` restricted to items created after the oldest analyzed date.

    The lower bound is given in milliseconds since the epoch and corresponds
    to 1997-12-31 23:00 UTC, i.e. 01/01/1998 00:00 at UTC+1.
    """
    oldest_date = {'gt': '883609200000'}
    return s.filter('range', grimoire_creation_date=oldest_date)

def add_bot_filter(s):
    """Return ``s`` restricted to items whose ``author_bot`` term is 'false'."""
    not_a_bot = {'author_bot': 'false'}
    return s.filter('term', **not_a_bot)

def add_merges_filter(s):
    """Return ``s`` restricted to items whose ``files`` value is greater than 0.

    Per its name, this is meant to leave merge commits out.
    """
    touches_files = {'gt': 0}
    return s.filter('range', files=touches_files)

def add_project_filter(s):
    """Return ``s`` restricted to the repositories of the selected project.

    Uses the notebook-level ``project_name`` and ``projects``. When
    ``project_name`` is 'all' (in any letter case) ``s`` is returned untouched;
    otherwise only repos listed for that project in the 'Github' sheet pass.
    """
    if project_name.lower() == 'all':
        return s

    github = projects['Github']
    belongs_to_project = github['Project'] == project_name
    repos = github[belongs_to_project]['Repo'].tolist()
    return s.filter('terms', repo_name=repos)

In [6]:
# One response per yearly snapshot: offset i covers every commit made before
# the start of (current year - i).
results = []
for i in analyzed_range:

    # Buckets by author uuid, finding first and last commit for each of them
    s = Search(using=es_conn, index='git')
    # params() returns a modified copy of the search (same as filter() and
    # sort() below), so the result must be re-assigned or the setting is lost.
    # request_timeout is the client-side timeout for this request, in seconds.
    s = s.params(request_timeout=30)

    # General filters
    s = add_general_date_filters(s)
    s = add_bot_filter(s)
    s = add_merges_filter(s)

    # Filter commits to the Project Repos
    s = add_project_filter(s)

    # Retrieve commits before given year
    s = s.filter('range', grimoire_creation_date={'lt': 'now-' + str(i) + 'y/y'})

    # Bucketize by uuid and get first and last commit
    s.aggs.bucket('authors', 'terms', field='author_uuid', size=100000) \
        .metric('first', 'top_hits', _source=['author_date', 'author_org_name', 'author_uuid', 'repo_name'],
                size=1, sort=[{"author_date": {"order": "asc"}}]) \
        .metric('last_commit', 'max', field='author_date')
    s = s.sort("author_date")
    #print(s.to_dict())
    results.append(s.execute())


# Length of a year as numpy defines it for timedelta conversions (365.2425 days).
# Dividing by it and flooring gives whole years as floats without relying on
# astype('timedelta64[Y]'), which recent pandas versions reject.
one_year = pandas.Timedelta(seconds=31556952)

authors_dfs = []
for result in results:
    authors_df = get_authors_df(result, author_bucket_field='authors')
    authors_df['active_years'] = ((authors_df.last_commit - authors_df.first_commit) / one_year) // 1
    authors_df['project'] = project_name
    authors_dfs.append(authors_df)

authors_dfs

[                                         author        first_commit  \
 11577  149ad981c7f8acd65b9cb9a3f306833fc4b6ec8e 2016-12-31 12:01:23   
 13024  778f8adac79c8593c8413792e9d838e0620797c1 2016-12-31 08:34:00   
 14349  f0a27b3276baf429f51f6afed8ece01f1a09e7d0 2016-12-31 05:11:58   
 12669  5c6bef06c5d1fa2e83f13db2046ec3d9d27bd00d 2016-12-30 23:09:23   
 13425  977b2b48ece5125093c2cfdedb56be00bdba4f13 2016-12-30 22:21:03   
 12596  57a1cf0d7a35c3f77475953873212148d3c01e1b 2016-12-30 21:55:02   
 10727  8c536fb5c49a281f793cb47e933fd71403891960 2016-12-30 17:26:13   
 13554  a1e6b9aca42df46460c5fed6a73cde615b2b04a3 2016-12-30 16:45:19   
 11875  26e5b20e7ccff92ec895b5f4af3f5f0b9a5b0f0a 2016-12-30 14:34:47   
 9905   09a899922407e40e04fa6ab865da040f06c0d904 2016-12-30 12:22:11   
 9612   c91f349eb811f5870fe7b4e987bd34decfbf72cb 2016-12-30 11:34:16   
 13171  829a9b7eae96736b2408c62c4b2843721116fe41 2016-12-29 23:33:52   
 13884  c19b5436d45eb06bb0d9a16accd4a16da1ad9765 2016-12-29 14:2

## Time from first to last contrib for authors who made a commit before a given year 

The next plot shows the number of authors grouped by the time from their first to their last contribution. This gives us an idea of how long contributors stay around the community. This chart tells us nothing about their activity during that period; it is just a quick and approximate glance at the time they remain around the community.

**Long bars in the group of 0 years of experience mean that there are many people who made their first and last contributions within the same year along the whole period**. That is, the accumulated sum of people who made all their contributions within the same year, counted from 1998.

* Y axis corresponds to the difference in years from first to last contributions.
* X axis corresponds to the number of contributors in the given group.
* Each plot shows a snapshot of this information from the specified year to the past (1998 was chosen as the oldest date to get results from). 

In [7]:
# One bar chart per yearly snapshot, most recent snapshot first.
# NOTE(review): titles assume the most recent snapshot is 2016, whereas the
# queries use bounds relative to `now` - confirm when re-running in another year.
for i, authors_df in enumerate(authors_dfs):
    chart_title = '{} {}'.format(project_name, 2016 - i)
    print_horizontal_bar_chart(authors_df, 'active_years', title=chart_title)

## Time from first to last commit for authors active in a given year

We define an author as **active** iff she made at least one commit within a given year. E.g. an author would be considered active in 2017 if she made a commit after Jan. 1st, 2017 and before Dec. 31st 2017. 

In other words, the difference from the previous plots lies in taking into account only contributors who made their last contribution in the year we are visualizing data from.

* Y axis corresponds to the difference in years from first to last contributions.
* X axis corresponds to the number of contributors in the given group.
* Each plot shows a snapshot of this information from the specified year to the past (1998 was chosen as the oldest date to get results from). 

In [8]:
# For each yearly snapshot keep only authors whose last commit falls in the
# snapshot year; snapshots go from 2016 backwards, one year per response.
active_authors_dfs = []

# Length of a year as numpy defines it for timedelta conversions (365.2425 days).
# Dividing by it and flooring gives whole years as floats without relying on
# astype('timedelta64[Y]'), which recent pandas versions reject.
one_year = pandas.Timedelta(seconds=31556952)

year = 2016
for result in results:
    authors_df = get_active_authors_df(result, author_bucket_field='authors', year=year)
    authors_df['active_years'] = ((authors_df.last_commit - authors_df.first_commit) / one_year) // 1
    authors_df['project'] = project_name
    active_authors_dfs.append(authors_df)
    year -= 1

active_authors_dfs

[                                        author        first_commit  \
 3526  149ad981c7f8acd65b9cb9a3f306833fc4b6ec8e 2016-12-31 12:01:23   
 3908  778f8adac79c8593c8413792e9d838e0620797c1 2016-12-31 08:34:00   
 4265  f0a27b3276baf429f51f6afed8ece01f1a09e7d0 2016-12-31 05:11:58   
 3815  5c6bef06c5d1fa2e83f13db2046ec3d9d27bd00d 2016-12-30 23:09:23   
 4018  977b2b48ece5125093c2cfdedb56be00bdba4f13 2016-12-30 22:21:03   
 3796  57a1cf0d7a35c3f77475953873212148d3c01e1b 2016-12-30 21:55:02   
 3281  8c536fb5c49a281f793cb47e933fd71403891960 2016-12-30 17:26:13   
 4049  a1e6b9aca42df46460c5fed6a73cde615b2b04a3 2016-12-30 16:45:19   
 3598  26e5b20e7ccff92ec895b5f4af3f5f0b9a5b0f0a 2016-12-30 14:34:47   
 3033  09a899922407e40e04fa6ab865da040f06c0d904 2016-12-30 12:22:11   
 2967  c91f349eb811f5870fe7b4e987bd34decfbf72cb 2016-12-30 11:34:16   
 3946  829a9b7eae96736b2408c62c4b2843721116fe41 2016-12-29 23:33:52   
 4140  c19b5436d45eb06bb0d9a16accd4a16da1ad9765 2016-12-29 14:21:07   
 3607 

In [9]:
# One bar chart per yearly snapshot of active authors, most recent first.
# NOTE(review): titles assume the most recent snapshot is 2016, whereas the
# queries use bounds relative to `now` - confirm when re-running in another year.
for i, authors_df in enumerate(active_authors_dfs):
    chart_title = '{} {}'.format(project_name, 2016 - i)
    print_horizontal_bar_chart(authors_df, 'active_years', title=chart_title)

## Years of Experience
We consider **12 commits** per year, i.e. approximately one commit per month, as the minimum to add one year of experience to a given author. From this assumption, we build groups of authors by years of experience. As a result, we present a plot with the number of people in each group.

To give a more complete idea of how community evolves, we plot snapshots corresponding to different years. Each of them will take all commits sent until the given year, and calculate years of experience for all authors in that slice.

We only count authors whose last year of experience is the one we are analyzing data from. That is, if we are looking at year 2016, we only count those authors who made at least 12 commits in 2016. From there we add 1 year of experience for each year in which they fulfill this condition.

* Y axis corresponds to years of experience as defined above.
* X axis corresponds to the number of contributors in the given group.
* Each plot shows a snapshot of this information from the specified year to the past (1998 was chosen as the oldest date to get results from). 

In [10]:
###
## GET COMMITS BY YEAR AND AUTHOR
##
## NOTE: this rebinds `results`, replacing the responses of the first/last
## commit queries computed earlier in the notebook.
###

results = []
# Passed as min_doc_count to the authors aggregation below
min_commits = 1

for i in analyzed_range:

    # New search on the git index; buckets by year, author and org are added below
    s = create_search(source='git')
    
    # General filters
    s = add_general_date_filters(s)
    s = add_bot_filter(s)
    s = add_merges_filter(s)
    
    # Filter commits to the Project Repos
    s = add_project_filter(s)
    
    # Retrieve commits before given year
    s = s.filter('range', grimoire_creation_date={'lt': 'now-' + str(i) + 'y/y'})

    # Bucketize by time, uuid and organization, then count commits per year.
    # Only one org bucket is requested per author and year (size=1); commits
    # are counted as the cardinality of the `hash` field.
    s.aggs.bucket('time', 'date_histogram', field='grimoire_creation_date', interval='year') \
        .bucket('authors', 'terms', field='author_uuid', size=100000, min_doc_count=min_commits) \
        .bucket('org', 'terms', field='author_org_name', size=1) \
        .metric('commits', 'cardinality', field='hash', precision_threshold=1000)

    r = s.execute()
    # In case you need to check response, uncomment line below
    #print(r.to_dict()['aggregations']['time']['buckets'])
        
    results.append(r)
    
#results


In [11]:
###
## FOR EACH SNAPSHOT, CREATE A DF INDEXED BY (AUTHOR UUID, ORG) WITH THE COUNT
## OF YEARS OF EXPERIENCE (YEARS WITH AT LEAST 12 COMMITS) AND LAST YEAR ACTIVE
###
exp_df_list = []
year = 2016

for result in results:
    commits_df = ut.to_df_by_time(result, 'Author', 'Time', 'Commits', 'Org', 'authors', 'time', 'commits', 'org')
    # Keep only the year of each time bucket, as a string
    commits_df['Time'] = commits_df['Time'].apply(lambda x: str(pandas.Period(x,'A')))

    ## ACTIVE CONDITION
    ## Discard rows having less than 12 commits
    active_df = commits_df[commits_df['Commits'] >= 12]

    ## Per (author, org): latest year and number of remaining rows
    per_author = active_df.groupby(['Author', 'Org']).agg({'Time': 'max', 'Commits': 'count'})
    ## Keep only those whose last active year is the snapshot year
    per_author = per_author[per_author['Time'] == str(year)]

    exp_df = pandas.DataFrame({
        'exp': per_author['Commits'],
        'last_active': per_author['Time']
    })
    exp_df['project'] = project_name

    exp_df_list.append(exp_df)

    year -= 1

exp_df_list

[                                                        exp last_active  \
 Author                                   Org                              
 000063c4e47e93ab3b30607680609e4d2500ce5d Mozilla Staff    3        2016   
 0002c6a09a45af6481c35e29c0ed7f3bdbecb3b8 Community        1        2016   
 000336e205e3f5e2daa6e7a0b03e612b3b2b02d3 Mozilla Staff    3        2016   
 0014e7cebbe8ef11dab9eeb5f2111e01d9eea378 Mozilla Staff    6        2016   
 002893ffe1425c220756f8ba4c78e1e3bb0be50f Mozilla Staff    6        2016   
 0043f3e1632e14bb05873fc981efa44057a38652 Community        1        2016   
 006fee3e0de309ff1e3e89c24f5e7032163c49c5 Mozilla Staff    5        2016   
 0077ed8ce0df63bf273d022edae59fc2664972dc Mozilla Staff    1        2016   
 00834d313bfc6fc60be1631bcc57b2c05ee2e0e3 Mozilla Staff    8        2016   
 00846eff46b051d92317fc74e54041c6fdccd7cf Mozilla Staff    9        2016   
 00a40f9e9e7f7633ddab8291a99e1e487f88481c Community        2        2016   
 00b93401298

In [12]:
# For every non-empty snapshot plot three charts: everybody, employees
# ('Mozilla Staff' in the org level of the index) and everybody else.
for i, exp_df in enumerate(exp_df_list):
    if exp_df.empty:
        continue

    print_horizontal_bar_chart(exp_df, 'exp', title=project_name + ' ' + str(2016 - i), min_range=1)

    # Second index level holds the organization
    is_staff = exp_df.index.get_level_values(1) == 'Mozilla Staff'

    employees_df = exp_df[is_staff]
    if len(employees_df) > 0:
        print_horizontal_bar_chart(employees_df, 'exp',
                                   title=project_name + ' employees ' + str(2016 - i),
                                   min_range=1)

    non_employees_df = exp_df[~is_staff]
    if len(non_employees_df) > 0:
        print_horizontal_bar_chart(non_employees_df, 'exp',
                                   title=project_name + ' non-employees ' + str(2016 - i),
                                   min_range=1)

### Evolution of Experience

The next table and plot show how each group changes over time. This way we can visualize how new people come and remain in the community. It is worth noting that we are not following a given group of people through time (that could be done by following diagonals in the table; we look at this in the next section), but looking at how a given group changes from one year to another. 

For instance, if we look at the group of 2 years of experience in 2008 we see we had 204 people. If we look at the same group in 2009 we see that our **new group** of people accumulating 2 years of experience has 105 people. So, it seems we have fewer people with 2 years of experience in 2009. If we look at 2016 we find 224 people with two years of experience, so we have more people with 2 years of experience nowadays than we had 8 years ago.

Table can be read as follows:

* Cell values correspond to the number of contributors in the given group.
* Rows correspond to groups based on years of experience.
* Columns correspond to the years we are analyzing. 

In [13]:
# Table with one row per experience group and one column per snapshot year
exp_groups_evo_df = pandas.DataFrame(columns=['exp'])

for exp_df in exp_df_list:
    if exp_df.empty:
        continue

    # Every row of a snapshot shares the same last active year
    year = exp_df['last_active'].unique()[0]
    exp_groups_df = pandas.DataFrame(columns=['exp', year])

    experience = list(range(1, int(exp_df['exp'].max()) + 1))
    for exp in experience:
        count = len(exp_df[exp_df['exp'] == exp])
        exp_groups_df.loc[len(exp_groups_df)] = [exp, count]

    exp_groups_evo_df = exp_groups_evo_df.merge(exp_groups_df, on='exp', how='outer')

# Missing groups count as 0 people; index by experience and order year columns
exp_groups_evo_df = (
    exp_groups_evo_df
    .fillna(0)
    .set_index('exp')
    .sort_index(axis=1)
)

exp_groups_evo_df

Unnamed: 0_level_0,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
exp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1.0,293.0,180.0,180.0,203.0,312.0,369.0,435.0,493.0,465.0,434.0
2.0,50.0,204.0,105.0,106.0,133.0,211.0,206.0,225.0,212.0,224.0
3.0,33.0,37.0,91.0,79.0,80.0,98.0,149.0,154.0,163.0,123.0
4.0,28.0,16.0,5.0,83.0,68.0,59.0,82.0,119.0,109.0,123.0
5.0,27.0,18.0,0.0,4.0,74.0,58.0,49.0,67.0,100.0,86.0
6.0,18.0,9.0,1.0,0.0,4.0,62.0,48.0,46.0,54.0,86.0
7.0,10.0,8.0,1.0,1.0,0.0,3.0,52.0,42.0,42.0,50.0
8.0,14.0,4.0,1.0,1.0,1.0,0.0,2.0,47.0,37.0,38.0
9.0,5.0,6.0,1.0,0.0,1.0,0.0,1.0,2.0,43.0,34.0
10.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,2.0,36.0


### Employees

In [14]:
# Same table as the overall one, restricted to employees ('Mozilla Staff'):
# one row per experience group, one column per snapshot year.
exp_groups_evo_moz_df = pandas.DataFrame(columns=['exp'])

for exp_df in exp_df_list:

    if exp_df.empty:
        continue

    # Second index level holds the organization
    exp_df = exp_df[[group2 in ['Mozilla Staff'] for group1, group2 in exp_df.index]]

    # A snapshot may have no employees at all; without this guard
    # unique()[0] below raises IndexError (same check as for non-employees).
    if len(exp_df) == 0:
        continue

    year = exp_df['last_active'].unique()[0]
    exp_groups_df = pandas.DataFrame(columns=['exp', year])

    experience = list(range(1, int(exp_df['exp'].max()) + 1))

    for exp in experience:
        count = len(exp_df.loc[exp_df['exp'] == exp])
        exp_groups_df.loc[len(exp_groups_df)] = [exp, count]

    exp_groups_evo_moz_df = exp_groups_evo_moz_df.merge(exp_groups_df, on='exp', how='outer')


# Fill Nan with 0's
exp_groups_evo_moz_df = exp_groups_evo_moz_df.fillna(0)

# Reorder columns
exp_groups_evo_moz_df = exp_groups_evo_moz_df.set_index('exp')
exp_groups_evo_moz_df = exp_groups_evo_moz_df.sort_index(axis=1)


#print('Max. Exp: ', exp_groups_evo_moz_df['exp'].max(), 'Max. Count: ')
exp_groups_evo_moz_df

Unnamed: 0_level_0,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
exp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1.0,120.0,93.0,93.0,112.0,186.0,219.0,226.0,188.0,129.0,114.0
2.0,8.0,96.0,76.0,75.0,89.0,152.0,159.0,158.0,117.0,103.0
3.0,0.0,7.0,83.0,61.0,63.0,75.0,123.0,132.0,124.0,85.0
4.0,1.0,0.0,5.0,75.0,57.0,48.0,70.0,102.0,103.0,99.0
5.0,1.0,1.0,0.0,4.0,66.0,51.0,42.0,59.0,90.0,84.0
6.0,0.0,1.0,1.0,0.0,4.0,56.0,44.0,38.0,50.0,77.0
7.0,1.0,0.0,1.0,1.0,0.0,3.0,47.0,38.0,35.0,46.0
8.0,1.0,1.0,0.0,1.0,1.0,0.0,2.0,44.0,33.0,31.0
9.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,2.0,40.0,30.0
10.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,2.0,35.0


### Non-employees

In [15]:
# Same table as the overall one, restricted to non-employees (any org other
# than 'Mozilla Staff'): one row per experience group, one column per year.
exp_groups_evo_others_df = pandas.DataFrame(columns=['exp'])

for exp_df in exp_df_list:
    if exp_df.empty:
        continue

    # Second index level holds the organization
    exp_df = exp_df[[org not in ['Mozilla Staff'] for _author, org in exp_df.index]]
    if len(exp_df) == 0:
        continue

    # Every row of a snapshot shares the same last active year
    year = exp_df['last_active'].unique()[0]
    exp_groups_df = pandas.DataFrame(columns=['exp', year])

    experience = list(range(1, int(exp_df['exp'].max()) + 1))
    for exp in experience:
        count = len(exp_df[exp_df['exp'] == exp])
        exp_groups_df.loc[len(exp_groups_df)] = [exp, count]

    exp_groups_evo_others_df = exp_groups_evo_others_df.merge(exp_groups_df, on='exp', how='outer')

# Missing groups count as 0 people; index by experience and order year columns
exp_groups_evo_others_df = (
    exp_groups_evo_others_df
    .fillna(0)
    .set_index('exp')
    .sort_index(axis=1)
)

exp_groups_evo_others_df

Unnamed: 0_level_0,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
exp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1.0,173.0,87.0,87.0,91.0,126.0,150.0,209.0,305.0,336.0,320.0
2.0,42.0,108.0,29.0,31.0,44.0,59.0,47.0,67.0,95.0,121.0
3.0,33.0,30.0,8.0,18.0,17.0,23.0,26.0,22.0,39.0,38.0
4.0,27.0,16.0,0.0,8.0,11.0,11.0,12.0,17.0,6.0,24.0
5.0,26.0,17.0,0.0,0.0,8.0,7.0,7.0,8.0,10.0,2.0
6.0,18.0,8.0,0.0,0.0,0.0,6.0,4.0,8.0,4.0,9.0
7.0,9.0,8.0,0.0,0.0,0.0,0.0,5.0,4.0,7.0,4.0
8.0,13.0,3.0,1.0,0.0,0.0,0.0,0.0,3.0,4.0,7.0
9.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,4.0
10.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


Next plot can be read as follows:
* Y axis corresponds to the number of contributors in the given group.
* X axis corresponds to years we are looking through.
* Each line corresponds to a given group based on their years of experience. 

In [16]:
plotly.offline.init_notebook_mode(connected=True)

# One line per experience group (table row): years on X, people count on Y
data = [
    go.Scatter(
        x=group_counts.index.values,
        y=group_counts.tolist(),
        mode='lines+markers',
        name=str(int(exp)) + ' years'
    )
    for exp, group_counts in exp_groups_evo_df.iterrows()
]

plotly.offline.iplot(data, filename='line-mode')