In [1]:
import utils
#utils.logging.basicConfig(level=utils.logging.INFO)
settings = utils.read_config_file('settings.yml')
es = utils.establish_connection(settings['es_host'])

In [2]:
MAPPING_GIT = {
    "mappings": {
        "item": {
            "properties": {
                "date": {
                    "type": "date",
                    "format" : "E MMM d HH:mm:ss yyyy Z",
                    "locale" : "US"
                },
                "commit_id": {"type": "keyword"},
                "contributor_name": {"type": "keyword"},
                "contributor_email_domain": {"type": "keyword"},
                "file": {"type": "keyword"},
                "lines_added": {"type": "integer"},
                "lines_removed": {"type": "integer"},
                "github_owner": {"type": "keyword"},
                "github_repository": {"type": "keyword"}
            }
        }
    }
}

In [3]:
from perceval.backends.core.git import Git
from perceval.backends.core.github import GitHub

In [4]:
utils.create_ES_index(es, 'github-git', MAPPING_GIT)

In [None]:
for repo_url in settings['github-repo']:
    
    repo_owner = repo_url.split('/')[-2]
    repo_name = repo_url.split('/')[-1]
    repo_git_url = repo_url + '.git'
    
    git_repo = Git(uri=repo_git_url, gitpath='/tmp/'+repo_name)
    
    for commit in git_repo.fetch():
        
        contributor_name = commit['data']['Author'].split('<')[0][:-1]
        contributor_email_domain = commit['data']['Author'].split('@')[-1][:-1]
        
        for file in commit['data']['files']:
            if 'added' not in file.keys() or file['added'] == '-': 
                file['added'] = 0
            if 'removed' not in file.keys() or file['removed'] == '-':
                file['removed'] = 0
            
            summary = {
                'date': commit['data']['AuthorDate'], 
                'commit_id': commit['data']['commit'],
                'contributor_name': contributor_name, 
                'contributor_email_domain': contributor_email_domain,
                'file': file['file'],
                'lines_added': file['added'], 
                'lines_removed': file['removed'],
                'github_owner': repo_owner, 'github_repository': repo_name
            }
            
            print(summary)
            
            es.index(index='github-git', doc_type='item', body=summary)

{'file': '.gitignore', 'contributor_name': 'Santiago Dueñas', 'lines_added': '10', 'lines_removed': '0', 'commit_id': 'dc78c254e464ff334892e0448a23e4cfbfc637a3', 'github_owner': 'grimoirelab', 'contributor_email_domain': 'bitergia.com', 'date': 'Tue Aug 18 18:08:27 2015 +0200', 'github_repository': 'perceval'}
{'file': 'AUTHORS', 'contributor_name': 'Santiago Dueñas', 'lines_added': '1', 'lines_removed': '0', 'commit_id': 'dc78c254e464ff334892e0448a23e4cfbfc637a3', 'github_owner': 'grimoirelab', 'contributor_email_domain': 'bitergia.com', 'date': 'Tue Aug 18 18:08:27 2015 +0200', 'github_repository': 'perceval'}
{'file': 'LICENSE', 'contributor_name': 'Santiago Dueñas', 'lines_added': '674', 'lines_removed': '0', 'commit_id': 'dc78c254e464ff334892e0448a23e4cfbfc637a3', 'github_owner': 'grimoirelab', 'contributor_email_domain': 'bitergia.com', 'date': 'Tue Aug 18 18:08:27 2015 +0200', 'github_repository': 'perceval'}
{'file': 'tests/__init__.py', 'contributor_name': 'Santiago Dueñas', '

In [None]:
MAPPING_GITHUB = {
    "mappings": {
        "item": {
            "properties": {
                "date": {
                    "type": "date",
                    "format" : "dateOptionalTime"
                },
                "contributor_github_username": {"type": "keyword"},
                "title": {"type": "string"},
                "state": {"type": "keyword"},
                "issue_type": {"type": "keyword"},
                "url": {"type": "keyword"},
                "comments": {"type": "integer"},
                "closed_at": {
                    "type": "date",
                    "format": "dateOptionalTime"
                },
                "time_to_solve": {"type": "integer"},
                "assignee_github_username": {"type": "keyword"},
                "github_owner": {"type": "keyword"},
                "github_repository": {"type": "keyword"}
            }
        }
    }
}

In [None]:
utils.create_ES_index(es, 'github-issues', MAPPING_GIT)

In [None]:
import datetime as datetime

for repo_url in settings['github-repo']:
    
    repo_owner = repo_url.split('/')[-2]
    repo_name = repo_url.split('/')[-1]
    repo_git_url = repo_url + '.git'
    
    github_repo = GitHub(owner=repo_owner, repository=repo_name, api_token=settings['github_token'])
    
    for issue in github_repo.fetch():
        created_at = issue['data']['created_at']
        if issue['data']['state'] == 'closed':
            closed_at = issue['data']['closed_at']
            creation_date = datetime.datetime.strptime(created_at, "%Y-%m-%dT%H:%M:%SZ")
            closing_date = datetime.datetime.strptime(closed_at, "%Y-%m-%dT%H:%M:%SZ")
            delta_time = (closing_date - creation_date).seconds
        else:
            delta_time = None
        summary = {
            'date': created_at, 
            'contributor_github_username': issue['data']['user_data']['login'],
            'title': issue['data']['title'],
            'state': issue['data']['state'],
            'url': issue['data']['html_url'],
            'comments': issue['data']['comments'],
            'closed_at': issue['data']['closed_at'],
            'time_to_solve': delta_time,
            'github_owner': repo_owner,
            'github_repository': repo_name
        }
        if issue['data']['assignee'] != None:
            summary['assignee_github_username'] = issue['data']['assignee']['login']
        else:
            summary['assignee_github_username'] = None
        if 'pull_request' in issue['data'].keys():
            summary['issue_type'] = 'pull-request'
        else:
            summary['issue_type'] = 'issue'
        es.index(index='github-issues', doc_type='item', body=summary)