In [1]:
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

import esinitialization as esinit # contains all the functions to create index and mapping, clean rows, delete index
import esquery as esq # contains all the functions to cater the queries

### Initialize the Elasticsearch instance

In [2]:
# Connection settings for the local Elasticsearch node.
ES_HOSTS = ["127.0.0.1:9200"]
# NOTE(review): elasticsearch-py interprets `timeout` in seconds, so 5000 is roughly
# 83 minutes; confirm this was not meant to be milliseconds.
ES_TIMEOUT = 5000

es = Elasticsearch(hosts=ES_HOSTS, timeout=ES_TIMEOUT)

# Reset the index so the notebook can be re-run from the top: remove any existing
# index first, then create a fresh index together with its mapping.
esinit.clean_index(es)
esinit.new_index_mapping(es)

Index exist: Deletion of index in process
New index created
New mapping created


### Load the data and bulk-insert it into Elasticsearch

In [3]:
# Pull every raw line of the event log into memory (one list entry per line).
with open("git-events.log", "r") as log_file:
    log_string = list(log_file)

# Turn each raw line into the record structure that is sent to Elasticsearch;
# esinit.set_row performs the per-line conversion.
log_list = [esinit.set_row(raw_line) for raw_line in log_string]

In [4]:
# Spot-check a single prepared record (position 182) before the batch is sent to
# Elasticsearch, to confirm the structure produced by esinit.set_row.
log_list[182]

{'_index': 'events',
 '_source': {'created_at': '2017-09-19T19:39:24.312Z',
  'event': 'issue',
  'repo': 'proj1',
  'team': 'B'},
 '_type': 'event'}

In [5]:
# Index every prepared record through the bulk helper. With raise_on_error=True the
# helper raises instead of returning when a document fails to index. The second
# return value is bound to `errors` rather than `_`, so IPython's last-output
# variable `_` is not overwritten.
success, errors = bulk(es, log_list, index = "events", raise_on_error = True)
assert success == len(log_list), "bulk() indexed fewer documents than were prepared"

# Newly indexed documents only become visible to searches after an index refresh.
# Force one here so the queries below always run against the complete data set,
# regardless of how quickly the following cells execute after this one.
es.indices.refresh(index = "events")

### Task 3 - Answering the queries

In [6]:
# 1. Total number of teams
# Delegates to the esq query module, passing the shared `es` client.
esq.total_number_teams(es)

1. Total number of teams: 4


In [7]:
# 2. Number of active repos in each team
# 3. Total number of events per team
# One esq helper call reports both answers (two sections in the cell output).
esq.events_repos_teams(es)

2. Number of active repos in each team: 
	Team: D		Active repos: 2
	Team: A		Active repos: 3
	Team: B		Active repos: 2
	Team: C		Active repos: 4

3. Total number of events per team: 
	Team: D		Events: 2452
	Team: A		Events: 2415
	Team: B		Events: 2360
	Team: C		Events: 2284


In [8]:
# 4. Total number of events overall
# NOTE(review): this total is expected to equal the sum of the per-team and
# per-event-type counts reported by the other queries; verify on re-run.
esq.total_number_events(es)

4. Total Number of events overall: 10016


In [9]:
# 5. Frequency of event types overall
# Reports one event count per event type.
esq.event_frequency_overall(es)

5. Frequency of event types overall: 
	Event type: push		Events: 3913
	Event type: issue		Events: 2839
	Event type: pull-request		Events: 1998
	Event type: issue-comment		Events: 1894


In [10]:
# 6. Frequency of event types per team
# Reports, for each team, one event count per event type.
esq.event_frequency_team(es)

6. Frequency of event types per team: 
	Team: D
		Event type: push		Events: 1003
		Event type: issue		Events: 730
		Event type: issue-comment		Events: 501
		Event type: pull-request		Events: 490
	Team: A
		Event type: push		Events: 989
		Event type: issue		Events: 708
		Event type: pull-request		Events: 547
		Event type: issue-comment		Events: 479
	Team: B
		Event type: push		Events: 999
		Event type: issue		Events: 718
		Event type: pull-request		Events: 487
		Event type: issue-comment		Events: 442
	Team: C
		Event type: push		Events: 922
		Event type: issue		Events: 683
		Event type: pull-request		Events: 474
		Event type: issue-comment		Events: 472


In [11]:
# 7. Frequency of event types per repo
# Reports, for each repo, one event count per event type.
esq.event_frequency_repo(es)

7. Frequency of event types per repo: 
	Repo: proj1
		Event type: push		Events: 1606
		Event type: issue		Events: 1171
		Event type: pull-request		Events: 819
		Event type: issue-comment		Events: 751
	Repo: proj2
		Event type: push		Events: 1572
		Event type: issue		Events: 1099
		Event type: pull-request		Events: 760
		Event type: issue-comment		Events: 741
	Repo: proj3
		Event type: push		Events: 504
		Event type: issue		Events: 408
		Event type: pull-request		Events: 303
		Event type: issue-comment		Events: 295
	Repo: proj4
		Event type: push		Events: 231
		Event type: issue		Events: 161
		Event type: pull-request		Events: 116
		Event type: issue-comment		Events: 107


In [12]:
# 8. Average time difference between events per team
# The cell output labels the values as milliseconds.
esq.event_time_difference_team(es)

8. Average Time Difference between events per team: 
	Team: D		Time Difference: 42.176276166 milliseconds
	Team: A		Time Difference: 42.1917707568 milliseconds
	Team: B		Time Difference: 43.3837429112 milliseconds
	Team: C		Time Difference: 45.0141176471 milliseconds


In [13]:
# 9. Average time difference between same events per team
# Broken down by team and then by event type; the cell output labels the values
# as milliseconds.
esq.same_event_time_difference_team(es)

9. Average Time Difference between same events per team: 
	Team: D
		Event: push		Time Difference: 114.266467066 milliseconds
		Event: issue		Time Difference: 156.73388203 milliseconds
		Event: issue-comment		Time Difference: 228.092 milliseconds
		Event: pull-request		Time Difference: 233.010224949 milliseconds
	Team: A
		Event: push		Time Difference: 115.54757085 milliseconds
		Event: issue		Time Difference: 162.366336634 milliseconds
		Event: pull-request		Time Difference: 207.994505495 milliseconds
		Event: issue-comment		Time Difference: 239.80125523 milliseconds
	Team: B
		Event: push		Time Difference: 114.778557114 milliseconds
		Event: issue		Time Difference: 158.40027894 milliseconds
		Event: pull-request		Time Difference: 236.111111111 milliseconds
		Event: issue-comment		Time Difference: 259.410430839 milliseconds
	Team: C
		Event: push		Time Difference: 124.618892508 milliseconds
		Event: issue		Time Difference: 168.196480938 milliseconds
		Event: pull-request		Time Differe