In [1]:
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

import esinitialization as esinit # project helpers: create the index and mapping, clean rows, delete the index
import esquery as esq # project helpers: functions that answer the queries in this notebook

### Initialize the Elasticsearch instance

In [2]:
# Client for the Elasticsearch node at 127.0.0.1:9200; `es` is reused by every later cell.
# NOTE(review): timeout=5000 -- the Python client takes this value in seconds
# (roughly 83 minutes); confirm milliseconds were not intended.
es = Elasticsearch(hosts=["127.0.0.1:9200"], timeout=5000)
# Start from a clean slate. Judging by the cell output, an existing index is
# deleted first and then a new index and mapping are created, so re-running
# this cell discards previously indexed documents.
esinit.clean_index(es)
esinit.new_index_mapping(es)

Index exist: Deletion of index in process
New index created
New mapping created


### Load the data and bulk-insert it into Elasticsearch

In [3]:
# Read every raw line of the event log into memory first ...
with open("git-events.log", "r") as log_file:
    log_string = log_file.readlines()

# ... then convert each line with esinit.set_row. The resulting list is what
# the later cells inspect and pass to bulk().
log_list = [esinit.set_row(log) for log in log_string]

In [11]:
# Spot-check one prepared entry to see the dict shape that will be handed to bulk()
# (index 182 is presumably an arbitrary sample).
log_list[182]

{'_index': 'events',
 '_source': {'created_at': '2017-09-19T19:39:24.312Z',
  'event': 'issue',
  'repo': 'proj1',
  'team': 'B'},
 '_type': 'event'}

In [4]:
# Bulk-index every prepared action into the "events" index.
# bulk() returns (number of successfully executed actions, errors). The second
# element is bound to a real name rather than `_`, because IPython uses `_` for
# the previous cell's output and stops refreshing it once user code assigns to it.
# raise_on_error=True makes a failed action raise instead of being reported silently.
success, errors = bulk(es, log_list, index="events", raise_on_error=True)

In [5]:
# Number of actions bulk() reported as successfully indexed.
# The call form of print gives the same output on Python 2 for a single
# argument and, unlike the statement form, is also valid on Python 3.
print(success)

10870


In [14]:
# Disabled snippet: indexes one prepared entry (log_list[14]) directly through
# es.index() instead of going through bulk().
# NOTE(review): this cell contains only commented-out code -- delete it or turn it
# into a documented helper before sharing the notebook.
#es.index(index=log_list[14]['_index'], body=log_list[14]['_source'], doc_type=log_list[14]['_type'])

### Answering the queries

In [6]:
# 1. Total number of teams
# The helper prints its own labelled result (see the output below).
esq.total_number_teams(es)

1. Total number of teams: 4


In [17]:
# 2. Number of active repos in each team
# 3. Total number of events per team
# A single helper call answers both questions; it prints one section per question.
esq.events_repos_teams(es)

2. Number of active repos in each team: 
	Team: A		Active repos: 3
	Team: D		Active repos: 2
	Team: B		Active repos: 2
	Team: C		Active repos: 4

3. Total number of events per team: 
	Team: A		Events: 2783
	Team: D		Events: 2772
	Team: B		Events: 2709
	Team: C		Events: 2606


In [18]:
# 4. Total number of events overall
# Sanity check: the printed total should equal the count reported by the bulk insert.
esq.total_number_events(es)

4. Total Number of events overall: 10870


In [19]:
# 5. Frequency of event types overall
esq.event_frequency_overall(es)

5. Frequency of event types overall: 
	Event type: push		Events: 4000
	Event type: issue		Events: 2899
	Event type: pull-request		Events: 2036
	Event type: issue-comment		Events: 1935


In [20]:
# 6. Frequency of event types per team
esq.event_frequency_team(es)

6. Frequency of event types per team: 
	Team: A
		Event type: push		Events: 1009
		Event type: issue		Events: 726
		Event type: pull-request		Events: 557
		Event type: issue-comment		Events: 491
	Team: D
		Event type: push		Events: 1022
		Event type: issue		Events: 742
		Event type: issue-comment		Events: 508
		Event type: pull-request		Events: 500
	Team: B
		Event type: push		Events: 1023
		Event type: issue		Events: 736
		Event type: pull-request		Events: 497
		Event type: issue-comment		Events: 453
	Team: C
		Event type: push		Events: 946
		Event type: issue		Events: 695
		Event type: issue-comment		Events: 483
		Event type: pull-request		Events: 482


In [21]:
# 7. Frequency of event types per repo
# The helper prints one group per repo with a count for each event type.
esq.event_frequency_repo(es)

7. Frequency of event types per repo: 
	Repo: proj1
		Event type: push		Events: 1644
		Event type: issue		Events: 1191
		Event type: pull-request		Events: 838
		Event type: issue-comment		Events: 769
	Repo: proj2
		Event type: push		Events: 1597
		Event type: issue		Events: 1131
		Event type: pull-request		Events: 770
		Event type: issue-comment		Events: 750
	Repo: proj3
		Event type: push		Events: 518
		Event type: issue		Events: 414
		Event type: pull-request		Events: 309
		Event type: issue-comment		Events: 303
	Repo: proj4
		Event type: push		Events: 241
		Event type: issue		Events: 163
		Event type: pull-request		Events: 119
		Event type: issue-comment		Events: 113


In [22]:
# 8. Average time difference between events per team
# The helper prints the averages in seconds.
esq.event_time_difference_team(es)

8. Average Time Difference between events per team: 
	Team: A		Time Difference: 41.2669780812 seconds
	Team: D		Time Difference: 41.4343434343 seconds
	Team: B		Time Difference: 42.3588039867 seconds
	Team: C		Time Difference: 44.0909439754 seconds


In [23]:
# 9. Average time difference between events of the same type, per team
# The helper prints one group per team with an average (in seconds) for each event type.
esq.same_event_time_difference_team(es)

9. Average Time Difference between same events per team: 
	Team: A
		Event: push		Time Difference: 113.654112983 seconds
		Event: issue		Time Difference: 158.11707989 seconds
		Event: pull-request		Time Difference: 203.886894075 seconds
		Event: issue-comment		Time Difference: 233.452138493 seconds
	Team: D
		Event: push		Time Difference: 112.383561644 seconds
		Event: issue		Time Difference: 154.002695418 seconds
		Event: issue-comment		Time Difference: 224.5 seconds
		Event: pull-request		Time Difference: 227.884 seconds
	Team: B
		Event: push		Time Difference: 112.036168133 seconds
		Event: issue		Time Difference: 155.349184783 seconds
		Event: pull-request		Time Difference: 230.885311871 seconds
		Event: issue-comment		Time Difference: 252.863134658 seconds
	Team: C
		Event: push		Time Difference: 121.325581395 seconds
		Event: issue		Time Difference: 165.050359712 seconds
		Event: issue-comment		Time Difference: 237.799171843 seconds
		Event: pull-request		Time Difference: 236.854