In [31]:
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

import esinitialization as esinit # contains all the functions to create index and mapping, clean rows, delete index
import esquery as esq # contains all the functions to cater the queries

### Initialize the Elasticsearch client

In [32]:
# Connection settings for the local Elasticsearch node, kept as named
# constants so they are easy to find and tune.
ES_HOSTS = ["127.0.0.1:9200"]
# NOTE(review): elasticsearch-py appears to interpret `timeout` in seconds,
# which would make 5000 roughly 83 minutes -- confirm this is intended.
ES_TIMEOUT = 5000

es = Elasticsearch(hosts=ES_HOSTS, timeout=ES_TIMEOUT)

# Start from a clean slate: remove the index if it already exists, then
# recreate it together with its mapping (see the messages printed below).
# WARNING: this is destructive -- re-running the cell discards whatever was
# previously indexed.
esinit.clean_index(es)
esinit.new_index_mapping(es)

Index exist: Deletion of index in process
New index created
New mapping created


### Load the data and bulk-insert it into Elasticsearch

In [33]:
# Read the raw event log into memory, one list entry per line of the file.
with open("git-events.log", "r") as log_file:
    log_string = list(log_file)

# Convert every raw line into the row structure produced by esinit.set_row;
# the resulting list is what gets bulk-inserted further down.
log_list = [esinit.set_row(log) for log in log_string]

In [34]:
# Spot-check a single converted row to confirm the action structure
# (_index / _type / _source) before bulk indexing.
# The index 182 is presumably arbitrary -- any row would do.
log_list[182]

{'_index': 'events',
 '_source': {'created_at': '2017-09-19T19:39:24.312Z',
  'event': 'issue',
  'repo': 'proj1',
  'team': 'B'},
 '_type': 'event'}

In [35]:
# Index every prepared row through the bulk helper. raise_on_error=True makes
# a failed action raise instead of being silently skipped.
# The second return value is bound to a real name rather than `_`, which
# IPython also uses for "last cell output".
success, errors = bulk(es, log_list, index = "events", raise_on_error = True)

# Sanity check: every row that was prepared must have been indexed.
assert success == len(log_list), "bulk indexed %d of %d rows" % (success, len(log_list))

# Elasticsearch search is near-real-time: freshly indexed documents only become
# searchable after a refresh. Force one here so the query cells below see the
# complete data set even when they run immediately afterwards (e.g. Run All).
es.indices.refresh(index="events")

### Task 3 - Answering the queries

In [37]:
# 1. Total number of teams
# The query itself lives in the esquery module; judging by the output below,
# the helper reports the answer itself instead of returning a value to display.
esq.total_number_teams(es)

1. Total number of teams: 4


In [38]:
# 2. Number of active repos in each team
# 3. Total number of events per team
# A single helper call answers both questions (see the two sections it prints).
# NOTE(review): the per-team event totals printed below add up to 9890, while
# query 4 reports 10378 events overall and the per-team counts of query 6 also
# add up to 10378. Verify the aggregation in esq.events_repos_teams; another
# possible cause is this cell having run before the index was fully refreshed
# after the bulk insert -- confirm.
esq.events_repos_teams(es)

2. Number of active repos in each team: 
	Team: A		Active repos: 3
	Team: D		Active repos: 2
	Team: B		Active repos: 2
	Team: C		Active repos: 4

3. Total number of events per team: 
	Team: A		Events: 2537
	Team: D		Events: 2531
	Team: B		Events: 2462
	Team: C		Events: 2360


In [39]:
# 4. Total number of events overall
# NOTE(review): this figure is expected to equal the number of rows that were
# bulk-inserted (len(log_list)) -- worth checking as a sanity test.
esq.total_number_events(es)

4. Total Number of events overall: 10378


In [40]:
# 5. Frequency of event types overall
esq.event_frequency_overall(es)

5. Frequency of event types overall: 
	Event type: push		Events: 3820
	Event type: issue		Events: 2769
	Event type: pull-request		Events: 1942
	Event type: issue-comment		Events: 1847


In [41]:
# 6. Frequency of event types per team
esq.event_frequency_team(es)

6. Frequency of event types per team: 
	Team: A
		Event type: push		Events: 970
		Event type: issue		Events: 691
		Event type: pull-request		Events: 531
		Event type: issue-comment		Events: 468
	Team: D
		Event type: push		Events: 975
		Event type: issue		Events: 718
		Event type: issue-comment		Events: 483
		Event type: pull-request		Events: 470
	Team: B
		Event type: push		Events: 976
		Event type: issue		Events: 701
		Event type: pull-request		Events: 475
		Event type: issue-comment		Events: 437
	Team: C
		Event type: push		Events: 899
		Event type: issue		Events: 659
		Event type: pull-request		Events: 466
		Event type: issue-comment		Events: 459


In [42]:
# 7. Frequency of event types per repo
# NOTE(review): only four repo names (proj1..proj4) appear in the output, yet
# the teams report 3 + 2 + 2 + 4 = 11 active repos between them, so repo names
# must be shared across teams. These buckets therefore merge same-named repos
# from different teams -- confirm that this is the intended grouping.
esq.event_frequency_repo(es)

7. Frequency of event types per repo: 
	Repo: proj1
		Event type: push		Events: 1581
		Event type: issue		Events: 1145
		Event type: pull-request		Events: 796
		Event type: issue-comment		Events: 735
	Repo: proj2
		Event type: push		Events: 1512
		Event type: issue		Events: 1072
		Event type: pull-request		Events: 732
		Event type: issue-comment		Events: 717
	Repo: proj3
		Event type: push		Events: 502
		Event type: issue		Events: 396
		Event type: pull-request		Events: 299
		Event type: issue-comment		Events: 291
	Repo: proj4
		Event type: push		Events: 225
		Event type: issue		Events: 156
		Event type: pull-request		Events: 115
		Event type: issue-comment		Events: 104


In [43]:
# 8. Average time difference between events per team
# The output below is labelled in milliseconds.
esq.event_time_difference_team(es)

8. Average Time Difference between events per team: 
	Team: A		Time Difference: 43.1398496241 milliseconds
	Team: D		Time Difference: 43.4074074074 milliseconds
	Team: B		Time Difference: 44.2611046736 milliseconds
	Team: C		Time Difference: 46.2408376963 milliseconds


In [44]:
# 9. Average time difference between same events per team
# The output below is labelled in milliseconds and is broken down by event
# type within each team.
esq.same_event_time_difference_team(es)

9. Average Time Difference between same events per team: 
	Team: A
		Event: push		Time Difference: 117.918556701 milliseconds
		Event: issue		Time Difference: 163.004341534 milliseconds
		Event: pull-request		Time Difference: 213.870056497 milliseconds
		Event: issue-comment		Time Difference: 244.925213675 milliseconds
	Team: D
		Event: push		Time Difference: 117.801025641 milliseconds
		Event: issue		Time Difference: 157.240947075 milliseconds
		Event: issue-comment		Time Difference: 231.548654244 milliseconds
		Event: pull-request		Time Difference: 241.559574468 milliseconds
	Team: B
		Event: push		Time Difference: 117.278688525 milliseconds
		Event: issue		Time Difference: 160.921540656 milliseconds
		Event: pull-request		Time Difference: 232.096842105 milliseconds
		Event: issue-comment		Time Difference: 261.519450801 milliseconds
	Team: C
		Event: push		Time Difference: 126.104560623 milliseconds
		Event: issue		Time Difference: 173.298937785 milliseconds
		Event: pull-request		Ti