In [13]:
from elasticsearch import Elasticsearch
from Queue import Queue
from threading import Thread
import time


import esinitialization as esinit # contains all the functions to create index and mapping, clean rows, delete index
import esquery as esq # contains all the functions to cater the queries

### Initialize the Elasticsearch instance

In [14]:
# Client for the Elasticsearch node at 127.0.0.1:9200; every later cell reuses it.
# NOTE(review): timeout=5000 is presumably in seconds, which is very generous - confirm it is intended.
es = Elasticsearch(hosts=["127.0.0.1:9200"], timeout=5000)
# Reset the target index before loading. Order matters: clean first, then recreate.
# Judging by the cell output, clean_index deletes an already-existing index and
# new_index_mapping creates the index plus its mapping; both live in esinitialization
# and their implementations are not shown in this notebook.
esinit.clean_index(es)
esinit.new_index_mapping(es)

Index exist: Deletion of index in process
New index created
New mapping created


### Stream data insertion into Elasticsearch

In [15]:
# Shared work queue: get_stream() puts raw event lines on it, es_insert() workers take them off.
# maxsize=0 means the queue is unbounded, so put() never blocks.
q = Queue(maxsize=0) #queue to keep the incoming event data
# Number of es_insert worker threads started by the ingestion cell.
num_threads = 10

# Function to get the events from the stream and store them in the queue
def get_stream(file_location, idle_timeout=10, poll_interval=0.1, out_queue=None):
    """Tail an open log file and enqueue every line appended to it.

    The file position is first moved to the end of the file, so only lines
    written after this call starts are picked up. The function returns once
    no new line has appeared for ``idle_timeout`` seconds.

    Args:
        file_location: open, readable file object (despite the name, this is
            not a path).
        idle_timeout: seconds without new data after which reading stops.
        poll_interval: seconds to sleep between polls while no new data is
            available, so the loop does not spin at full CPU.
        out_queue: queue that receives the raw lines; when omitted, the
            notebook-level ``q`` is used.
    """
    sink = q if out_queue is None else out_queue
    file_location.seek(0, 2)  # whence=2: jump to the current end of the file
    deadline = time.time() + idle_timeout
    while True:
        line = file_location.readline()
        if line:
            sink.put(line)
            # Fresh data arrived: restart the idle countdown.
            deadline = time.time() + idle_timeout
            continue
        if time.time() > deadline:
            break
        # Nothing new yet; yield the CPU instead of busy-waiting.
        time.sleep(poll_interval)

# Target function for the worker threads: cleans each queued event and indexes it in Elasticsearch
def es_insert(q):
    """Consume raw events from ``q`` forever and index them.

    Each item is passed through ``esinit.set_row``; the result is used as a
    mapping with ``'_index'``, ``'_source'`` and ``'_type'`` keys, which feed
    the ``es.index`` call on the notebook-level client.

    ``task_done()`` is called for every item, even when cleaning or indexing
    fails. Otherwise a single bad event would kill the worker without
    acknowledging the item and ``q.join()`` could block forever. Failures
    are logged and the worker moves on to the next event.

    Args:
        q: queue of raw event lines; it must support ``get()`` and
            ``task_done()``.
    """
    import logging  # local import keeps this cell self-contained

    while True:
        raw_event = q.get()
        try:
            clean_log = esinit.set_row(raw_event)
            es.index(index=clean_log['_index'], body=clean_log['_source'], doc_type=clean_log['_type'])
        except Exception:
            # Keep the worker alive; report the event that could not be indexed.
            logging.exception("Failed to index event: %r", raw_event)
        finally:
            q.task_done()

In [16]:
# Start the consumers first so events are indexed while the stream is still
# being read, instead of piling up in the queue until get_stream() returns.
for _ in range(num_threads):
    worker = Thread(target=es_insert, args=(q,))
    worker.daemon = True  # daemon threads never block interpreter shutdown
    worker.start()

# The context manager closes the log file even if reading is interrupted.
with open("streaming_git.log", "r") as logfile:
    get_stream(logfile)

# Wait until every queued event has been acknowledged by a worker.
q.join()

### Task 3 - Answering the queries

In [17]:
# 1. Total number of teams
# Delegates to the esquery helper using the shared `es` client; the helper's
# implementation is not shown in this notebook.
esq.total_number_teams(es)

1. Total number of teams: 5


In [18]:
# 2. Number of active repos in each team
# 3. Total number of events per team
# A single esquery call reports both answers (see the two sections in the cell output).
esq.events_repos_teams(es)

2. Number of active repos in each team: 
	Team: D		Active repos: 2
	Team: E		Active repos: 3
	Team: B		Active repos: 3
	Team: A		Active repos: 1
	Team: C		Active repos: 4

3. Total number of events per team: 
	Team: D		Events: 87
	Team: E		Events: 82
	Team: B		Events: 62
	Team: A		Events: 55
	Team: C		Events: 52


In [19]:
# 4. Total number of events overall
# Counted across all teams by the esquery helper, queried through the shared `es` client.
esq.total_number_events(es)

4. Total Number of events overall: 348


In [20]:
# 5. Frequency of event types overall
# One count per event type, reported by the esquery helper.
esq.event_frequency_overall(es)

5. Frequency of event types overall: 
	Event type: push		Events: 108
	Event type: issue		Events: 105
	Event type: pull-request		Events: 69
	Event type: issue-comment		Events: 66


In [21]:
# 6. Frequency of event types per team
# Same breakdown as query 5, grouped by team in the report.
esq.event_frequency_team(es)

6. Frequency of event types per team: 
	Team: D
		Event type: issue		Events: 34
		Event type: push		Events: 23
		Event type: issue-comment		Events: 17
		Event type: pull-request		Events: 15
	Team: E
		Event type: issue		Events: 26
		Event type: issue-comment		Events: 22
		Event type: push		Events: 20
		Event type: pull-request		Events: 18
	Team: B
		Event type: issue		Events: 21
		Event type: push		Events: 21
		Event type: issue-comment		Events: 11
		Event type: pull-request		Events: 11
	Team: A
		Event type: push		Events: 24
		Event type: pull-request		Events: 13
		Event type: issue		Events: 11
		Event type: issue-comment		Events: 8
	Team: C
		Event type: push		Events: 20
		Event type: issue		Events: 13
		Event type: pull-request		Events: 12
		Event type: issue-comment		Events: 8


In [22]:
# 7. Frequency of event types per repo
# Same breakdown as query 5, grouped by repository in the report.
esq.event_frequency_repo(es)

7. Frequency of event types per repo: 
	Repo: proj1
		Event type: issue		Events: 46
		Event type: push		Events: 45
		Event type: pull-request		Events: 32
		Event type: issue-comment		Events: 30
	Repo: proj2
		Event type: issue		Events: 40
		Event type: push		Events: 39
		Event type: issue-comment		Events: 24
		Event type: pull-request		Events: 20
	Repo: proj3
		Event type: issue		Events: 17
		Event type: push		Events: 16
		Event type: pull-request		Events: 12
		Event type: issue-comment		Events: 10
	Repo: proj4
		Event type: push		Events: 8
		Event type: pull-request		Events: 5
		Event type: issue		Events: 2
		Event type: issue-comment		Events: 2


In [23]:
# 8. Average time difference between events per team
# The helper reports the values in milliseconds (see the cell output); how the
# average is computed is defined in esquery and not shown here.
esq.event_time_difference_team(es)

8. Average Time Difference between events per team: 
	Team: D		Time Difference: 3840.14606742 milliseconds
	Team: E		Time Difference: 4102.39534884 milliseconds
	Team: B		Time Difference: 5120.6875 milliseconds
	Team: A		Time Difference: 5744.66071429 milliseconds
	Team: C		Time Difference: 6448.45283019 milliseconds


In [24]:
# 9. Average time difference between same events per team
# Like query 8, but broken down by event type within each team; values are
# reported in milliseconds.
esq.same_event_time_difference_team(es)

9. Average Time Difference between same events per team: 
	Team: D
		Event: issue		Time Difference: 9933.88235294 milliseconds
		Event: push		Time Difference: 14161.0 milliseconds
		Event: issue-comment		Time Difference: 20104.2941176 milliseconds
		Event: pull-request		Time Difference: 20179.1333333 milliseconds
	Team: E
		Event: issue		Time Difference: 13530.9230769 milliseconds
		Event: issue-comment		Time Difference: 15306.1363636 milliseconds
		Event: push		Time Difference: 16988.55 milliseconds
		Event: pull-request		Time Difference: 18428.8333333 milliseconds
	Team: B
		Event: issue		Time Difference: 15222.4761905 milliseconds
		Event: push		Time Difference: 14986.0 milliseconds
		Event: issue-comment		Time Difference: 20861.6363636 milliseconds
		Event: pull-request		Time Difference: 23957.5454545 milliseconds
	Team: A
		Event: push		Time Difference: 11731.7083333 milliseconds
		Event: pull-request		Time Difference: 24281.8461538 milliseconds
		Event: issue		Time Difference: 29