In [1]:
from elasticsearch import Elasticsearch
from Queue import Queue
from threading import Thread
import time


import esinitialization as esinit # contains all the functions to create index and mapping, clean rows, delete index
import esquery as esq # contains all the functions to cater the queries

### Initiate Elastic Search Instance

In [2]:
# Client for the Elasticsearch node listening on 127.0.0.1:9200.
# NOTE(review): elasticsearch-py interprets `timeout` in seconds, so 5000 is
# roughly 83 minutes per request -- confirm this is intended (5 s / 5000 ms?).
es = Elasticsearch(hosts=["127.0.0.1:9200"], timeout=5000)
# Reset the index before creating a fresh index + mapping so the notebook can
# be re-run from a clean state (see the cell output below).
esinit.clean_index(es)
esinit.new_index_mapping(es)

Index exist: Deletion of index in process
New index created
New mapping created


### Stream data insertion to Elastic Search DB

In [3]:
# Work queue shared by the stream reader (producer) and the indexing worker
# threads (consumers). maxsize=0 makes the queue unbounded, so put() never blocks.
q = Queue(maxsize=0) #queue to keep the incoming event data
# Number of worker threads started to drain the queue.
num_threads = 10

# Function to get the event and store it in the queue
def get_stream(file_location, idle_timeout=10, poll_interval=0.1, out_queue=None):
    """Follow an open log file (like ``tail -f``) and enqueue each new line.

    The file position is first moved to the end of the file, so only lines
    appended *after* this call are picked up. Reading stops once no new data
    has arrived for ``idle_timeout`` seconds.

    :param file_location: open, seekable file object to follow.
    :param idle_timeout: seconds without new data after which the function
        returns (default 10, the original hard-coded value).
    :param poll_interval: seconds to sleep between polls when no data is
        available, so the loop does not spin a CPU core while waiting.
    :param out_queue: object with a ``put`` method receiving each line;
        defaults to the module-level queue ``q``.
    """
    if out_queue is None:
        out_queue = q  # module-level work queue shared with the workers

    file_location.seek(0, 2)  # whence=2: jump to end of file, skip existing content
    deadline = time.time() + idle_timeout
    # Fragment of a line whose terminating newline has not been written yet.
    partial = None
    while True:
        chunk = file_location.readline()
        if chunk:
            # New data arrived: push the idle deadline forward.
            deadline = time.time() + idle_timeout
            partial = chunk if partial is None else partial + chunk
            # readline() may return an incomplete line if the writer is
            # mid-write; only enqueue once the newline has been seen.
            if partial[-1:] in ('\n', b'\n'):
                out_queue.put(partial)
                partial = None
        elif time.time() > deadline:
            # Stream went quiet: flush a trailing line that never got a newline.
            if partial is not None:
                out_queue.put(partial)
            break
        else:
            time.sleep(poll_interval)

# Target function for the worker threads: cleans each queued event and indexes it in the Elasticsearch DB
def es_insert(q):
    """Worker loop: take raw log lines from ``q``, clean them and index them.

    Loops forever, so it is meant to run as the target of a daemon thread.
    Each item is passed through ``esinit.set_row`` and the resulting dict's
    ``_index``, ``_source`` and ``_type`` entries are sent to ``es.index``.

    ``task_done()`` is called for every item even when cleaning or indexing
    fails; otherwise the worker would die and ``q.join()`` in the caller
    would block forever.

    :param q: queue of raw log lines; must provide ``get`` and ``task_done``.
    """
    while True:
        raw_line = q.get()
        try:
            clean_log = esinit.set_row(raw_line)
            es.index(index=clean_log['_index'],
                     body=clean_log['_source'],
                     doc_type=clean_log['_type'])
        except Exception as exc:
            # A malformed row or a failed request must not kill the worker;
            # report it and carry on with the next event.
            print("Failed to index event %r: %r" % (raw_line, exc))
        finally:
            q.task_done()

In [4]:
# Start the consumers first so events are indexed while the stream is still
# being read, instead of piling up in the queue until the stream ends.
for i in range(num_threads):
    worker = Thread(target=es_insert, args=(q,))
    worker.daemon = True  # workers loop forever; do not let them block interpreter exit
    worker.start()

# `with` guarantees the log file is closed even if streaming raises.
with open("streaming_git.log", "r") as logfile:
    get_stream(logfile)

# Block until every queued event has been processed by a worker.
q.join()

### task3 - Answering the queries

In [5]:
# 1. Total number of teams
esq.total_number_teams(es)

1. Total number of teams: 5


In [6]:
# 2. Number of active repos in each team
# 3. Total number of events per team
esq.events_repos_teams(es)

2. Number of active repos in each team: 
	Team: A		Active repos: 1
	Team: C		Active repos: 4
	Team: D		Active repos: 2
	Team: E		Active repos: 3
	Team: B		Active repos: 3

3. Total number of events per team: 
	Team: A		Events: 57
	Team: C		Events: 56
	Team: D		Events: 52
	Team: E		Events: 51
	Team: B		Events: 48


In [7]:
# 4. Total number of events overall
esq.total_number_events(es)

4. Total Number of events overall: 271


In [8]:
# 5. Frequency of event types overall
esq.event_frequency_overall(es)

5. Frequency of event types overall: 
	Event type: issue		Events: 90
	Event type: push		Events: 79
	Event type: issue-comment		Events: 57
	Event type: pull-request		Events: 45


In [9]:
# 6. Frequency of event types per team
esq.event_frequency_team(es)

6. Frequency of event types per team: 
	Team: A
		Event type: issue		Events: 24
		Event type: push		Events: 17
		Event type: issue-comment		Events: 10
		Event type: pull-request		Events: 10
	Team: C
		Event type: issue		Events: 19
		Event type: push		Events: 16
		Event type: pull-request		Events: 14
		Event type: issue-comment		Events: 11
	Team: E
		Event type: issue		Events: 24
		Event type: push		Events: 15
		Event type: issue-comment		Events: 10
		Event type: pull-request		Events: 7
	Team: D
		Event type: push		Events: 20
		Event type: issue-comment		Events: 15
		Event type: issue		Events: 14
		Event type: pull-request		Events: 6
	Team: B
		Event type: push		Events: 15
		Event type: issue		Events: 12
		Event type: issue-comment		Events: 11
		Event type: pull-request		Events: 11


In [10]:
# 7. Frequency of event types per repo
esq.event_frequency_repo(es)

7. Frequency of event types per repo: 
	Repo: proj1
		Event type: push		Events: 48
		Event type: issue		Events: 47
		Event type: pull-request		Events: 27
		Event type: issue-comment		Events: 23
	Repo: proj2
		Event type: issue		Events: 24
		Event type: issue-comment		Events: 19
		Event type: push		Events: 18
		Event type: pull-request		Events: 9
	Repo: proj3
		Event type: issue		Events: 17
		Event type: push		Events: 13
		Event type: issue-comment		Events: 11
		Event type: pull-request		Events: 8
	Repo: proj4
		Event type: issue		Events: 5
		Event type: issue-comment		Events: 4
		Event type: pull-request		Events: 4
		Event type: push		Events: 4


In [11]:
# 8. Average time difference between events per team
esq.event_time_difference_team(es)

8. Average Time Difference between events per team: 
	Team: A		Time Difference: 4419.3 milliseconds
	Team: C		Time Difference: 4748.55932203 milliseconds
	Team: D		Time Difference: 5056.57142857 milliseconds
	Team: E		Time Difference: 4949.33928571 milliseconds
	Team: B		Time Difference: 5370.48979592 milliseconds


In [12]:
# 9. Average time difference between same events per team
esq.same_event_time_difference_team(es)

9. Average Time Difference between same events per team: 
	Team: A
		Event: issue		Time Difference: 11528.6086957 milliseconds
		Event: push		Time Difference: 15884.3125 milliseconds
		Event: issue-comment		Time Difference: 23124.4444444 milliseconds
		Event: pull-request		Time Difference: 26459.6666667 milliseconds
	Team: C
		Event: issue		Time Difference: 12674.0 milliseconds
		Event: push		Time Difference: 18677.6666667 milliseconds
		Event: pull-request		Time Difference: 21089.3076923 milliseconds
		Event: issue-comment		Time Difference: 23213.9 milliseconds
	Team: D
		Event: push		Time Difference: 14219.0526316 milliseconds
		Event: issue-comment		Time Difference: 18296.4285714 milliseconds
		Event: issue		Time Difference: 20396.6923077 milliseconds
		Event: pull-request		Time Difference: 32304.5714286 milliseconds
	Team: E
		Event: issue		Time Difference: 11223.9565217 milliseconds
		Event: push		Time Difference: 19797.3571429 milliseconds
		Event: issue-comment		Time Difference: