In [1]:
from elasticsearch import Elasticsearch
from Queue import Queue
from threading import Thread
import time


import esinitialization as esinit # contains all the functions to create index and mapping, clean rows, delete index
import esquery as esq # contains all the functions to cater the queries

### Initialize the Elasticsearch instance

In [2]:
# Connection settings for the local Elasticsearch node.
ES_HOSTS = ["127.0.0.1:9200"]
# NOTE(review): forwarded as-is to the client's `timeout` argument; confirm the
# unit the client expects -- 5000 is very large if it is interpreted as seconds.
ES_TIMEOUT = 5000

es = Elasticsearch(timeout=ES_TIMEOUT, hosts=ES_HOSTS)

# Reset the index (helper drops it if it already exists), then recreate the
# index together with its mapping so every run starts from a clean state.
esinit.clean_index(es)
esinit.new_index_mapping(es)

Index exist: Deletion of index in process
New index created
New mapping created


### Stream data insertion into Elasticsearch

In [3]:
# Number of worker threads that drain the queue and index rows.
num_threads = 10

# Buffer for incoming raw event lines; the default maxsize of 0 means the
# queue is unbounded, so producers never block on put().
q = Queue()

# Function to get the events appended to a log file and store them in the queue
def get_stream(file_location, idle_timeout=10, poll_interval=0.1, out_queue=None):
    """Follow an open file (like ``tail -f``) and enqueue every new line.

    The file position is first moved to the end of the file, so only lines
    appended after the call starts are streamed. The function returns once no
    new line has been read for ``idle_timeout`` seconds.

    Args:
        file_location: open, readable file-like object supporting
            ``seek(offset, whence)`` and ``readline()``.
        idle_timeout: seconds without new data after which streaming stops
            (default 10, the value that used to be hard-coded).
        poll_interval: seconds to sleep between polls while no data is
            available, so the loop does not spin at 100% CPU.
        out_queue: object with a ``put(item)`` method receiving each line;
            defaults to the module-level queue ``q``.

    Returns:
        None. Lines are delivered through ``out_queue`` as a side effect.
    """
    if out_queue is None:
        out_queue = q  # shared module-level queue consumed by the workers

    # whence=2 means "relative to end of file": skip everything already there.
    file_location.seek(0, 2)

    deadline = time.time() + idle_timeout
    while True:
        line = file_location.readline()
        if line:
            # Data arrived: push the idle deadline forward and hand the line over.
            # NOTE(review): readline() at end-of-file returns whatever is
            # available, so a line that is still being written may arrive in
            # two pieces -- confirm the writer appends whole lines.
            deadline = time.time() + idle_timeout
            out_queue.put(line)
        elif time.time() > deadline:
            break
        else:
            # Nothing new yet: yield the CPU instead of busy-waiting.
            time.sleep(poll_interval)

# Target function for threading: cleans each queued row and indexes it in Elasticsearch
def es_insert(q, client=None, clean_row=None):
    """Worker loop: take raw lines from ``q``, clean them and index them.

    Runs forever, so it is meant to be the target of a daemon thread. Every
    item taken from the queue is acknowledged with ``q.task_done()`` even when
    cleaning or indexing fails; otherwise a single bad row would kill the
    worker and leave ``q.join()`` blocked forever.

    Args:
        q: queue providing ``get()`` and ``task_done()``; holds raw log lines.
        client: object exposing ``index(index=..., body=..., doc_type=...)``;
            defaults to the module-level Elasticsearch client ``es``.
        clean_row: callable turning a raw line into a dict with the keys
            ``'_index'``, ``'_source'`` and ``'_type'``; defaults to
            ``esinit.set_row``.
    """
    import traceback  # local import: only used to report failed rows

    if client is None:
        client = es
    if clean_row is None:
        clean_row = esinit.set_row

    while True:
        raw_line = q.get()
        try:
            clean_log = clean_row(raw_line)
            client.index(index=clean_log['_index'],
                         body=clean_log['_source'],
                         doc_type=clean_log['_type'])
        except Exception:
            # Report the failure but keep the worker alive for later rows.
            traceback.print_exc()
        finally:
            # Always acknowledge the item so q.join() can return.
            q.task_done()

In [4]:
# Start the indexing workers *before* tailing the log, so events are indexed
# while they arrive instead of accumulating in the unbounded queue until the
# stream has gone idle.
for i in range(num_threads):
    worker = Thread(target=es_insert, args=(q,))
    # Workers loop forever; daemon threads do not keep the interpreter alive.
    worker.daemon = True
    worker.start()

# `with` guarantees the log file is closed even if get_stream raises.
with open("streaming_git.log", "r") as logfile:
    get_stream(logfile)

# Block until every queued line has been acknowledged with task_done().
q.join()

### Task 3 - Answering the queries

In [5]:
# 1. Total number of teams
# The query itself lives in the esq module; the answer appears in the cell output.
esq.total_number_teams(es)

1. Total number of teams: 5


In [6]:
# 2. Number of active repos in each team
# 3. Total number of events per team
# A single esq call reports both answers (see the two sections in the output).
esq.events_repos_teams(es)

2. Number of active repos in each team: 
	Team: C		Active repos: 4
	Team: B		Active repos: 3
	Team: A		Active repos: 1
	Team: D		Active repos: 2
	Team: E		Active repos: 3

3. Total number of events per team: 
	Team: C		Events: 21
	Team: B		Events: 18
	Team: A		Events: 16
	Team: D		Events: 15
	Team: E		Events: 14


In [7]:
# 4. Total number of events overall (across all teams)
esq.total_number_events(es)

4. Total Number of events overall: 84


In [8]:
# 5. Frequency of event types overall
esq.event_frequency_overall(es)

5. Frequency of event types overall: 
	Event type: push		Events: 30
	Event type: issue		Events: 24
	Event type: issue-comment		Events: 20
	Event type: pull-request		Events: 10


In [9]:
# 6. Frequency of event types per team
esq.event_frequency_team(es)

6. Frequency of event types per team: 
	Team: C
		Event type: issue-comment		Events: 8
		Event type: issue		Events: 6
		Event type: push		Events: 5
		Event type: pull-request		Events: 2
	Team: B
		Event type: push		Events: 7
		Event type: issue-comment		Events: 5
		Event type: issue		Events: 4
		Event type: pull-request		Events: 2
	Team: A
		Event type: push		Events: 7
		Event type: issue		Events: 4
		Event type: pull-request		Events: 3
		Event type: issue-comment		Events: 2
	Team: D
		Event type: issue-comment		Events: 5
		Event type: push		Events: 5
		Event type: issue		Events: 3
		Event type: pull-request		Events: 2
	Team: E
		Event type: issue		Events: 7
		Event type: push		Events: 6
		Event type: pull-request		Events: 1


In [10]:
# 7. Frequency of event types per repo
# Results are grouped by repo name, with one count per event type.
esq.event_frequency_repo(es)

7. Frequency of event types per repo: 
	Repo: proj1
		Event type: push		Events: 15
		Event type: issue-comment		Events: 10
		Event type: issue		Events: 9
		Event type: pull-request		Events: 4
	Repo: proj2
		Event type: issue-comment		Events: 7
		Event type: issue		Events: 6
		Event type: push		Events: 6
		Event type: pull-request		Events: 4
	Repo: proj3
		Event type: push		Events: 8
		Event type: issue		Events: 6
		Event type: issue-comment		Events: 2
		Event type: pull-request		Events: 2
	Repo: proj4
		Event type: issue		Events: 3
		Event type: issue-comment		Events: 1
		Event type: push		Events: 1


In [11]:
# 8. Average time difference between consecutive events per team
# The helper reports the values in milliseconds.
esq.event_time_difference_team(es)

8. Average Time Difference between events per team: 
	Team: C		Time Difference: 4288.4 milliseconds
	Team: B		Time Difference: 5062.0 milliseconds
	Team: A		Time Difference: 5945.0 milliseconds
	Team: D		Time Difference: 6680.46666667 milliseconds
	Team: E		Time Difference: 5583.57142857 milliseconds


In [12]:
# 9. Average time difference between events of the same type, per team
# NOTE(review): the saved output of this cell ends in
# "ZeroDivisionError: float division by zero" while reporting
# Team D / pull-request. The division happens inside the esq module (not
# visible here), so the cause cannot be confirmed from this notebook --
# presumably a group that yields no usable interval. TODO: guard the division
# in esq.same_event_time_difference_team and re-run this cell.
esq.same_event_time_difference_team(es)

9. Average Time Difference between same events per team: 
	Team: C
		Event: issue-comment		Time Difference: 13276.25 milliseconds
		Event: issue		Time Difference: 11870.5714286 milliseconds
		Event: push		Time Difference: 13017.6 milliseconds
		Event: pull-request		Time Difference: 30042.0 milliseconds
	Team: B
		Event: push		Time Difference: 13739.7142857 milliseconds
		Event: issue		Time Difference: 15272.25 milliseconds
		Event: issue-comment		Time Difference: 15771.5 milliseconds
		Event: pull-request		Time Difference: 37014.0 milliseconds
	Team: A
		Event: push		Time Difference: 12848.6666667 milliseconds
		Event: issue		Time Difference: 18361.6666667 milliseconds
		Event: pull-request		Time Difference: 9504.0 milliseconds
		Event: issue-comment		Time Difference: 74169.0 milliseconds
	Team: D
		Event: push		Time Difference: 20041.4 milliseconds
		Event: issue-comment		Time Difference: 5752.0 milliseconds
		Event: issue		Time Difference: 35085.0 milliseconds
		Event: pull-request		

ZeroDivisionError: float division by zero