In [10]:
import logging
import ConfigParser
import csv
import functools
import getpass
import koji
import datetime
import time
from requests.exceptions import ConnectionError
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import sklearn
from sklearn.linear_model import LinearRegression

class Brew():
    """Small helper around a koji client session for pulling build records.

    The session is opened against the brewhub URL in ``__init__``; the other
    methods fetch builds day by day and dump them to a CSV file.
    """

    def __init__(self):
        """Open the koji client session used by ``find_data``."""
        self.client = koji.ClientSession('http://brewhub.engineering.redhat.com/brewhub')

    def retry(timeout=240, interval=60, wait_on=Exception):
        """Build a decorator that retries a callable until success or timeout.

        This is a plain function (no ``self``) that is applied as a decorator
        inside the class body.

        :param timeout: overall budget in seconds, measured from the first attempt.
        :param interval: seconds to sleep between attempts.
        :param wait_on: exception class (or tuple of classes) that triggers a
            retry; any other exception propagates immediately.
        :returns: a decorator; the decorated callable returns whatever the
            wrapped callable returns.
        :raises: the last ``wait_on`` exception once the time budget is used up.
        """
        def wrapper(function):
            @functools.wraps(function)
            def inner(*args, **kwargs):
                deadline = time.time() + timeout
                while True:
                    try:
                        return function(*args, **kwargs)
                    except wait_on as e:
                        # Re-raise from inside the handler so there is always an
                        # active exception (a bare ``raise`` outside ``except``
                        # has nothing to re-raise). Give up as soon as one more
                        # sleep would carry us past the deadline.
                        if time.time() + interval >= deadline:
                            raise
                        print("Exception %r raised from %r.  Retry in %rs" % (
                            e, function, interval))
                        time.sleep(interval)
            return inner
        return wrapper

    def iterator(self, start_date, end_date):
        """Yield every date from ``start_date`` up to, but excluding, ``end_date``.

        :param start_date: first date yielded (anything supporting ``+ timedelta``).
        :param end_date: exclusive upper bound.
        """
        current_date = start_date
        delta = datetime.timedelta(days=1)
        while current_date < end_date:
            yield current_date
            current_date += delta

    @retry(wait_on=ConnectionError)
    def find_data(self, start_date=None, end_date=None):
        """Fetch builds through the koji session, one day-sized window at a time.

        :param start_date: first window boundary; defaults to 2017-07-25.
        :param end_date: exclusive bound for the window boundaries; defaults to
            today.
        :returns: list with the records returned by every ``listBuilds`` call,
            in call order.

        The windows are ``(d, d + 1 day)`` for consecutive boundaries yielded by
        ``iterator``, so the last window ends at ``end_date - 1 day``.
        NOTE(review): that leaves out builds completed during the final day
        before ``end_date`` -- confirm whether this is intended.

        On ``ConnectionError`` the ``retry`` decorator restarts the whole fetch
        from scratch.
        """
        if start_date is None:
            start_date = datetime.datetime.strptime(
                    '2017-07-25', '%Y-%m-%d').date()
        if end_date is None:
            end_date = datetime.date.today()

        docs = []
        previous_date = start_date
        count = 0
        for date in self.iterator(start_date, end_date):
            # The first boundary equals previous_date; a window with identical
            # bounds cannot contain anything, so skip that round-trip.
            if date > previous_date:
                builds = self.client.listBuilds(
                        completeAfter=str(previous_date), completeBefore=str(date))
                docs.extend(builds)
                count += 1
                # Pause briefly after every fifth request.
                if count % 5 == 0:
                    time.sleep(1)
            previous_date = date

        return docs

    def copy_data_in_csv(self, docs, filename="metric_data.csv"):
        """Print the number of records and write them to a CSV file.

        Dumping the raw records is the quick path; computing differences and
        dropping ``None`` values is left to the dataframe step afterwards.

        :param docs: list of dict records.
        :param filename: output path; defaults to ``metric_data.csv``.

        With an empty ``docs`` nothing is written, so an existing file is not
        truncated. The header is the key set of the first record, followed by
        any extra keys that only show up in later records.
        """
        print(len(docs))
        if not docs:
            return

        fieldnames = list(docs[0].keys())
        known = set(fieldnames)
        for doc in docs:
            for key in doc:
                if key not in known:
                    known.add(key)
                    fieldnames.append(key)

        # ``with`` closes the file even if a row fails to serialize.
        with open(filename, "w") as the_file:
            writer = csv.DictWriter(the_file, fieldnames)
            writer.writeheader()
            writer.writerows(docs)
    
# Run the fetch-and-dump pipeline. `br` and `docs` stay in the kernel namespace afterwards.
# Constructing Brew opens the koji client session.
br = Brew()
# find_data is wrapped by the retry decorator with wait_on=ConnectionError.
docs = br.find_data()
# Prints the number of records and writes them to metric_data.csv.
br.copy_data_in_csv(docs)


1604


In [17]:
# Load the build records that were dumped to metric_data.csv into a DataFrame.
import pandas as pd
data_df = pd.read_csv("metric_data.csv")
# Show a bounded preview instead of rendering the whole frame as cell output.
data_df.head(10)

Unnamed: 0,package_name,extra,creation_time,completion_time,package_id,build_id,state,source,epoch,version,...,nvr,start_time,creation_event_id,start_ts,volume_id,creation_ts,name,task_id,volume_name,release
0,release-e2e-test,,2017-07-25 00:01:53.477076,2017-07-25 00:02:39.116925,56040,576348,1,,,1.0.774,...,release-e2e-test-1.0.774-1.el7,2017-07-25 00:01:53.477076,16126459,1.500941e+09,0,1.500941e+09,release-e2e-test,13714232.0,DEFAULT,1.el7
1,e2e-container-test-product-docker,"{'image': {'autorebuild': False, 'help': None}...",2017-07-25 00:10:06.009194,2017-07-25 00:09:43,61060,576349,1,git://pkgs.devel.redhat.com/rpms/e2e-container...,,7.3,...,e2e-container-test-product-docker-7.3-921,2017-07-25 00:01:25,16126485,1.500941e+09,0,1.500941e+09,e2e-container-test-product-docker,,DEFAULT,921
2,kernel-rt,,2017-07-24 22:26:43.363954,2017-07-25 00:29:54.109895,3727,576344,3,,1.0,3.10.0,...,kernel-rt-3.10.0-693.rt56.581.el6rt,2017-07-24 22:26:43.363954,16126334,1.500935e+09,0,1.500935e+09,kernel-rt,13713857.0,DEFAULT,693.rt56.581.el6rt
3,rhsm-tools,,2017-07-25 00:38:37.117303,2017-07-25 00:40:10.096435,59288,576350,1,,,1.19,...,rhsm-tools-1.19-1.el6eng,2017-07-25 00:38:37.117303,16126504,1.500943e+09,0,1.500943e+09,rhsm-tools,13714273.0,DEFAULT,1.el6eng
4,rhsm-tools,,2017-07-25 00:41:04.71004,2017-07-25 00:44:09.872401,59288,576351,1,,,1.19,...,rhsm-tools-1.19-1.el7eng,2017-07-25 00:41:04.71004,16126509,1.500943e+09,0,1.500943e+09,rhsm-tools,13714275.0,DEFAULT,1.el7eng
5,rust-toolset-7-cargo,,2017-07-25 00:52:39.165044,2017-07-25 01:30:21.8466,62601,576352,1,,,0.20.0,...,rust-toolset-7-cargo-0.20.0-1.el7,2017-07-25 00:52:39.165044,16126520,1.500944e+09,0,1.500944e+09,rust-toolset-7-cargo,13714287.0,DEFAULT,1.el7
6,rhel-server-fs-docker,,2017-07-25 01:42:40.55523,2017-07-25 01:46:31.627852,59414,576355,1,,0.0,6.10,...,rhel-server-fs-docker-6.10-58,2017-07-25 01:42:40.55523,16126634,1.500947e+09,0,1.500947e+09,rhel-server-fs-docker,13714538.0,DEFAULT,58
7,rhel-guest-image,,2017-07-25 01:41:51.564948,2017-07-25 01:48:04.046371,42902,576354,1,,0.0,6.10,...,rhel-guest-image-6.10-118,2017-07-25 01:41:51.564948,16126631,1.500947e+09,0,1.500947e+09,rhel-guest-image,13714533.0,DEFAULT,118
8,rhel-server-docker,"{'image': {'autorebuild': False, 'help': None}...",2017-07-25 01:54:27.595184,2017-07-25 01:54:04,46311,576356,1,git://pkgs.devel.redhat.com/rpms/rhel-server-d...,,6.10,...,rhel-server-docker-6.10-58,2017-07-25 01:42:26,16126670,1.500947e+09,0,1.500948e+09,rhel-server-docker,,DEFAULT,58
9,rhel-guest-image,,2017-07-25 02:36:20.73228,2017-07-25 02:42:30.888643,42902,576358,1,,0.0,6.10,...,rhel-guest-image-6.10-119,2017-07-25 02:36:20.73228,16126720,1.500950e+09,0,1.500950e+09,rhel-guest-image,13714708.0,DEFAULT,119


In [18]:
# Add a waiting-time column to data_df: creation timestamp minus start timestamp.
# NOTE(review): the *_ts columns look like epoch seconds, which matches the column name -- confirm.
data_df['waiting_time(seconds)'] = data_df['creation_ts'].sub(data_df['start_ts'])
# Narrow view holding just the two timestamps and their difference.
diff_df = data_df.loc[:, ['creation_ts', 'start_ts', 'waiting_time(seconds)']]
diff_df

Unnamed: 0,creation_ts,start_ts,waiting_time(seconds)
0,1.500941e+09,1.500941e+09,0.00000
1,1.500941e+09,1.500941e+09,521.00919
2,1.500935e+09,1.500935e+09,0.00000
3,1.500943e+09,1.500943e+09,0.00000
4,1.500943e+09,1.500943e+09,0.00000
5,1.500944e+09,1.500944e+09,0.00000
6,1.500947e+09,1.500947e+09,0.00000
7,1.500947e+09,1.500947e+09,0.00000
8,1.500948e+09,1.500947e+09,721.59518
9,1.500950e+09,1.500950e+09,0.00000


In [12]:
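# NOTE(review): everything in this cell is commented out, so nothing here executes.
# It looks like an earlier draft of Brew.copy_data_in_csv that built each row by hand and
# computed creation_time - start_time itself -- confirm it is obsolete, then delete the cell.
#     def copy_data_in_csv(self, docs): 
#         # copy builds into a csv file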
#         writer = csv.writer(csvfile, delimiter=',', fieldnames=[])
#         for build in docs:
#             print build
#             break
#         with open('test.csv', 'a') as csvfile:
#             if not build["creation_time"] or not build["start_time"]: 
#                 diff = None 
#             else: 
#                 if build["creation_time"].find(".") != -1:
#                     created_time = datetime.datetime.strptime(build["creation_time"], '%Y-%m-%d %H:%M:%S.%f')
#                 else:
#                     created_time = datetime.datetime.strptime(build["creation_time"], '%Y-%m-%d %H:%M:%S')

#                 if build["start_time"].find(".") != -1:
#                     start_time = datetime.datetime.strptime(build["start_time"], '%Y-%m-%d %H:%M:%S.%f')
#                 else:
#                     start_time = datetime.datetime.strptime(build["start_time"], '%Y-%m-%d %H:%M:%S')
#                 diff = str(created_time - start_time)

#         writer.writerow([build["package_name"], build["extra"], build["creation_time"], build["completion_time"], build["package_id"], build["build_id"], build["state"], build["source"], build["epoch"], build["version"], build["completion_ts"], build["owner_id"], build["owner_name"], build["nvr"], build["start_time"], build["creation_event_id"], build["start_ts"], build["volume_id"], build["creation_ts"], build["name"], build["task_id"], build["release"], diff])
