In [None]:
url_tmpl = "https://issue.pbsbiotech.com/projects/%s/issues.csv?utf8=%%E2%%9C%%93&columns=all"
_p_urls = [
    "pbscustomer", "pbsdisposables", "pbsinstruments", 
    "magic-metals", "manufacturing", "pbssoftware", "swtesting",
    "system-qualification-testing"
]
project_urls = [url_tmpl % p for p in _p_urls]
project_urls

In [None]:
""" Issuetracker API 

* TODO: Create IssueList class (?)
* Parse Gantt HTML for class 'issue-subject' using style:width to determine hierarchy
* Consider method of lazy evaluation of issue field generation by
calling back to API to download project issues CSV, and update all issues
in project. 
* Implement issue caching

Issue():
    * Add programmatic logging of all fields seen, ever.
    * Map fields seen to types and conversion functions

"""

import requests
import urllib
import pyquery
from collections import OrderedDict
import re
import dateutil.parser
import lxml

uj = urllib.parse.urljoin
_sp_re = re.compile(r"(\d*?) (subproject)?(s{0,1})")
_name2id_re = re.compile(r"(.*?)\s*?#(\d*)$")

class IssuetrackerAPI():
    _login_url = "/login"
    _proj_issues_url = "/projects/%s/issues"
    _issues_url = "/issues"
    _proj_url = "/projects"
    
    def __init__(self, base_url, username=None, pw=None):
        r = urllib.parse.urlparse(base_url)
        if not r.scheme and not r.netloc:
            base_url = urllib.parse.urlunparse(("https", r.path, "", r.params, r.query, r.fragment))
        self._base_url = base_url
        self._sess = requests.Session()
        self._headers = {}
        
        if username is None or pw is None:
            raise ValueError("Must have valid username and password.")
        
        self._username = username
        self._password = pw
        self._auth = (username, pw)
        
        self._cache = {}
        
    @property
    def issues(self):
        raise NotImplemented
        
    @property
    def projects(self):
        pj = self._cache.get("projects")
        if not pj:
            pj = self.down_projects()
            self._cache['projects'] = pj
        return pj
            
    def login(self):

        r1 = self._sess.get(self._base_url)
        r1.raise_for_status()
        q = pyquery.PyQuery(r1.content)
        data = {}
        for td in q("#login-form :input"):
            at = td.attrib
            if 'name' in at:
                k = at['name']
                v = at.get('value', "")
                data[k] = v
                
        data['username'] = self._username
        data['password'] = self._password
        
        body = urllib.parse.urlencode(data)
        r2 = self._sess.post(uj(self._base_url, self._login_url), body)
        r2.raise_for_status()
        if not pyquery.PyQuery(r2.content)("#loggedas"):
            raise ValueError("Invalid Username or Password")
        return r2
        
    def download_project_issues(self, project, utf8=True, columns='all'):
        r = self._download_project_csv(project, utf8, columns)
        return self._parse_proj_csv(r.content)
    
    def _download_project_csv(self, project, utf8, columns):
        if utf8:
            utf8 = "%E2%9C%93"
        else:
            utf8 = ""
        url_end = ".csv?utf8=%s&columns=%s" 
        url = (self._proj_issues_url + url_end) % (project, utf8, columns)
        url = uj(self._base_url, url)
        r = self._sess.get(url)
        r.raise_for_status()
        return r
    
    def download_issue(self, id, type='pdf'):
        href = self._issues_url + "/" + str(id)
        return self.download_issue2(href, type)

    
    def download_issue2(self, href, type='pdf'):
        """ Sometimes it is more convenient to access issue by provided 
        href. """
        assert type in ('pdf', '.pdf'), "Non-pdf download not supported"
        if type[0] != ".":
            type = "." + type
        url = uj(self._base_url, name + type)
        r = self._sess.get(url)
        r.raise_for_status()
        return r.content
        
    def _parse_proj_csv(self, csv, encoding='utf-8'):
        if not isinstance(csv, str):
            csv = csv.decode(encoding)
        sl = csv.splitlines()
        sl[0] = sl[0].lower().replace('"', "")
        lines = [l.split(",") for l in sl]
        issues = OrderedDict()
        for i, l in enumerate(lines[1:], 1):
            issue = _Issue(line=sl[i], api=self)
            for key, val in zip(lines[0], l):
                issue[key] = val.strip('"') or "<n/a>"
            issue['#'] = int(issue['#'])
            issues[issue['#']] = issue
        return issues
    
#     def download_projects(self):
#         url = uj(self._base_url, self._proj_url)
#         r = self._sess.get(url)
#         r.raise_for_status()
#         c = r.content
#         q = pyquery.PyQuery(c)
#         q2 = q("#projects-index > [class='projects root']")
#         projects = _Project(self, "All", "")
#         for e in q2.children(".root"):
#             proj_ele = pyquery.PyQuery(e).children(".root > a")[0]
#             pt = proj_ele.text
#             phref = proj_ele.attrib['href'].split("/")[-1]
#             proj = projects.add(pt, phref)
#             q4 = pyquery.PyQuery(e).children("[class='more collapsed']")
#             if len(q4) and _sp_re.match(q4[0].text):
#                 q3 = pyquery.PyQuery(e)("[class='projects ']")
#                 for e2 in q3(".child > .child > a"):
#                     proj.add(e2.text, e2.attrib['href'].split("/")[-1])
#         return projects

    def download_projects(self):
        url = uj(self._base_url, self._proj_url + ".xml")
        r = self._sess.get(url, auth=self._auth)
        r.raise_for_status()
        xml = lxml.etree.XML(r.content)
        projects = {}
        for proj in xml.findall("project"):
            p = Project.from_element(self, proj)
            projects[p.name] = p
        
        # Second pass, process project subtasks
        for p in projects.values():
            if p.parent is not None:
                parent = projects[p.parent['name']]
                parent.add_subproject(p)
        return projects
    
    def _download_gantt_raw(self, project):
        url = (self._proj_issues_url % project) + "/gantt"
        url = uj(self._base_url, url)
        r1 = self._sess.get(url)
        r1.raise_for_status()
        return r1.content

    def download_gantt(self, project):
        c = _download_gantt_raw(self, project)
        q = pyquery.PyQuery(c)
        q2 = q(".gantt_subjects")
        issues = []
        for el in q2.children(".issue-subject"):
            title = el.attrib['title']
            e2=pyquery.PyQuery(el).children("span > a")[0]
            tracker, id = _name2id_re.match(e2.text).groups()
            href = e2.attrib['href']
            i = _Issue(api=self)
            i.href = href
            i.title = title
            i.id = int(id)
            i.tracker = tracker
            issues.append(i)
        return issues
    

def _parse_custom_fields(e):
    rv = {}
    for cf in e.findall("custom_field"):
        cfd = {}
        cfd.update(cf.attrib)
        v = cf.find("value")
        if v is None or v.text == 'blank':
            val = None
        else:
            val = v.text
        cfd['value'] = val
        rv[cfd['name']] = cfd
    return rv

def _parse_datetime(e):
    return dateutil.parser.parse(e.text)

def _parse_int(e):
    return int(e.text)

def _parse_bool(e):
    t = e.text.lower()
    if t == 'false':
        return False
    return True

def _parse_parent(e):
    return {k:v for k,v in e.attrib.items()}


class Project():
    def __init__(self, api, id=0, name="", identifier="", description="", parent=None, status=None, 
                 is_public=False, custom_fields=None, created_on=None, updated_on=None):
        self._api = api
        self.name = name
        self.id = id
        self.name = name
        self.identifier = identifier
        self.description = description
        self.parent = parent
        self.status = status
        self.is_public = is_public
        self.custom_fields = custom_fields
        self.created_on = created_on
        self.updated_on = updated_on
        self._subprojects = []
        
    def add_subproject(self, sp):
        sp.parent = self
        if sp not in self._subprojects:
            self._subprojects.append(sp)
            
    def __repr__(self):
        return "_Project(%s)" % ', '.join("%s=%r" % (k[0], getattr(self, k[0])) for k in self._proj_parse_table)
    
    def download_issues(self, utf8=True, columns='all'):
        return self._api.download_project_issues(self.identifier, utf8, columns)
    
    def download_gantt(self):
        return self._api.download_gantt(self.identifier)
        
    _proj_parse_table = [
    # e.tag attr parse function
    ("id", "id", _parse_int),
    ("name", "name", None),
    ("identifier", "identifier", None),
    ("description", "description", None),
    ("parent", "parent", _parse_parent),
    ("status", "status", _parse_int),
    ("is_public", "is_public", _parse_bool),
    ("custom_fields", "custom_fields", _parse_custom_fields),
    ("created_on", "created_on", _parse_datetime),
    ("updated_on", "updated_on", _parse_datetime),
]
        
    @classmethod
    def from_element(cls, api, e):
        kw = {}
        for tag, k, func in cls._proj_parse_table:
            el = e.find(tag)
            if el is None:
                continue
            if func:
                v = func(el)
            else:
                v = el.text
            if v is not None:
                kw[k] = v
        if not kw and e.tag != 'project':
            raise ValueError("Failed to parse element: element should be <project> element.")
        return cls(api, **kw)
        

In [None]:
i = IssuetrackerAPI("issue.pbsbiotech.com", 'nstarkweather', 'kookychemist')
r2=i.login()
try:
    jd
except NameError:
    jd = {'issue':{}}
#issues = i.download_project_issues('pbssoftware')
#iss = next(_ for _ in issues.values())

In [None]:
import json
jd2 = json.loads(i._sess.get("https://issue.pbsbiotech.com/issues/2206.json", auth=i._auth).content.decode())
jd['issue'].update(jd2['issue'])
for f in jd2['issue']['custom_fields']:
    jd['issue'][f['name']] = f['value']
#print(jd2['issue']['custom_fields'])
#print(json.dumps(jd, indent=4))


In [None]:
#raw=i._download_project_csv('pbssoftware', True, 'all')
r = raw.content.decode().splitlines()[0].split(",")
r = [_.replace('"', "") for _ in r]
r[0] = 'id'
r.sort(key=str.lower)
r[0] = 'done_ratio'
r.sort(key=str.lower)

js = sorted(jd['issue'].keys(), key=lambda s: s.lower())
from itertools import zip_longest
print("   R               JS")
for a, b in zip_longest(r, js, fillvalue=""):
    print("%-30s %-30s"%(a,b))

In [273]:
def download_project_issues(self, project):
    offset = 0
    limit = 100
    total_count = 0
    rv1 = {}
    while True:
        ops = {
            'limit': limit,
            'project': project,
            'offset': offset,
        }

        url = uj(self._base_url, self._issues_url + ".json")
        url += "?" + urllib.parse.urlencode(ops)
        print("Downloading issues: %d/%d" % (offset, total_count))
        r = self._sess.get(url, auth=self._auth)
        r.raise_for_status()
        d = json.loads(r.content.decode())
        issues = d['issues']
        if not issues:
            break
        offset += len(issues)
        for i in issues:
            rv1[i['id']] = i
        total_count = int(d.get('total_count',0))
        
        if offset > total_count:
            break
        break
            
    iss = _parse_issues(self, rv1.values())
    print(len(iss), total_count)
    return iss

def _parse_issues(self, issues):
    rv = {}
    for i in issues:
        print(i)
        rv[int(i['id'])] = _Issue(self, **i)
        break
    return rv

In [249]:
url = "https://issue.pbsbiotech.com/issues.json?limit=1?project=pbssoftware"
#c = i._sess.get(url, auth=i._auth).content
d=json.loads(c.decode())#['issues'][0]
s=json.dumps(d, indent=4)
print(s)

{
    "offset": 0,
    "limit": 1,
    "issues": [
        {
            "author": {
                "name": "James Small",
                "id": 42
            },
            "updated_on": "2016-08-30T17:02:40Z",
            "id": 3270,
            "custom_fields": [
                {
                    "name": "Type of Request",
                    "id": 8,
                    "value": ""
                },
                {
                    "name": "Product Type",
                    "id": 4,
                    "value": ""
                },
                {
                    "name": "Serial No. Lot No.",
                    "id": 9,
                    "value": ""
                },
                {
                    "name": "Followup Actions",
                    "id": 6,
                    "value": ""
                },
                {
                    "name": "Return Material Authorization",
                    "id": 7,
                    "value": ""
          

In [274]:
d=download_project_issues(i, 'pbssoftware')
list(d.values())[:5]

Downloading issues: 0/0
{'author': {'name': 'James Small', 'id': 42}, 'updated_on': '2016-07-20T22:10:05Z', 'id': 3159, 'custom_fields': [{'name': 'Type of Request', 'id': 8, 'value': ''}, {'name': 'Product Type', 'id': 4, 'value': ''}, {'name': 'Serial No. Lot No.', 'id': 9, 'value': ''}, {'name': 'Followup Actions', 'id': 6, 'value': ''}, {'name': 'Return Material Authorization', 'id': 7, 'value': ''}, {'name': 'Root Cause', 'id': 5, 'value': ''}, {'name': 'Date Approved', 'id': 16, 'value': ''}], 'fixed_version': {'name': '3.0', 'id': 52}, 'status': {'name': 'Feedback', 'id': 4}, 'created_on': '2016-06-02T18:25:23Z', 'description': 'If the first batch is "--", the resulting array output will have end times before the start times.  The solution is to modify the SQL call for getting batches with name=\'--\' so it also does not get batches if their ID is 1.\r\n\r\nIf the first batch has a name, and the last batch has a name, the last batch\'s end time will be "--" which is appropriate 

[_Issue(author=None, updated_on=None, id=None, custom_fields=None, fixed_version=None, status=None, created_on=None, description=None, tracker=None, subject=None, start_date=None, project=None, done_ratio=None, priority=None, assigned_to=None)]

TypeError: <module '__main__'> is a built-in class