In [None]:
# CSV-export URL for one project; "%%" keeps the percent-encoded check mark
# literal when the project slug is substituted for "%s".
url_tmpl = "https://issue.pbsbiotech.com/projects/%s/issues.csv?utf8=%%E2%%9C%%93&columns=all"

# Project slugs as they appear in the tracker URLs.
_p_urls = [
    "pbscustomer",
    "pbsdisposables",
    "pbsinstruments",
    "magic-metals",
    "manufacturing",
    "pbssoftware",
    "swtesting",
    "system-qualification-testing",
]

# One export URL per slug, in the same order as `_p_urls`.
project_urls = [url_tmpl % slug for slug in _p_urls]
project_urls

In [24]:
""" Issuetracker API 

* TODO: Create IssueList class (?)
* Parse Gantt HTML for class 'issue-subject' using style:width to determine hierarchy
* Consider lazily populating issue fields: on first access, call back to the
  API to download the project's issues CSV and update every issue in that
  project.
* Implement issue caching

Issue():
    * Add programmatic logging of all fields seen, ever.
    * Map fields seen to types and conversion functions

"""

import requests
import urllib
import pyquery
from collections import OrderedDict
import re

uj = urllib.parse.urljoin

class IssuetrackerAPI():
    """Session-based client that scrapes an issue-tracker web site.

    Every request goes through a single ``requests.Session`` so that the
    session state established by :meth:`login` is reused by the download
    methods.  HTML pages are parsed with ``pyquery``; issue exports are
    parsed with the standard ``csv`` module.
    """

    # Paths relative to the tracker's base URL.
    _login_url = "/login"
    _proj_issues_url = "/projects/%s/issues"
    _issues_url = "/issues"
    _proj_url = "/projects"

    # Splits gantt link text of the form "<tracker> #<id>" into (tracker, id).
    # Kept on the class so download_gantt() does not depend on a module-level
    # name defined in some other notebook cell.
    _name2id_re = re.compile(r"(.*?)\s*?#(\d*)$")

    def __init__(self, base_url):
        """Create a client for the tracker at *base_url*.

        A value with neither scheme nor network location (e.g. a bare host
        name such as ``"issue.pbsbiotech.com"``) is turned into an
        ``https://`` URL; anything else is used unchanged.
        """
        parts = urllib.parse.urlparse(base_url)
        if not parts.scheme and not parts.netloc:
            # urlparse puts a bare host name into `path`; move it to netloc.
            base_url = urllib.parse.urlunparse(
                ("https", parts.path, "", parts.params, parts.query, parts.fragment))
        self._base_url = base_url
        self._sess = requests.Session()
        self._headers = {}
        self._username = None

    def _url(self, path):
        """Resolve *path* against the tracker's base URL."""
        return urllib.parse.urljoin(self._base_url, path)

    def login(self, user='Admin', pw='hunter2'):
        """Log in through the site's HTML login form.

        The base page is fetched first and every named input found inside
        ``#login-form`` is copied into the POST body, so whatever extra
        fields the form carries are sent back together with the
        ``username`` and ``password`` values.

        NOTE(review): the default credentials are kept only so existing
        callers keep working; pass real credentials explicitly.

        Returns the ``requests`` response of the login POST.
        Raises ``ValueError`` when the response page has no ``#loggedas``
        element, and whatever ``raise_for_status()`` raises on an HTTP error.
        """
        r1 = self._sess.get(self._base_url)
        r1.raise_for_status()
        data = {}
        for field in pyquery.PyQuery(r1.content)("#login-form :input"):
            attrs = field.attrib
            if 'name' in attrs:
                data[attrs['name']] = attrs.get('value', "")
        data['username'] = user
        data['password'] = pw
        body = urllib.parse.urlencode(data)
        r2 = self._sess.post(self._url(self._login_url), body)
        r2.raise_for_status()
        if not pyquery.PyQuery(r2.content)("#loggedas"):
            raise ValueError("Invalid Username or Password")
        return r2

    def download_project_issues(self, project, utf8=True, columns='all'):
        """Download *project*'s issue export and return it parsed.

        Returns an ``OrderedDict`` mapping the integer issue id to an
        ``_Issue`` (see :meth:`_parse_proj_csv`).
        """
        r = self._download_project_csv(project, utf8, columns)
        return self._parse_proj_csv(r.content)

    def _download_project_csv(self, project, utf8, columns):
        """Fetch ``/projects/<project>/issues.csv`` and return the response.

        *utf8* truthy sends the percent-encoded check mark as the ``utf8``
        query value, otherwise an empty value; *columns* is passed through
        as the ``columns`` query value.
        """
        utf8 = "%E2%9C%93" if utf8 else ""
        url_end = ".csv?utf8=%s&columns=%s"
        path = (self._proj_issues_url + url_end) % (project, utf8, columns)
        r = self._sess.get(self._url(path))
        r.raise_for_status()
        return r

    def download_issue(self, id, type='pdf'):
        """Download issue number *id*; see :meth:`download_issue2`."""
        href = self._issues_url + "/" + str(id)
        return self.download_issue2(href, type)

    def download_issue2(self, href, type='pdf'):
        """ Sometimes it is more convenient to access issue by provided
        href.

        *href* is the issue's site-relative path (e.g. ``/issues/1921``).
        Only ``'pdf'`` / ``'.pdf'`` are accepted for *type*.  Returns the raw
        response body as bytes.
        """
        assert type in ('pdf', '.pdf'), "Non-pdf download not supported"
        if type[0] != ".":
            type = "." + type
        # The original built the URL from an undefined name (`name`), which
        # raised NameError on every call; the href is what was intended.
        r = self._sess.get(self._url(href + type))
        r.raise_for_status()
        return r.content

    def _parse_proj_csv(self, csv, encoding='utf-8'):
        """Parse an issue export into ``{issue id: _Issue}``.

        *csv* is the export as ``str`` or as bytes (decoded with *encoding*).
        The first non-blank line is the header; header names are lower-cased
        and used as field keys.  Empty values become ``"<n/a>"`` and the
        ``'#'`` field is converted to ``int`` and used as the mapping key.
        Each issue keeps its raw source line (``line=`` argument).

        Rows are split with the ``csv`` module, so quoted values may contain
        commas.  Records are still taken line by line, so a quoted value
        spanning several lines is not supported.
        """
        # Local import under another name: the parameter `csv` shadows the module.
        import csv as _csv_module

        if not isinstance(csv, str):
            csv = csv.decode(encoding)

        issues = OrderedDict()
        rows = [raw for raw in csv.splitlines() if raw.strip()]
        if not rows:
            return issues

        def split(raw):
            return next(_csv_module.reader([raw]))

        header = [name.lower() for name in split(rows[0])]
        for raw in rows[1:]:
            issue = _Issue(line=raw, api=self)
            for key, val in zip(header, split(raw)):
                issue[key] = val or "<n/a>"
            issue['#'] = int(issue['#'])
            issues[issue['#']] = issue
        return issues

    def download_projects(self):
        """Scrape the projects page into a ``_Project`` tree.

        Returns a root ``_Project`` named ``"All"`` whose children are the
        root projects listed under ``#projects-index``; subprojects are added
        below a root project when its "more collapsed" element's text matches
        ``_sp_re``.
        """
        r = self._sess.get(self._url(self._proj_url))
        r.raise_for_status()
        page = pyquery.PyQuery(r.content)
        roots = page("#projects-index > [class='projects root']")
        projects = _Project(self, "All", "")
        for el in roots.children(".root"):
            node = pyquery.PyQuery(el)
            link = node.children(".root > a")[0]
            # The last path segment of the link is the project's URL name.
            proj = projects.add(link.text, link.attrib['href'].split("/")[-1])
            more = node.children("[class='more collapsed']")
            # `.text` is None for an element without text; re.match needs a str.
            if len(more) and _sp_re.match(more[0].text or ""):
                for sub in node("[class='projects ']")(".child > .child > a"):
                    proj.add(sub.text, sub.attrib['href'].split("/")[-1])
        return projects

    def _download_gantt_raw(self, project):
        """Return the raw body of ``/projects/<project>/issues/gantt``."""
        path = (self._proj_issues_url % project) + "/gantt"
        r1 = self._sess.get(self._url(path))
        r1.raise_for_status()
        return r1.content

    def download_gantt(self, project):
        """Return the issues listed on *project*'s gantt page.

        One ``_Issue`` is built per ``.issue-subject`` element found under
        ``.gantt_subjects``, carrying ``href``, ``title``, ``id`` (int) and
        ``tracker``.  The original always fetched ``'pbssoftware'`` through a
        module-level helper; the requested project is used now.
        """
        page = pyquery.PyQuery(self._download_gantt_raw(project))
        issues = []
        for el in page(".gantt_subjects").children(".issue-subject"):
            link = pyquery.PyQuery(el).children("span > a")[0]
            tracker, issue_id = self._name2id_re.match(link.text).groups()
            issue = _Issue(api=self)
            issue.href = link.attrib['href']
            issue.title = el.attrib['title']
            issue.id = int(issue_id)
            issue.tracker = tracker
            issues.append(issue)
        return issues
    

# Pattern for text of the form "<digits> subproject(s)": optional digits, one
# space, then an optional "subproject" and an optional trailing "s".
# IssuetrackerAPI.download_projects matches it against the text of a project's
# "more collapsed" element before collecting that project's subprojects.
_sp_re = re.compile(r"(\d*?) (subproject)?(s{0,1})")

class _Project():
    """Node of the project tree built by ``IssuetrackerAPI.download_projects``.

    A node knows its display name, the name used in tracker URLs, and its
    direct subprojects (an ``OrderedDict`` keyed by display name).  Downloads
    are delegated to the API object the node was created with.
    """

    def __init__(self, api, pretty_name, url_name, subprojects=None):
        """Store the owning *api*, both names and the initial subprojects."""
        self._api = api
        self.name = pretty_name
        self._url_name = url_name
        self._subprojects = subprojects or OrderedDict()

    def add_subproject(self, proj):
        """Register *proj* as a direct child (keyed by its name); return it."""
        self._subprojects[proj.name] = proj
        return proj

    def add(self, pretty_name, url, subprojects=None):
        """Create a child node sharing this node's API, add it and return it."""
        proj = self.__class__(self._api, pretty_name, url, subprojects)
        return self.add_subproject(proj)

    def __iter__(self):
        """Yield every descendant depth-first, each parent before its children."""
        for proj in self._subprojects.values():
            yield proj
            yield from proj

    def __repr__(self):
        return "_Project(pretty_name='%s', url_name='%s')" \
                    %(self.name, self._url_name)

    def download_issues(self, utf8=True, columns='all'):
        """Download this project's issues via the API's CSV export."""
        return self._api.download_project_issues(self._url_name, utf8, columns)

    def download_gantt(self):
        """Download this project's gantt issue list via the API."""
        # The original read `self._urlname`, which is never set (the attribute
        # is `_url_name`) and therefore raised AttributeError.
        return self._api.download_gantt(self._url_name)
    

class _Issue():
    """Ordered bag of issue fields, readable as items and as attributes.

    Field values live in the ``_fields`` ordered dict.  Assigning any
    attribute whose name is not listed in ``_attrib`` stores a field instead
    of a real attribute, and reading an unknown attribute looks the name up
    in the fields.  ``issue[key]`` accesses the same dict with the key used
    verbatim (e.g. ``'#'`` or ``'parent task'``).
    """

    # Runs of whitespace and of the characters # % / . inside a field name.
    _sub_re = re.compile(r"([\s#%/\.]+)")
    # Names stored as real attributes rather than as fields.
    _attrib = frozenset((
            '_sub_re',
            '_fields',
            '_api',
            '_line',
            '_attrib'
        ))

    # Attribute names known to hold dates / date-times.  No conversion is
    # implemented for them yet; see _fixattrval.
    _date_attrs = frozenset((
            'start_date',
            'due_date',
            'date_approved',
            'next_milestone'
        ))
    _datetime_attrs = frozenset((
            'updated',
            'created'
        ))

    def _fix_field(self, m):
        """Replacement callback for ``_sub_re`` used by :meth:`_field2attr`.

        *m* is the match for one run of separator characters:

        * the whole field is ``'#'``          -> ``'id'``
        * a run containing ``%`` at the start -> ``'percent_'``
        * a run at the very end or start      -> dropped
        * any other run                       -> ``'_'``
        """
        if m.string == '#':
            return 'id'
        if '%' in m.group(1) and m.start() == 0:
            return "percent_"
        if m.end() == len(m.string):
            return ""
        return "" if m.start() == 0 else "_"

    def _field2attr(self, field):
        """Turn a field name such as ``'"% Done"'`` into ``'percent_done'``.

        The name is lower-cased, double quotes are removed and separator runs
        are rewritten by :meth:`_fix_field`.  Raises ``AssertionError`` when
        the result is not a valid identifier.
        """
        field = field.lower().replace('"', "")
        attr = self._sub_re.sub(self._fix_field, field)
        assert attr.isidentifier(), "'%s' is not a valid identifier"%attr
        return attr

    def _fixattrval(self, attr, val):
        """ Fix values for known attributes

        No conversion is implemented yet, so *val* is returned unchanged.
        The original fell off the end and returned ``None``, which made every
        keyword field passed to ``__init__`` lose its value.
        """
        # TODO: convert values of _date_attrs / _datetime_attrs once the
        # tracker's date formats are pinned down.
        return val

    def __init__(self, line=None, api=None, **fields):
        """Create an issue.

        line: raw source line the issue was parsed from, if any.
        api: API object used by :meth:`download`.
        fields: initial fields; each name is normalised with
            :meth:`_field2attr` before being stored.
        """
        # Must exist before any other assignment: __setattr__ writes into it.
        object.__setattr__(self, '_fields', OrderedDict())
        self._api = api
        self._line = line
        for name, value in fields.items():
            fattr = self._field2attr(name)
            setattr(self, fattr, self._fixattrval(fattr, value))

    def __getattr__(self, key):
        # Only reached when normal lookup fails.  A missing real attribute
        # (notably `_fields` on an instance created without __init__, as
        # copy/pickle do) must fail here; looking it up in `self._fields`
        # would re-enter __getattr__ without end.
        if key in type(self)._attrib:
            raise AttributeError(key)
        try:
            return self._fields[key]
        except KeyError:
            raise AttributeError(key) from None

    def __setattr__(self, key, val):
        if key in self._attrib:
            object.__setattr__(self, key, val)
        else:
            self._fields[key] = val

    def add_field(self, key, val):
        """Store *val* under *key* (same as ``issue[key] = val``)."""
        self[key] = val

    def __getitem__(self, key):
        return self._fields[key]

    def __setitem__(self, key, value):
        self._fields[key] = value

    def items(self):
        """Return the ``(field, value)`` pairs in insertion order."""
        return self._fields.items()

    def __repr__(self):
        msg = ", ".join("%s=%r" % i for i in self.items())
        cn = self.__class__.__name__
        return "%s(%s)" % (cn, msg)

    def pretty_print(self):
        """Return (not print) a multi-line ``field: value`` listing."""
        msg = "\n".join("%.30s: %s" % i for i in self.items())
        if not msg: msg = '<empty>'
        return "Issue:\n" + msg

    def download(self, type='pdf'):
        """Download this issue through the API and return the raw content.

        The id is read from the ``'#'`` field (set by the CSV parser) or,
        when that is absent, from ``'id'`` (set by the gantt scraper).
        Raises ``KeyError`` when the issue has neither.
        """
        fields = self._fields
        issue_id = fields['#'] if '#' in fields else fields['id']
        return self._api.download_issue(issue_id, type)

In [26]:
import getpass
import os

# Credentials are read from the environment, falling back to an interactive
# prompt, so that no account name or password is stored in the notebook.
tracker_user = os.environ.get("ISSUETRACKER_USER") or input("Issue tracker user: ")
tracker_pw = os.environ.get("ISSUETRACKER_PASSWORD") or getpass.getpass("Issue tracker password: ")

# `i` is the API object reused by the cells below.
i = IssuetrackerAPI("issue.pbsbiotech.com")
r2 = i.login(tracker_user, tracker_pw)
issues = i.download_project_issues('pbssoftware')
# Show the first issue of the export as a sample.
iss = next(iter(issues.values()))
print(iss.pretty_print())

Issue:
#: 3266
project: Software
tracker: Bug
parent task: <n/a>
status: Resolved
priority: Normal
subject: Alarm editor does not show all alarms
author: James Small
assignee: James Small
updated: 08/24/2016 10:11 AM
category: <n/a>
sprint/milestone: 3.0
start date: 08/24/2016
due date: <n/a>
estimated time: <n/a>
spent time: 0.00
% done: 0
created: 08/24/2016 10:11 AM
closed: <n/a>
related issues: <n/a>
type of request: <n/a>
product type: <n/a>
serial no. lot no.: <n/a>
followup actions: <n/a>
return material authorization: <n/a>
root cause: <n/a>
date approved: <n/a>
private: No


In [18]:
import os
import pathlib
DOWNLOAD_PDFS = False  # set to True to download every issue of the project as a PDF


def save_issue_pdfs(issues, out_dir):
    """Write one ``<id>.pdf`` per issue into *out_dir* and return the paths.

    issues: mapping of issue id -> issue object; each issue must provide
        ``download()`` (the raw PDF bytes) and the ``'#'`` item (its id).
    out_dir: target directory; it is created when missing.

    The original loop iterated the mapping itself, i.e. its integer keys, and
    reused the name ``i`` of the API object as loop variable.
    """
    out_dir = pathlib.Path(out_dir)
    os.makedirs(str(out_dir), exist_ok=True)
    written = []
    for issue in issues.values():
        target = out_dir / ("%d.pdf" % issue['#'])
        with open(str(target), 'wb') as f:
            f.write(issue.download())
        written.append(target)
    return written


if DOWNLOAD_PDFS:
    issues = i.download_project_issues('pbssoftware')
    save_issue_pdfs(issues, pathlib.Path(os.path.abspath(os.curdir)) / "softwarepdf")
    

TypeError: download_project_issues() missing 1 required positional argument: 'project'

In [98]:
# Fetch the export in this cell: with the download commented out, `raw` only
# existed as leftover kernel state and the cell failed on a fresh kernel.
raw = i._download_project_csv('pbssoftware', True, 'all')
# Header names of the export, still wrapped in their double quotes.
r = raw.content.decode().splitlines()[0].split(",")
# NOTE(review): ".." is appended to the last header presumably to check that
# _Issue._field2attr drops trailing punctuation (see the next cell) - confirm.
r[-1] = r[-1] + ".."
r

['"#"',
 '"Project"',
 '"Tracker"',
 '"Parent task"',
 '"Status"',
 '"Priority"',
 '"Subject"',
 '"Author"',
 '"Assignee"',
 '"Updated"',
 '"Category"',
 '"Sprint/Milestone"',
 '"Start date"',
 '"Due date"',
 '"Estimated time"',
 '"Spent time"',
 '"% Done"',
 '"Created"',
 '"Closed"',
 '"Related issues"',
 '"Type of Request"',
 '"Product Type"',
 '"Serial No. Lot No."',
 '"Followup Actions"',
 '"Return Material Authorization"',
 '"Root Cause"',
 '"Date Approved"',
 '"Private"..']

In [145]:
# Print, for every header in `r`, the attribute name _field2attr derives from
# it, formatted as an assignment line.
issue = _Issue()
for header in r:
    print("self.%s = ''" % issue._field2attr(header))

self.id = ''
self.project = ''
self.tracker = ''
self.parent_task = ''
self.status = ''
self.priority = ''
self.subject = ''
self.author = ''
self.assignee = ''
self.updated = ''
self.category = ''
self.sprint_milestone = ''
self.start_date = ''
self.due_date = ''
self.estimated_time = ''
self.spent_time = ''
self.percent_done = ''
self.created = ''
self.closed = ''
self.related_issues = ''
self.type_of_request = ''
self.product_type = ''
self.serial_no_lot_no = ''
self.followup_actions = ''
self.return_material_authorization = ''
self.root_cause = ''
self.date_approved = ''
self.private = ''


In [146]:
# Display the issue's repr; _field2attr only converts names, so no field has
# been stored on `issue` and the repr lists none.
issue

_Issue()

In [20]:
# Fetch the project tree and print every project; iterating a _Project also
# yields its subprojects.
od=i.download_projects()
for proj in od:
    print(proj)
# NOTE: `proj` is the loop variable left over from the loop above, i.e. the
# last project printed; its issues are the ones downloaded here.
issues=proj.download_issues()

_Project(pretty_name='Customer Support (CRS)', url_name='pbscustomer')
_Project(pretty_name='Disposables', url_name='pbsdisposables')
_Project(pretty_name='Engineering', url_name='pbsinstruments')
_Project(pretty_name='1L MINI', url_name='l-mini')
_Project(pretty_name='Magic Metals', url_name='magic-metals')
_Project(pretty_name='Manufacturing', url_name='manufacturing')
_Project(pretty_name='Program Management', url_name='program-management')
_Project(pretty_name='Quality and Support Systems', url_name='qss')
_Project(pretty_name='Software', url_name='pbssoftware')
_Project(pretty_name='SW Testing/Verification', url_name='swtesting')
_Project(pretty_name='System Qualification Testing', url_name='system-qualification-testing')


In [155]:
# Splits text of the form "<name> #<digits>" into two groups: everything
# before the optional whitespace preceding '#', and the digits that end the
# string.  gantt_issue_list applies it to the text of each issue link to get
# the tracker name and the issue id.
_name2id_re = re.compile(r"(.*?)\s*?#(\d*)$")

def _download_gantt_raw(self, project):
    """Return the raw body of *project*'s gantt page.

    Module-level twin of ``IssuetrackerAPI._download_gantt_raw``: *self* is
    the API object whose session, base URL and URL templates are used.
    Raises whatever ``raise_for_status()`` raises on an HTTP error.
    """
    gantt_path = "%s/gantt" % (self._proj_issues_url % project)
    response = self._sess.get(uj(self._base_url, gantt_path))
    response.raise_for_status()
    return response.content

def gantt_issue_list(self, project):
    """Return the issues listed on *project*'s gantt page.

    *self* is the API object used for the download.  One ``_Issue`` is built
    per ``.issue-subject`` element found under ``.gantt_subjects``, carrying
    ``href``, ``title``, ``id`` (int) and ``tracker``.

    The original ignored *project* and always downloaded ``'pbssoftware'``.
    """
    page = pyquery.PyQuery(_download_gantt_raw(self, project))
    issues = []
    for el in page(".gantt_subjects").children(".issue-subject"):
        link = pyquery.PyQuery(el).children("span > a")[0]
        # Link text is split into the tracker name and the issue number.
        tracker, issue_id = _name2id_re.match(link.text).groups()
        issue = _Issue(api=self)
        issue.href = link.attrib['href']
        issue.title = el.attrib['title']
        issue.id = int(issue_id)
        issue.tracker = tracker
        issues.append(issue)
    return issues
# List the Software project's gantt issues, one per line.
issues = gantt_issue_list(i, 'pbssoftware')
print("\n".join(str(issue) for issue in issues))

_Issue(href='/issues/1921', title='Update vbs scripts for 1.22', id='1921', tracker='Bug')
_Issue(href='/issues/1959', title='1.22 needs version #', id='1959', tracker='Back_End_Improvements')
_Issue(href='/issues/2712', title='CFR 21 Part 11 Issues parent', id='2712', tracker='Planning')
_Issue(href='/issues/396', title='Back End Documentation', id='396', tracker='Support')
_Issue(href='/issues/420', title='Database sometimes gets corrupt', id='420', tracker='Specification')
_Issue(href='/issues/989', title='Login Behavior', id='989', tracker='Specification')
_Issue(href='/issues/2725', title='User Events Issues Parent', id='2725', tracker='Planning')
_Issue(href='/issues/1073', title='Changes to Existing User Events', id='1073', tracker='Specification')
_Issue(href='/issues/2115', title='Web UI Login events show "Unknown User"', id='2115', tracker='Specification')
_Issue(href='/issues/2445', title='Web UI Logout unknown user', id='2445', tracker='Specification')
_Issue(href='/issues/

In [21]:
# Bind `csv` in this cell instead of relying on a variable left over from an
# earlier kernel session: it maps issue id -> _Issue for one project.
# NOTE(review): 'pbssoftware' is inferred from the "project: Software" line of
# the recorded output - confirm.
csv = i.download_project_issues('pbssoftware')
# pretty_print() produces the "Issue:" / "field: value" listing; printing the
# issue itself would show its one-line repr.
print(csv[2725].pretty_print())

Issue:
#: 2725
project: Software
tracker: Planning
parent task: 2712
status: New
priority: Immediate
subject: User Events Issues Parent
author: James Small
assignee: <n/a>
updated: 07/07/2016 12:30 PM
category: <n/a>
sprint/milestone: 3.0
start date: 01/27/2012
due date: <n/a>
estimated time: 25.50
spent time: 0.00
% done: 36
created: 09/25/2015 12:22 PM
closed: <n/a>
related issues: <n/a>
type of request: <n/a>
product type: <n/a>
serial no. lot no.: <n/a>
followup actions: <n/a>
return material authorization: <n/a>
root cause: <n/a>
date approved: <n/a>
private: No


In [22]:
# `_line` is the raw CSV row the issue was parsed from; it is stored by
# IssuetrackerAPI._parse_proj_csv when the issue is created.
csv[2725]._line

'"2725","Software","Planning","2712","New","Immediate","User Events Issues Parent","James Small","","07/07/2016 12:30 PM","","3.0","01/27/2012","","25.50","0.00","36","09/25/2015 12:22 PM","","","","","","","","","","No"'

In [None]:
# Scratch generator: prints the source of an `__init__` that takes one
# parameter per field name and stores each as `self._<name>`.
# NOTE(review): `m1` is not defined in the visible cells; `m1[0]` is assumed
# to be a list of header names - confirm where it comes from.
m1[0]
fields = [
    name.lower().translate(str.maketrans(" %/", "___"))
    for name in m1[0].copy()
]
s = "        def __init__(self, %s):" % ", ".join(fields)
print(s)
s2 = (" " * 8) + (" " * 8).join(
    "self._%s = %s\n" % (name, name) for name in fields
)
print(s2)