In [None]:
# CSV-export URL for one project's issue list.  ``%s`` is filled with the
# project's URL name; the doubled ``%%`` are literal percent signs, so the
# query string ends up as ``utf8=%E2%9C%93`` (a URL-encoded check mark).
url_tmpl = "https://issue.pbsbiotech.com/projects/%s/issues.csv?utf8=%%E2%%9C%%93&columns=all"

# URL names of the projects whose issue lists are wanted.
_p_urls = [
    "pbscustomer",
    "pbsdisposables",
    "pbsinstruments",
    "magic-metals",
    "manufacturing",
    "pbssoftware",
    "swtesting",
    "system-qualification-testing",
]

# One fully expanded export URL per project, in the order listed above.
project_urls = [url_tmpl % (name,) for name in _p_urls]
project_urls

In [7]:
import getpass
import os
import re
import urllib
import urllib.parse
from collections import OrderedDict

import pyquery
import requests

# Short alias for urljoin; used throughout to combine the site's base URL
# with the relative paths of individual pages.
uj = urllib.parse.urljoin

class IssuetrackerAPI():
    """Small HTTP client for the issue tracker's web pages.

    It logs in through the site's HTML login form, downloads issue lists as
    CSV, downloads single issues as PDF, and scrapes the project list.
    All requests go through one ``requests.Session`` so that cookies set
    during :meth:`login` are sent with the later download requests.
    """

    # Site-relative paths; joined onto the base URL with ``uj``.
    _login_url = "/login"
    _proj_issues_url = "/projects/%s/issues"
    _issues_url = "/issues"
    _proj_url = "/projects"

    def __init__(self, base_url):
        """Create a client for the site at ``base_url``.

        Args:
            base_url: Address of the site.  When it has neither a scheme nor
                a network location (e.g. ``"issue.pbsbiotech.com"``) it is
                rebuilt as an ``https`` URL, using the parsed path as host.
        """
        r = urllib.parse.urlparse(base_url)
        if not r.scheme and not r.netloc:
            # A bare host name is parsed entirely into ``path``; move it into
            # the netloc position and default to https.
            base_url = urllib.parse.urlunparse(("https", r.path, "", r.params, r.query, r.fragment))
        self._base_url = base_url
        self._sess = requests.Session()
        self._headers = {}
        self._username = None

    def login(self, user='Admin', pw='hunter2'):
        """Log in by submitting the site's HTML login form.

        The base page is fetched first and every named input inside
        ``#login-form`` is copied into the POST data (this carries along any
        hidden fields the form contains), then the username and password
        are filled in.

        NOTE(review): the default credentials are hard-coded and look like
        placeholders; they are kept only so the signature stays compatible.
        Always pass real credentials explicitly.

        Args:
            user: Login name.
            pw: Password.

        Returns:
            The ``requests.Response`` of the login POST.

        Raises:
            requests.HTTPError: If either request returns an error status.
            ValueError: If the response page has no ``#loggedas`` element,
                which is treated as a failed login.
        """
        r1 = self._sess.get(self._base_url)
        r1.raise_for_status()
        q = pyquery.PyQuery(r1.content)
        data = {}
        for td in q("#login-form :input"):
            at = td.attrib
            if 'name' in at:
                data[at['name']] = at.get('value', "")
        data['username'] = user
        data['password'] = pw
        body = urllib.parse.urlencode(data)
        r2 = self._sess.post(uj(self._base_url, self._login_url), body)
        r2.raise_for_status()
        if not pyquery.PyQuery(r2.content)("#loggedas"):
            raise ValueError("Invalid Username or Password")
        return r2

    def download_project_issues(self, project, utf8=True, columns='all'):
        """Download and parse the CSV issue list of one project.

        Args:
            project: URL name of the project (e.g. ``"pbssoftware"``).
            utf8: When truthy, ``utf8=%E2%9C%93`` is sent in the query
                string; otherwise the parameter is sent empty.
            columns: Value of the ``columns`` query parameter.

        Returns:
            ``OrderedDict`` mapping issue number (int) to ``_Issue``.

        Raises:
            requests.HTTPError: If the request returns an error status.
        """
        utf8 = "%E2%9C%93" if utf8 else ""
        url_end = ".csv?utf8=%s&columns=%s"
        url = (self._proj_issues_url + url_end) % (project, utf8, columns)
        url = uj(self._base_url, url)
        r = self._sess.get(url)
        r.raise_for_status()
        return self._parse_issues(r.content)

    def download_issue(self, id, type='pdf'):
        """Download a single issue as a PDF document.

        Args:
            id: Issue number (formatted with ``%d``).
            type: Must be ``'pdf'`` or ``'.pdf'``; nothing else is supported.

        Returns:
            The raw response body (bytes).

        Raises:
            AssertionError: If ``type`` is not a PDF type.
            requests.HTTPError: If the request returns an error status.
        """
        assert type in ('pdf', '.pdf'), "Non-pdf download not supported"
        name = "%d%s" % (id, ".pdf")
        url = uj(self._base_url, self._issues_url + "/" + name)
        r = self._sess.get(url)
        r.raise_for_status()
        return r.content

    def _parse_issues(self, csv, encoding='utf-8'):
        """Parse an issue-list CSV export into ``_Issue`` objects.

        The text is parsed with the standard ``csv`` module, so quoted
        fields may contain commas, doubled quotes and line breaks.  Header
        names are lower-cased and used as field keys; empty values are
        stored as ``"<n/a>"``; the ``'#'`` field is converted to ``int`` and
        used as the key of the result.  Blank rows are skipped.

        Args:
            csv: CSV text, or bytes to be decoded with ``encoding``.
            encoding: Encoding used when ``csv`` is not already a ``str``.

        Returns:
            ``OrderedDict`` mapping issue number to ``_Issue`` in file order
            (empty when the input is empty).  Each issue keeps the raw text
            of its record in ``_line``.
        """
        # The parameter is called ``csv`` (kept for compatibility), which
        # hides the module of the same name, hence the aliased local import.
        import csv as _csv_module
        import io

        if not isinstance(csv, str):
            csv = csv.decode(encoding)

        # newline="" keeps the original line endings and only splits on
        # \n, \r and \r\n, matching what csv.reader expects.
        raw_lines = list(io.StringIO(csv, newline=""))
        issues = OrderedDict()
        if not raw_lines:
            return issues

        reader = _csv_module.reader(raw_lines)
        header = [name.lower() for name in next(reader)]
        consumed = reader.line_num
        for row in reader:
            # reader.line_num counts the physical lines read so far, which
            # lets us recover the raw text of a record spanning several lines.
            raw = "".join(raw_lines[consumed:reader.line_num]).rstrip("\r\n")
            consumed = reader.line_num
            if not row:
                continue
            issue = _Issue(self, line=raw)
            for key, val in zip(header, row):
                issue[key] = val or "<n/a>"
            issue['#'] = int(issue['#'])
            issues[issue['#']] = issue
        return issues

    def download_projects(self):
        """Scrape the projects page into a tree of ``_Project`` objects.

        Returns:
            A root ``_Project`` named ``"All"`` (empty URL name) holding one
            child per top-level project; subprojects found in a project's
            nested list are added below that project.

        Raises:
            requests.HTTPError: If the request returns an error status.
        """
        url = uj(self._base_url, self._proj_url)
        r = self._sess.get(url)
        r.raise_for_status()
        q = pyquery.PyQuery(r.content)
        roots = q("#projects-index > [class='projects root']")
        projects = _Project(self, "All", "")
        for e in roots.children(".root"):
            node = pyquery.PyQuery(e)
            proj_ele = node.children(".root > a")[0]
            # The last path segment of the link is the project's URL name.
            proj = projects.add(proj_ele.text, proj_ele.attrib['href'].split("/")[-1])
            more = node.children("[class='more collapsed']")
            # ``.text`` is None for an element without leading text; treat
            # that as "no match" instead of passing None to the regex.
            if len(more) and _sp_re.match(more[0].text or ""):
                for e2 in node("[class='projects ']")(".child > .child > a"):
                    proj.add(e2.text, e2.attrib['href'].split("/")[-1])
        return projects
    

# Applied with ``.match`` to the text of a project's "more collapsed" element
# in ``download_projects``.  It needs optional digits followed by one space;
# the "subproject" and "s" parts are both optional, so any text that starts
# with digits-then-space (or just a space) matches.
# NOTE(review): presumably meant to recognise "N subproject(s)" labels - confirm.
_sp_re = re.compile(r"(\d*?) (subproject)?(s{0,1})")

class _Project():
    def __init__(self, api, pretty_name, url_name, subprojects=None):
        self._api = api
        self.name = pretty_name
        self._url_name = url_name
        self._subprojects = subprojects or OrderedDict()
    def add_subproject(self, proj):
        self._subprojects[proj.name] = proj
        return proj
    def add(self, pretty_name, url, subprojects=None):
        proj = self.__class__(self._api, pretty_name, url, subprojects)
        return self.add_subproject(proj)
    def __iter__(self):
        for proj in self._subprojects.values():
            yield proj
            yield from proj
    def __repr__(self):
        return "_Project(pretty_name='%s', url_name='%s')" \
                    %(self.name, self._url_name)
    def download_issues(self, utf8=True, columns='all'):
        return self._api.download_project_issues(self._url_name, utf8, columns)
    
    def download_gantt(self):
        return self._api.download_gantt(self._urlname)

        
class _Issue():
    
    def __init__(self, api, fields=None, line=None):
        self._fields = fields or OrderedDict()
        self._api = api
        self._line = line
    
    def __getitem__(self, key):
        return self._fields[key]
    
    def __setitem__(self, key, value):
        self._fields[key] = value
    
    def items(self):
        return self._fields.items()
            
    def __repr__(self):
        return "Issue:\n" + "\n".join("%s: %s" % (k,v) for k, v in self.items())
    
    def download(self, type='pdf'):
        return self._api.download_issue(self['#'], type)

In [8]:
# Credentials must never be stored in the notebook: they are taken from the
# ISSUETRACKER_USER / ISSUETRACKER_PASSWORD environment variables, falling
# back to an interactive prompt (the password prompt does not echo).
# NOTE(review): the password that used to be written in this cell should be
# treated as compromised and changed.
i = IssuetrackerAPI("issue.pbsbiotech.com")
_login_user = os.environ.get("ISSUETRACKER_USER") or input("Issue tracker username: ")
_login_password = os.environ.get("ISSUETRACKER_PASSWORD") or getpass.getpass("Issue tracker password: ")
r2=i.login(_login_user, _login_password)
# Do not keep the password around in the kernel namespace.
del _login_password

In [4]:
# Parse the gantt page of the 'pbssoftware' project for the queries below.
# NOTE(review): the IssuetrackerAPI class defined in this notebook has no
# `_download_gantt` method, so on a fresh kernel this cell raises
# AttributeError; presumably it was run against an earlier class version
# (a module-level `_download_gantt_raw` appears in a later cell) - TODO confirm.
q1 = pyquery.PyQuery(i._download_gantt('pbssoftware'))

In [None]:
# Select the `.gantt_subjects` element(s) inside the #content table ...
q2 = q1("#content > table > tr > td > div > .gantt_subjects")
# ... and, within them, every element with class `version-name`.
q3 = q2(".version-name")

In [None]:
# Walk the version-name elements and stop at the one whose link text is
# '3.0', leaving `el` bound to that link.  If no entry matches, `el` is
# simply the link of the last element visited.
# `pq` was never defined in this notebook; use `pyquery.PyQuery` like the
# other cells do.
for e in q3:
    el = pyquery.PyQuery(e)("span > a")
    if el.text() == '3.0':
        break

In [9]:
# Fetch the project tree and print every project in it (iterating the root
# also walks the nested subprojects).
od=i.download_projects()
for proj in od:
    print(proj)
# `proj` is still bound to the LAST project of the loop above, so only that
# one project's issues are downloaded; the result is displayed as cell output.
proj.download_issues()

_Project(pretty_name='Customer Support (CRS)', url_name='pbscustomer')
_Project(pretty_name='Disposables', url_name='pbsdisposables')
_Project(pretty_name='Engineering', url_name='pbsinstruments')
_Project(pretty_name='1L MINI', url_name='l-mini')
_Project(pretty_name='Magic Metals', url_name='magic-metals')
_Project(pretty_name='Manufacturing', url_name='manufacturing')
_Project(pretty_name='Program Management', url_name='program-management')
_Project(pretty_name='Quality and Support Systems', url_name='qss')
_Project(pretty_name='Software', url_name='pbssoftware')
_Project(pretty_name='SW Testing/Verification', url_name='swtesting')
_Project(pretty_name='System Qualification Testing', url_name='system-qualification-testing')


OrderedDict([(2882, Issue:
              #: 2882
              project: System Qualification Testing
              tracker: Testing
              parent task: <n/a>
              status: In Progress
              priority: Normal
              subject: CO2 MFC Testing
              author: Daniel Giroux
              assignee: Yas Hashimura
              updated: 03/14/2016 12:20 PM
              category: <n/a>
              sprint/milestone: <n/a>
              start date: 11/02/2015
              due date: <n/a>
              estimated time: <n/a>
              spent time: 0.00
              % done: 0
              created: 11/02/2015 11:59 AM
              closed: <n/a>
              related issues: Related to #2171
              type of request: <n/a>
              product type: Bioreactor
              serial no. lot no.: <n/a>
              followup actions: <n/a>
              return material authorization: <n/a>
              root cause: <n/a>
              private: No), (1768

In [41]:
# Splits a label of the form "<name> #<digits>" into (name, digits): a lazy
# name, optional whitespace, a '#', then digits up to the end of the text.
_name2id_re = re.compile(r"(.*?)\s*?#(\d*)$")

def _download_gantt_raw(self, project):
    try:
        with open("gantt.html", 'rb') as f:
            c = f.read()
        return c
    except FileNotFoundError:
        pass
    url = (self._proj_issues_url % project) + "/gantt"
    url = uj(self._base_url, url)
    r1 = self._sess.get(url)
    r1.raise_for_status()
    with open("gantt.html", 'wb') as f:
        f.write(r1.content)
        content = r1.content
    return content

def download_gantt(self, project):
    """Print the name and number of every issue on a project's gantt page.

    The page is obtained with ``_download_gantt_raw(self, project)``; for
    each ``.issue-subject`` child of ``.gantt_subjects`` the link text is
    split by ``_name2id_re`` and printed as ``<name> <id>``.  Labels that do
    not match the pattern are skipped.

    Args:
        self: API object to download with.
        project: URL name of the project.
    """
    # Use the arguments given by the caller; the previous version ignored
    # them in favour of the global ``i`` and a fixed 'pbssoftware'.
    c = _download_gantt_raw(self, project)
    q = pyquery.PyQuery(c)
    q2 = q(".gantt_subjects")
    for el in q2.children(".issue-subject"):
        e2 = pyquery.PyQuery(el).children("span > a")
        match = _name2id_re.match(e2.text())
        if match is None:
            # Label without a "#<id>" part: nothing to report.
            continue
        name, issue_id = match.groups()
        print(name, issue_id)
# Print "<name> <id>" for each issue row of the 'pbssoftware' gantt page.
download_gantt(i, 'pbssoftware')

Bug 1921
Back_End_Improvements 1959
Planning 2712
Support 396
Specification 420
Specification 989
Planning 2725
Specification 1073
Specification 2115
Specification 2445
Specification 2458
Specification 2906
Specification 2908
Back_End_Improvements 2909
Specification 1262
Specification 1263
Specification 1759
Support 1793
Specification 1845
Specification 1885
Feature 1953
Specification 2309
Planning 3001
Specification 2819
Specification 2856
Specification 2859
Specification 2884
Specification 2885
Specification 2886
Specification 2891
Specification 2922
Support 2995
Specification 2316
Planning 3002
Specification 2367
Back_End_Improvements 2735
Bug 2842
Back_End_Improvements 2848
Back_End_Improvements 2878
Specification 2888
Specification 2894
Specification 2897
Specification 2910
Specification 2918
Specification 2924
Back_End_Improvements 2927
Specification 2975
Back_End_Improvements 2990
Back_End_Improvements 2484
Specification 2522
Bug 2592
Specification 2596
Specification 2715
Suppor

In [21]:
# Show the parsed fields of issue 2725.
# NOTE(review): `csv` is not assigned anywhere in the visible notebook;
# presumably it holds the dict returned by `download_project_issues` for the
# Software project - TODO confirm and add the defining cell.
print(csv[2725])

Issue:
#: 2725
project: Software
tracker: Planning
parent task: 2712
status: New
priority: Immediate
subject: User Events Issues Parent
author: James Small
assignee: <n/a>
updated: 07/07/2016 12:30 PM
category: <n/a>
sprint/milestone: 3.0
start date: 01/27/2012
due date: <n/a>
estimated time: 25.50
spent time: 0.00
% done: 36
created: 09/25/2015 12:22 PM
closed: <n/a>
related issues: <n/a>
type of request: <n/a>
product type: <n/a>
serial no. lot no.: <n/a>
followup actions: <n/a>
return material authorization: <n/a>
root cause: <n/a>
date approved: <n/a>
private: No


In [22]:
# Raw CSV line the issue above was parsed from (same hidden `csv` variable).
csv[2725]._line

'"2725","Software","Planning","2712","New","Immediate","User Events Issues Parent","James Small","","07/07/2016 12:30 PM","","3.0","01/27/2012","","25.50","0.00","36","09/25/2015 12:22 PM","","","","","","","","","","No"'

In [None]:
# Scratch code generator: turns a list of column names into the source text
# of an ``__init__`` signature and matching ``self._x = x`` assignments.
# NOTE(review): `m1` is not defined in the visible notebook; `m1[0]` is
# presumably a list of CSV header names - TODO confirm.
m1[0]
# Lower-case each name and map space, '%' and '/' to underscores.
fields = [
    name.lower().translate(str.maketrans(" %/", "___"))
    for name in m1[0].copy()
]
s = "        def __init__(self, %s):" % ", ".join(fields)
print(s)
# Every assignment line is indented by eight spaces.
s2 = " " * 8 + (" " * 8).join(["self._%s = %s\n" % (name, name) for name in fields])
print(s2)