In [None]:
def clean(string):
    """Normalize a scraped string, typically the ``str()`` of an lxml text list.

    Removes list-repr punctuation (brackets and quotes), the *escaped* forms
    of a non-breaking space and a newline as they appear inside a list repr,
    shortens a few month names, collapses whitespace runs to a single space
    and strips both ends.

    Parameters
    ----------
    string : str
        Text to normalize.

    Returns
    -------
    str
        The normalized text.
    """
    import re
    # "\\xa0" and "\\n" are the literal backslash sequences produced by
    # str(list_of_strings), not the control characters themselves; real
    # whitespace characters are handled by the \s+ collapse below.
    for junk in ("\\xa0", "[", "]", "'", "\"", "\\n"):
        string = string.replace(junk, "")
    # The lookahead keeps a spelled-out "September" intact; a plain
    # replace("Sept", "Sep") used to turn it into "Sepember".
    string = re.sub(r"Sept(?!ember)", "Sep", string)
    string = string.replace("July", "Jul")
    string = string.replace("June", "Jun")
    string = re.sub(r"\s+", " ", string)
    return string.strip()

def amendments_senate(start = "01/01/2020",end=None):
    """Scrape congress.gov for Senate amendments acted on within a date range.

    Runs the congress.gov advanced-search "command line" query for Senate
    amendments (``billType:"SAmdt"``) with action code 94000 or 95000 whose
    latest action date lies between ``start`` and ``end``, walks every result
    page (250 results per page) and tabulates the hits.

    Relies on the module-level ``clean`` helper.

    Parameters
    ----------
    start : str
        First day of the range, formatted ``MM/DD/YYYY``.
    end : str or None
        Last day of the range, formatted ``MM/DD/YYYY``; ``None`` means today.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``results`` has one row per amendment (columns: Amendment, Bill or
        Amendment Amended, Sponsor, Sponsor Party, Latest Action, Latest
        Action Date, Action Type, Legislation Type). ``summary`` is a
        one-column frame ("Values") of counts by party, action type and
        legislation type.
    """
    from lxml import html
    import requests
    import re
    import pandas as pd
    from datetime import datetime

    #date formatting
    if end is None:
        dateend = datetime.now().date()
    else:
        dateend = datetime.strptime(end, '%m/%d/%Y').date()
    datestart = datetime.strptime(start, '%m/%d/%Y').date()

    #search url shared by the page-count request and every page request
    base_url = "https://www.congress.gov/advanced-search/command-line?query=actionCode:(94000%20OR%2095000)%20latestActionDateStr:[%22"+str(datestart.strftime("%Y-%m-%d"))+"%22%20TO%20%22"+str(dateend.strftime("%Y-%m-%d"))+"%22]%20billType:%22SAmdt%22&searchResultViewType=compact&KWICView=false&pageSize=250"
    print(base_url)

    #get number of pages of results
    first_page = html.fromstring(requests.get(base_url).content)
    page_label = first_page.xpath("//div[@class='pagination']//span[@class='results-number']/text()")
    try:
        iterations = int(re.sub("[^0-9]", "", str(page_label)))
    except ValueError:
        # no pagination label on the page: treat it as a single page of results
        iterations = 1

    cols = ["Amendment","Bill or Amendment Amended","Sponsor","Sponsor Party","Latest Action","Latest Action Date","Action Type","Legislation Type"]

    def party_of(sponsor_text):
        """Return the one-letter party code from a cleaned sponsor string."""
        if not sponsor_text:
            return ""
        # Prefer the trailing "<party>-<state>" code so that a hyphenated
        # surname (e.g. "Hyde-Smith") is not mistaken for the separator.
        codes = re.findall(r"([A-Z])-[A-Z]{2}\b", sponsor_text)
        if codes:
            return codes[-1]
        # fall back to the character before the first hyphen
        return sponsor_text[sponsor_text.find("-")-1]

    def vote_type_of(action):
        """Classify a latest-action sentence by how the amendment was decided."""
        if "Unanimous Consent" in action:
            return "UC"
        if "Yea-Nay" in action:
            return "RCV"
        if "Voice Vote" in action:
            return "VV"
        return "ERROR: INPUT BY HAND"

    def leg_type_of(amended):
        """Classify the amended measure from its citation text."""
        if "S.Amdt" in amended:
            return "Amendment"
        if "Con" in amended:
            return "Budget"
        if "Res" in amended:
            return "Resolution"
        return "Legislation"

    #function to get all the info from one page of results
    def get_amdt(page):
        tree = html.fromstring(requests.get(base_url+"&page="+str(page)).content)
        #start extracting amdt data
        headings = tree.xpath("//li[@class='compact']//span[@class='result-heading amendment-heading']")
        #get amendment number (first link) and, when present, the amendment it amends (second link)
        amdt = [clean(str(h.xpath("a[1]/text()"))) for h in headings]
        amdt_to = [clean(str(h.xpath("a[2]/text()"))) for h in headings]
        #get bill amended
        amends_items = tree.xpath("//*//li[@class='compact']//span[@class='result-item'][contains(.//strong,'Amends')]")
        amends = [clean(str(a.xpath("span/a/text()[normalize-space()]"))) for a in amends_items]
        #replace bill amended with amendment amended, if relevant
        amends = [target if target != '' else amends[i] for i, target in enumerate(amdt_to)]
        #get sponsors and their party
        sponsor_items = tree.xpath("//*//li[@class='compact']//span[@class='result-item'][contains(.//strong,'Sponsor')]")
        sponsor = [clean(str(a.xpath("span/a/text()[normalize-space()]"))) for a in sponsor_items]
        party = [party_of(s) for s in sponsor]
        #get latest actions
        latest_items = tree.xpath("//*//li[@class='compact']//span[@class='result-item'][contains(.//strong,'Latest')]")
        latest = [clean(str(a.xpath("span//text()")).replace(", "," ")) for a in latest_items]
        latest = [action.replace("( ","(").replace(" )",")") for action in latest]
        #extract date from latest actions (the text starts with MM/DD/YY)
        latest_date = [datetime.strptime(action[0:8], '%m/%d/%y').date() for action in latest]
        vote_type = [vote_type_of(action) for action in latest]
        leg_type = [leg_type_of(measure) for measure in amends]

        return pd.DataFrame(zip(amdt,amends,sponsor,party,latest,latest_date,vote_type,leg_type),columns=cols)

    #get data for as many pages of results there are on congress.gov
    # (DataFrame.append was removed in pandas 2.0, so pages are concatenated once)
    pages = [get_amdt(page) for page in range(1,iterations+1)]
    results = pd.concat(pages) if pages else pd.DataFrame(columns=cols)

    # assemble summary dataframe
    party_counts = results["Sponsor Party"].value_counts()
    action_counts = results["Action Type"].value_counts()
    type_counts = results["Legislation Type"].value_counts()
    summary_values = [len(results),
                      party_counts.get("R", 0),
                      party_counts.get("D", 0),
                      action_counts.get("UC", 0),
                      action_counts.get("VV", 0),
                      action_counts.get("RCV", 0),
                      type_counts.get("Legislation", 0),
                      type_counts.get("Resolution", 0),
                      type_counts.get("Amendment", 0),
                      type_counts.get("Budget", 0)]

    indexes = ["Total Amendments","R Amendments","D Amendments","Total Unanimous Consent","Total Voice Vote","Total Roll Call Vote","Bills Amended","Resolutions Amended","Amendments Amended","Budget Resolutions Amended"]
    summary = pd.DataFrame(summary_values,index=indexes,columns=["Values"])

    return(results,summary)

In [None]:
def conference(start='01/01/2020',end = None):
    """List measures in the House Calendar's "Bills Through Conference" table.

    Downloads the "Bills Through Conference" PDF from govinfo.gov for the
    publish date ``end``; while the server does not answer 200 the publish
    date is moved back one day at a time (at most ``end - start`` days).
    Measure citations are then pulled out of the text of the first ten pages.

    Parameters
    ----------
    start : str
        Earliest date, ``MM/DD/YYYY``. Only bounds how far back the publish
        date may be moved and appears in the output column name.
    end : str or None
        Preferred publish date, ``MM/DD/YYYY``; ``None`` means today.

    Returns
    -------
    pandas.DataFrame
        A single row holding the number of distinct measures found, the list
        of measures, and the URL of the PDF that was used.
    """
    # NOTE(review): PdfFileReader/getPage/extractText are the legacy PyPDF2
    # names; they are believed to be unavailable in PyPDF2 >= 3.0 -- confirm
    # the pinned version before upgrading.
    import PyPDF2
    import io
    import requests 
    from datetime import datetime, timedelta
    import pandas as pd
    import re

    if end is None:
        dateend = datetime.now().date()
    else:
        dateend = datetime.strptime(end, '%m/%d/%Y').date()
    datestart = datetime.strptime(start, '%m/%d/%Y').date()
    diff = (dateend - datestart).days

    def pdf_url(publish_date):
        """Return the govinfo link-service URL for one publish date."""
        return ("https://www.govinfo.gov/link/ccal/house/Bills%20Through%20Conference?link-type=pdf&publishdate=" + publish_date.strftime("%Y-%m-%d"))

    url = pdf_url(dateend)
    print(url)

    # Step back one day at a time until an issue exists, keeping the
    # successful response so the PDF is not downloaded a second time.
    r = None
    for _ in range(diff):
        probe = requests.get(url)
        if probe.status_code == 200:
            r = probe
            break
        dateend = dateend - timedelta(days=1)
        url = pdf_url(dateend)
    if r is None:
        r = requests.get(url)

    reader = PyPDF2.PdfFileReader(io.BytesIO(r.content))

    # Extract the text of at most the first ten pages, stopping at the first
    # page that cannot be read (e.g. past the end of the document).
    page_lines = []
    for page_num in range(10):
        try:
            page_lines.append(reader.getPage(page_num).extractText().split('\n'))
        except Exception:
            break

    prefixes = ["H.R.","S.","H.Con","H.J","S.Con","S.J"]
    res = []
    for lines in page_lines:
        for row in lines:
            t = str(row)
            # the citation runs from the prefix up to this marker in the extracted text
            val2 = t.find("Å ")
            for prefix in prefixes:
                if prefix not in t:
                    continue
                candidate = t[t.find(prefix):val2]
                if candidate in res:
                    continue
                try:
                    # keep only candidates that contain a measure number
                    int(re.sub("[^0-9]", "", candidate))
                except ValueError:
                    continue
                res.append(candidate)

    row = [[len(res),res,url]]
    ret = pd.DataFrame(row,columns = [str("Measures Considered through Conference "+start+" and "+dateend.strftime("%m/%d/%Y")+"."),"Measures","Check Here"])
    return(ret)

In [None]:
def reportedbills(start,end):
    """Total the "Measures reported" figures from Congressional Record Resumes.

    For each month from ``start`` to ``end`` the congress.gov search is asked
    for Congressional Record articles titled ``*Resume*`` dated between the
    current start date and the end of that month. The linked PDF's first page
    is read, the "Measures reported" section is located, and its Senate and
    House numbers are summed.

    Relies on the module-level ``clean`` helper.

    Parameters
    ----------
    start : str
        First day of the range, ``MM/DD/YYYY``.
    end : str or None
        Last day of the range, ``MM/DD/YYYY``; ``None`` means today.

    Returns
    -------
    pandas.DataFrame
        One row per Resume found, indexed by a label containing the date span
        printed in the PDF, with columns ``Senate``, ``House`` and ``URL``.
        Empty (columns ``Senate``, ``House``) when nothing was found.
    """
    # NOTE(review): PdfFileReader/getPage/extractText are the legacy PyPDF2
    # names; they are believed to be unavailable in PyPDF2 >= 3.0 -- confirm
    # the pinned version before upgrading.
    import PyPDF2
    import io
    import requests 
    from datetime import datetime
    import pandas as pd
    import numpy as np
    import re
    from lxml import html
    import calendar 
    from dateutil.relativedelta import relativedelta

    #date formatting
    if end is None:
        dateend = datetime.now().date()
    else:
        dateend = datetime.strptime(end, '%m/%d/%Y').date()
    datestart = datetime.strptime(start, '%m/%d/%Y').date()

    #get number of months in date range
    nummonths = (dateend.year - datestart.year) * 12 + dateend.month - datestart.month

    summary_xpath = "//div[@class='cdg-summary-wrapper']//ul//li/a/@href"
    record_xpath = "//*//li[@class='expanded']//span[@class='result-heading congressional-record-heading']//@href"

    # "<day?> <month name> <day?> <year>" and the same without the year
    month_part = r'(\b\d{1,2}\D{0,3})?\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|(Nov|Dec)(?:ember)?)'
    full_date = month_part + r'\D?(\d{1,2}\D?)?\D?((19[7-9]\d|20\d{2})|\d{2})'
    day_month = month_part + r'\D?(\d{1,2})'

    def find_resume_link(range_start):
        """Return the Resume PDF href for the month of ``range_start``, or None."""
        range_end = range_start.replace(day=calendar.monthrange(range_start.year, range_start.month)[1])
        search_url = "https://www.congress.gov/advanced-search/command-line?query=legislativeSource:%22Congressional%20Record%22%20title:%22*Resume*%22%20crArticleDateStr:[%22"+str(range_start.strftime("%Y-%m-%d"))+"%22%20TO%20%22"+str(range_end.strftime("%Y-%m-%d"))+"%22]&searchResultViewType=expanded&KWICView=false"
        tree = html.fromstring(requests.get(search_url).content)
        direct = tree.xpath(summary_xpath)
        if direct:
            return direct[0]
        try:
            # otherwise follow the first search hit and look for the link there
            record_url = tree.xpath(record_xpath)[0]
            record_tree = html.fromstring(requests.get(record_url).content)
            return record_tree.xpath(summary_xpath)[0]
        except Exception:
            return None

    frames = []

    # NOTE(review): a skipped month advances datestart without using up a
    # loop iteration, so the walk can run past `end` -- confirm intent.
    for _ in range(nummonths+1):
        link = find_resume_link(datestart)
        if link is None:
            # nothing for this month: try the following month once, then give up
            datestart += relativedelta(months=1)
            link = find_resume_link(datestart)
            if link is None:
                break

        url = 'https://www.congress.gov'+link
        webpage = requests.get(url)
        reader = PyPDF2.PdfFileReader(io.BytesIO(webpage.content))
        txt = reader.getPage(0).extractText().split('\n')
        page_text = str(txt)

        # the covered period is printed as "<date> through <date>"
        pivot = page_text.find("through")
        vals = page_text[pivot-30:pivot+30]
        dates = [m.group() for m in re.finditer(full_date, vals, re.S)]
        try:
            through = str(dates[0]) + " through " + str(dates[1])
        except IndexError:
            # only one date carries a year: pair it with a year-less match
            match1 = re.search(full_date, vals)
            match = re.search(day_month, vals)
            through = str(match.group()+" through "+match1.group())

        #find reported bills data in pdf text
        begin = page_text.find("Measures reported")
        reported = page_text[begin:begin+page_text[begin:].find("concurrent")].split(",")
        reported = [i if i.find("total") < 0 else " " for i in reported]
        reported = [i.replace(".................................."," ") for i in reported]
        reported = [i.replace("..............."," ") for i in reported]

        #generate totals: first two numeric-or-".." tokens of each row are Senate, House
        totals = list()
        for chunk in reported:
            t = [i for i in clean(chunk).split() if i == ".." or i.isnumeric()]
            if t:
                totals.append(t[0:2])

        Senate = [int(t[0]) for t in totals if t[0].isnumeric()]
        # a row may carry only one token; guard the House column against IndexError
        House = [int(t[1]) for t in totals if len(t) > 1 and t[1].isnumeric()]
        Senate = np.array(Senate).sum()
        House = np.array(House).sum()

        frames.append(pd.DataFrame([[Senate,House,url]],columns=["Senate","House","URL"],index=[("Number of Bills Reported in Range: "+through)]))

        datestart += relativedelta(months=1)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    results = pd.concat(frames) if frames else pd.DataFrame(columns=["Senate","House"])
    return(results)

In [None]:
def unrep(chamber,start = "01/01/2020",end=None):
    """List bills and joint resolutions matching a chamber's action-code filter.

    Queries the congress.gov advanced-search "command line" for bills and
    joint resolutions whose latest action date lies between ``start`` and
    ``end`` and whose action codes match ``7000 NOT 5000`` (House) or
    ``16000 NOT 14000`` (Senate), then walks every result page.

    Relies on the module-level ``clean`` helper.

    Parameters
    ----------
    chamber : str
        ``"House of Representatives"`` or ``"Senate"``.
    start : str
        First day of the range, ``MM/DD/YYYY``.
    end : str or None
        Last day of the range, ``MM/DD/YYYY``; ``None`` means today.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``results`` has one row per measure plus ``Quarter`` and ``Year``
        columns derived from the introduction date; ``summary`` counts the
        measures per year and quarter.

    Raises
    ------
    ValueError
        If ``chamber`` is not one of the two accepted values.
    """
    from lxml import html
    import requests
    import re
    import pandas as pd
    from datetime import datetime

    if end is None:
        dateend = datetime.now().date()
    else:
        dateend = datetime.strptime(end, '%m/%d/%Y').date()
    datestart = datetime.strptime(start, '%m/%d/%Y').date()

    action_filters = {"House of Representatives": "7000+NOT+5000",
                      "Senate": "16000+NOT+14000"}
    if chamber not in action_filters:
        # previously this only printed and then failed on an undefined `url`
        raise ValueError("Issue with chamber input.")
    url = "https://www.congress.gov/advanced-search/command-line?query=actionCode%3A"+action_filters[chamber]+"+latestActionDateStr%3A%5B%22"+str(datestart.strftime("%Y-%m-%d"))+"%22+TO+%22"+str(dateend.strftime("%Y-%m-%d"))+"%22%5D&searchResultViewType=compact&q=%7B%22type%22%3A%5B%22bills%22%2C%22joint-resolutions%22%5D%7D&pageSize=250"

    webpage = requests.get(url)
    tree = html.fromstring(webpage.content)
    try:
        iterations = int(re.sub("[^0-9]", "", str(tree.xpath("//div[@class='pagination']//span[@class='results-number']/text()"))))
    except ValueError:
        # no pagination label on the page: treat it as a single page of results
        iterations = 1

    cols = ["Measure","Latest","Congress","Legislation Title","Date Introduced","House or Senate"]
    frames = []

    #for each page of results, extract info and collect it
    for i in range(1,int(iterations)+1):
        # Page through the SAME chamber-specific query that was used for the
        # page count; this request used to be hard-coded to actionCode:16000,
        # so House runs silently returned Senate data.
        webpage = requests.get(url+"&page="+str(i))
        tree = html.fromstring(webpage.content)

        Bill_Code = tree.xpath("//*//ol[@class='basic-search-results-lists expanded-view']//li[@class='compact']/span[@class='result-heading']/a/text()")
        Latest = tree.xpath("//*//ol[@class='basic-search-results-lists expanded-view']//li[@class='compact']//span[@class='result-item'][contains(strong/text(),'Latest Action')]/span/text()[1][normalize-space()]")
        Congress = tree.xpath("//*//ol[@class='basic-search-results-lists expanded-view']//li[@class='compact']//span[@class='result-heading']//a/following-sibling::text()")
        Leg_Title = tree.xpath("//*//ol[@class='basic-search-results-lists expanded-view']//li[@class='compact']//span[@class='result-title bottom-padding']//text()")
        Date_Intro = tree.xpath("//div[@id='main']//li[@class='compact']//span[@class='result-item'][1]/span[1]/a[1]/following-sibling::text()[1]")
        House_or_Senate = [chamber]*len(Date_Intro)

        zipped = zip(Bill_Code,Latest,Congress,Leg_Title,Date_Intro,House_or_Senate)
        frames.append(pd.DataFrame(list(zipped),columns = cols))

    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    results = pd.concat(frames) if frames else pd.DataFrame(columns = cols)

    #format date introduced as dates: keep only the digits and slashes of each entry
    intro = [s.replace("(Introduced ","") for s in results["Date Introduced"]]
    intro = ["".join(c for c in s if c.isnumeric() or c == "/") for s in intro]
    intro = [datetime.strptime(clean(s),"%m/%d/%Y").date() for s in intro]
    results["Date Introduced"] = intro

    #COMPILE SUMMARY DATA
    month_quarters = {1:1,2:1,3:1,4:2,5:2,6:2,7:3,8:3,9:3,10:4,11:4,12:4}
    results["Quarter"] = [month_quarters.get(date.month) for date in results["Date Introduced"]]
    results["Year"] = [date.year for date in results["Date Introduced"]]

    summ = list()
    for year in results["Year"].unique():
        per_quarter = results[results["Year"]==year]["Quarter"].value_counts()
        for quarter in range(1,5):
            summ.append(["Unreported Bills in "+str(year)+" - Quarter "+str(quarter),per_quarter.get(quarter, 0)])
    summary = pd.DataFrame(summ,columns=["Metric","Count"])        

    return(results,summary)


In [67]:
from lxml import html
import requests
from datetime import datetime,timedelta

import re 
from lxml import html
import requests
import re
import pandas as pd
from datetime import datetime, timedelta

def clean(string):
    """Normalize a scraped string, typically the ``str()`` of an lxml text list.

    Removes list-repr punctuation (brackets and quotes), the *escaped* forms
    of a non-breaking space and a newline as they appear inside a list repr,
    shortens a few month names, collapses whitespace runs to a single space
    and strips both ends.

    Parameters
    ----------
    string : str
        Text to normalize.

    Returns
    -------
    str
        The normalized text.
    """
    import re
    # "\\xa0" and "\\n" are the literal backslash sequences produced by
    # str(list_of_strings), not the control characters themselves; real
    # whitespace characters are handled by the \s+ collapse below.
    for junk in ("\\xa0", "[", "]", "'", "\"", "\\n"):
        string = string.replace(junk, "")
    # The lookahead keeps a spelled-out "September" intact; a plain
    # replace("Sept", "Sep") used to turn it into "Sepember".
    string = re.sub(r"Sept(?!ember)", "Sep", string)
    string = string.replace("July", "Jul")
    string = string.replace("June", "Jun")
    string = re.sub(r"\s+", " ", string)
    return string.strip()




def workingdays(chamber, start, end):
    """Build a day-by-day table of when a chamber convened and adjourned.

    Walks the congress.gov Daily Digest pages from ``start`` up to (not
    including) ``end``. For each digest found, the adjournment/recess
    sentence for ``chamber`` is located, the convene and adjourn times are
    parsed out of it and one row is produced.

    Relies on module-level names: ``requests``, ``html`` (lxml), ``re``,
    ``pd``, ``datetime``, ``timedelta``, the ``clean`` helper and IPython's
    ``display``.

    Parameters
    ----------
    chamber : str
        ``"House of Representatives"`` or ``"Senate"``.
    start : str
        First day, ``MM/DD/YYYY``.
    end : str or None
        Day to stop before, ``MM/DD/YYYY``; ``None`` means today.

    Returns
    -------
    pandas.DataFrame
        Columns: House or Senate, Date, Time Convened, Time Adjourned,
        Working Day?, Link, Quarter. "Working Day?" is ``"NS"`` (not in
        session / sine die), ``"PF"`` (pro forma, or in session under 60
        minutes), ``"X"`` (60 minutes or more, or still in session/recess),
        or a ``"HOLD"``-style marker when the entry must be filled in by hand.
    """
    #format date
    if end is None:
        dateend = datetime.now().date()
    else:
        dateend = datetime.strptime(end, '%m/%d/%Y').date()
    datestart = datetime.strptime(start, '%m/%d/%Y').date()
    #get date range
    diff = (dateend - datestart).days
    #set up results: single-row frames are collected here and concatenated at
    #the end (DataFrame.append was removed in pandas 2.0)
    cols = ["House or Senate","Date","Time Convened","Time Adjourned","Working Day?","Link"]
    frames = []

    def digest_url(day):
        """Return the Daily Digest URL for one calendar day."""
        return "https://www.congress.gov/congressional-record/"+str('{d.year}/{d.month}/{d.day}'.format(d=day))+"/daily-digest"

    #function to find the next day (on or after `day`) whose digest url does
    #not answer 404/503; returns (day, url)
    def web_test(day):
        url = digest_url(day)
        # one request per probe (the status used to be fetched twice)
        while requests.get(url).status_code in (404, 503):
            day = day + timedelta(days=1)
            url = digest_url(day)
            if day >= dateend:
                break
        return(day,url)

    #function to format time strings into datetimes
    def timeconv(x):
        x = re.sub("a.m.","AM",x)
        x = re.sub("p.m.","PM",x)
        x = re.sub("noon","PM",x)
        x = re.sub(r"\[","",x)
        x = re.sub(r"\]","",x)
        x = re.sub(r"\'","",x)
        x = re.sub(",","",x)
        try:
            return(datetime.strptime(x,"%I %p"))
        except ValueError:
            try:
                return(datetime.strptime(x,"%I:%M %p"))
            except ValueError:
                x = x.replace(".","")
                return(datetime.strptime(x,"%I:%M %p"))

    def marked_text(nodes,label):
        """Join the text nodes containing ``label`` or "Chamber Action" with the two nodes after each."""
        hits = [i for i in range(len(nodes)) if label in nodes[i] or "Chamber Action" in nodes[i]]
        picks = hits + [i+1 for i in hits] + [i+2 for i in hits]
        # raises IndexError when a hit is within two nodes of the end
        return(clean(",".join([nodes[i] for i in picks])))

    #function to get the adjournment sentence for a chamber from a digest url;
    #tries progressively looser page layouts and returns None when nothing fits
    def get_strg(url,chamber):
        webpage = requests.get(url)
        tree = html.fromstring(webpage.content) 
        strg = clean(",".join(tree.xpath("//div[@class='main-wrapper']//p[contains(./strong,'Adjournment:')]//text()")))
        strg = strg.replace("Adjournment:","")
        str_chamber = strg[:strg.find("House")] if chamber == "Senate" else strg[strg.find("House"):]

        ch = "House" if chamber == "House of Representatives" else "Senate"
        if ch not in strg:
            strg = clean(",".join(tree.xpath("//div[@class='main-wrapper']//center[contains(./h2/text(),'"+ch+"')]//following-sibling::p[contains(./strong,'Adjournment:')]//text()")))
            strg = (ch +" "+ strg) if strg else None
            if not strg:
                strg = clean(",".join(tree.xpath("//div[@class='main-wrapper']//center[contains(./h2/text(),'"+ch+"')]//following-sibling::p[contains(./strong,'Recess:')]//text()")))
                strg = (ch +" "+ strg) if strg else None
                if not strg:
                    strg = clean(str(tree.xpath("//div[@class='main-wrapper']//center[./h2/text()='"+chamber+"']//following-sibling::p[1]/text()")))
            str_chamber = strg
        if not strg or not str_chamber:
            str_chamber = tree.xpath("//div[@class='main-wrapper']//center[./h2/text()='"+chamber+"']//following-sibling::p[1]/text()")
            str_chamber = [i for i in str_chamber if ch in i]
            str_chamber = clean(str(str_chamber))
            if not str_chamber:
                str_chamber = tree.xpath("//div[@class='main-wrapper']//center[contains(./strong,'Adjournment:')]//following-sibling::text()[1]")
                str_chamber = [i for i in str_chamber if ch in i]
                str_chamber = clean(str(str_chamber))

        if not str_chamber:
            hld = tree.xpath("//div[@class='main-wrapper']/center[./h3/em/text()='Chamber Action']/preceding-sibling::center[h2[contains(./text(),'"+ch+"')]]/following-sibling::p[contains(./text(),'"+ch+"')][1]/text()")
            hld = hld[0] if len(hld) > 0 else hld
            str_chamber = clean(str(hld))
            if not str_chamber:
                def scan(label):
                    if chamber == "House of Representatives":
                        l = tree.xpath("//h3[./a[@name='daily-digest-house-of-representatives']]//following-sibling::div[1]//text()")
                        return(clean(",".join([i for i in l if label in i or "Chamber Action" in i])))
                    l = tree.xpath("//h3[./a[@name='daily-digest-highlights-senate']]//following-sibling::div[1]//text()")
                    return(marked_text(l,label))
                try:
                    str_chamber = scan("Adjournment:")
                except Exception:
                    try:
                        str_chamber = scan("Recess:")
                    except Exception:
                        str_chamber = None
            if not str_chamber and chamber == "Senate":
                # (the original retried this identical lookup after a failure;
                # a deterministic retry cannot succeed, so it is tried once)
                try:
                    l = tree.xpath("//h3[./a[@name='daily-digest-senate']]//following-sibling::div[1]//text()")
                    str_chamber = marked_text(l,"Recess:")
                except Exception:
                    str_chamber = None
        # str_chamber may be None here; `ch not in None` used to raise TypeError
        if not str_chamber or ch not in str_chamber:
            str_chamber = None
        else:
            str_chamber = str_chamber.replace("at noon","12:00 pm")
            str_chamber = str_chamber.replace("noon","p.m.")
            if "12 midnight" in str_chamber:
                str_chamber = str_chamber.replace("12 midnight","12:00 am")
            elif "midnight" in str_chamber:
                str_chamber = str_chamber.replace("midnight","12:00 am")
        return(str_chamber)


    #function to extract relevant info from the adjournment sentence
    def to_append(str_chamber,url,chamber,datestart,cols):

        def row(convened,adjourned,status):
            return(pd.DataFrame([[chamber,datestart,convened,adjourned,status,url]],columns=cols))

        if not str_chamber:
            # nothing to parse (None or empty text): flag for manual entry
            return(row("HOLD","HOLD","HOLD"))
        if "not in session" in str_chamber or "sine die" in str_chamber:
            return(row(None,None,"NS"))
        if "pro forma" in str_chamber:
            return(row(None,None,"PF"))

        time_pattern = r'([0-1]?[0-9]|2[0-3]):?[0-5]?[0-9]? ?([AaPp][.]?[Mm][.]?)'
        times = [m.group() for m in re.finditer(time_pattern, str_chamber)]

        if len(times) < 2:
            # retry with the am/pm marker optional, then borrow the marker
            # found in the first time (or assume pm) for entries lacking one
            times = [m.group() for m in re.finditer(time_pattern + '?', str_chamber)]

            for time in range(len(times)):
                if 'm' not in times[time]:
                    try:
                        for x in re.finditer(r'([AaPp][.]?[Mm][.]?)', str(times[0])):
                            pm_am = x.group()
                        times[time] = (times[time] + " " + pm_am)
                    except NameError:
                        # no marker seen yet: pm_am is still unbound
                        times[time] = (times[time] + " pm")

        try:
            times = [time.replace(".","") for time in times]
            conv = timeconv(times[0])
            adj = timeconv(times[1])
            timein = (adj-conv).seconds//60

            if timein >= 60:
                return(row(conv.time(),adj.time(),"X"))
            else:
                return(row(conv.time(),adj.time(),"PF"))

        except Exception:
            # Only a convene time is usable. If the text says the chamber is
            # still sitting, record it as a working day with an open end.
            open_ended = ("and is in recess","stands in recess","still in session","stands adjourned")
            if times and any(phrase in str_chamber for phrase in open_ended):
                try:
                    return(row(timeconv(times[0]).time(),"-","X"))
                except ValueError:
                    pass
            return(row("HOLD","HOLD","HOLD"))

    #last-resort parser: scan the page's text nodes for a sentence about the chamber
    def errorcatch(url):
        webpage = requests.get(url)
        tree = html.fromstring(webpage.content)

        text = tree.xpath("//div[@class='txt-box']//text()")
        keywords = ["met at","adjourned","not in session","pro forma session"]

        if not text:
            text = tree.xpath("//div[@class='main-wrapper']//text()")

        ch = "House" if chamber == "House of Representatives" else "Senate"
        str_chamber = None

        for x in text:
            if "Adjournment" in x:
                if ch in x:
                    str_chamber = clean(str(x))
        if not str_chamber:
            str_chamber = list()
            for x in text:
                for key in keywords:
                    if key in x and ch in x and x not in str_chamber:
                        str_chamber.append(x)
            str_chamber = clean(str(str_chamber))
        if not str_chamber:
            l = tree.xpath("//div[@class='main-wrapper']//following-sibling::center[h2[contains(./text(),'"+ch+"')]]//following-sibling::p//text()")
            # was `join(",", ...)`, an undefined name that always raised NameError
            str_chamber = clean(",".join([i for i in l if ch in i]))
        return(to_append(str_chamber,url,chamber,datestart,cols))

    #for each relevant day, get the info and collect it
    for y in range(diff):
        # the first pass probes the start date itself; later passes start the day after
        probe_from = datestart if y == 0 else datestart + timedelta(days=1)
        # probe once (web_test used to be run twice, doubling the requests)
        datestart, url = web_test(probe_from)
        
        if datestart >= dateend:
            break
        webpage = requests.get(url)
        tree = html.fromstring(webpage.content) 
        str_chamber = get_strg(url,chamber)

        if not str_chamber:
            if "more than one issue for" in str(tree.xpath("//div[@class='main-wrapper']//h2[@class='alt']//text()")):
                urls = tree.xpath("//div[@class='main-wrapper']//ul[@class='plain margin7']//li//@href")
                for url in urls:
                    str_chamber = get_strg(url,chamber)
                    frames.append(to_append(str_chamber,url,chamber,datestart,cols))
            elif "No digest text" in clean(str(tree.xpath("//div[@class='main-wrapper']//p//text()"))):
                congress = clean(str(tree.xpath("//div[@class='featured']//h1//span[contains(text(),'Congress')]//text()")))
                # NOTE(review): these values do not line up with the column
                # names (the link lands under "Working Day?") -- confirm intent.
                app = pd.DataFrame([[chamber,datestart,"HOLD","INPUT BY HAND: example available pdfs at the following link","congress.gov"+tree.xpath("//p[@class='daily-digest-navigation']//a//@href")[0],congress]],columns=cols)
                frames.append(app)
            else:
                try:
                    frames.append(errorcatch(url))
                except Exception:
                    frames.append(pd.DataFrame([[chamber,datestart,"HOLD","HOLD","UNKNOWN ERROR: INPUT BY HAND",url]],columns=cols))
        else:
            frames.append(to_append(str_chamber,url,chamber,datestart,cols))
        
    df = pd.concat(frames) if frames else pd.DataFrame(columns=cols)
    month_quarters = {1:1,2:1,3:1,4:2,5:2,6:2,7:3,8:3,9:3,10:4,11:4,12:4}
    df["Quarter"] = [month_quarters.get(d.month) for d in df["Date"]]
    display(df)
    return(df)

In [72]:
# house_1998 = workingdays("House of Representatives","01/01/1998","12/31/1998")
# house_1998.to_csv("D:/OneDrive - Bipartisan Policy Center/Congress/HCI/Working Days/house1998.csv")

# house_1999 = workingdays("House of Representatives","01/01/1999","12/31/1999")
# house_1999.to_csv("D:/OneDrive - Bipartisan Policy Center/Congress/HCI/Working Days/house1999.csv")

# house_2000 = workingdays("House of Representatives","01/01/2000","12/31/2000")
# house_2000.to_csv("D:/OneDrive - Bipartisan Policy Center/Congress/HCI/Working Days/house2000.csv")

# senate_1995 = workingdays("Senate","01/01/1995","12/31/1995")
# senate_1995.to_csv("D:/OneDrive - Bipartisan Policy Center/Congress/HCI/Working Days/senate1995.csv")

# senate_1996 = workingdays("Senate","01/01/1996","12/31/1996")
# senate_1996.to_csv("D:/OneDrive - Bipartisan Policy Center/Congress/HCI/Working Days/senate1996.csv")

# senate_1997 = workingdays("Senate","01/01/1997","12/31/1997")
# senate_1997.to_csv("D:/OneDrive - Bipartisan Policy Center/Congress/HCI/Working Days/senate1997.csv")

# senate_1998 = workingdays("Senate","01/01/1998","12/31/1998")
# senate_1998.to_csv("D:/OneDrive - Bipartisan Policy Center/Congress/HCI/Working Days/senate1998.csv")

# senate_1999 = workingdays("Senate","01/01/1999","12/31/1999")
# senate_1999.to_csv("D:/OneDrive - Bipartisan Policy Center/Congress/HCI/Working Days/senate1999.csv")

# senate_2000 = workingdays("Senate","01/01/2000","12/31/2000")
# senate_2000.to_csv("D:/OneDrive - Bipartisan Policy Center/Congress/HCI/Working Days/senate2000.csv")

In [None]:
# CATCHING ERRORS FROM EXISTING FILES

# from datetime import datetime
# from datetime import timedelta
# import pandas as pd

# house_full = pd.read_csv("D:/OneDrive - Bipartisan Policy Center/Congress/HCI/Working Days/house_full.csv")
# senate_full = pd.read_csv("D:/OneDrive - Bipartisan Policy Center/Congress/HCI/Working Days/senate_full.csv")

# ch = "Senate"

# for i in house_full[house_full["Time Convened"]=="HOLD"].index:
#     try:
#         start = datetime.strptime(str(house_full.loc[i]["Date"]),"%Y-%m-%d").strftime("%m/%d/%Y")
#         end = (datetime.strptime(str(house_full.loc[i]["Date"]),"%Y-%m-%d")+timedelta(days=1)).strftime("%m/%d/%Y")

#         house_full.loc[i] = workingdays(ch,start,end).loc[0]

# #         display(house_full.loc[i])
#     except:
#         print("Error")
#         display(house_full.loc[i])
#         print(house_full.loc[i]["Congress"])


# for i in senate_full[senate_full["Time Convened"]=="HOLD"].index:
#     try:
#         start = datetime.strptime(str(senate_full.loc[i]["Date"]),"%Y-%m-%d").strftime("%m/%d/%Y")
#         end = (datetime.strptime(str(senate_full.loc[i]["Date"]),"%Y-%m-%d")+timedelta(days=1)).strftime("%m/%d/%Y")

#         senate_full.loc[i] = workingdays(ch,start,end).loc[0]
        
#         print(i,len(senate_full[senate_full["Time Convened"]=="HOLD"]))
        
#     except:
#         print("Error")
#         display(senate_full.loc[i])
#         print(senate_full.loc[i]["Congress"])


In [None]:
import pandas as pd

# Persist the working-days table back to disk without the pandas index column.
# NOTE(review): `senate_full` is not assigned in this cell; it is presumably
# left over from the (currently commented-out) error-catching cell above, so
# that cell has to be run first - confirm before executing.
# house_full.to_csv("D:/OneDrive - Bipartisan Policy Center/Congress/HCI/Working Days/house_full.csv",index=False)
senate_full.to_csv("D:/OneDrive - Bipartisan Policy Center/Congress/HCI/Working Days/senate_full.csv",index=False)

In [None]:
def cloture(congress=116,start = None,end=None,phantomjs_path="D:\\phantomjs\\bin\\phantomjs.exe"):
    """Scrape the Senate cloture table for one Congress and summarise it by quarter.

    The page https://www.senate.gov/legislative/cloture/<congress>.htm is rendered
    with a PhantomJS-driven browser, its seven table columns are read cell by cell,
    cells that hold two comma-separated votes are expanded into two rows, the
    "Mon DD" dates are given a year, and the rows are optionally filtered by
    filing date before being counted.

    Parameters:
        congress: number of the Congress, used to build the page URL.
        start: optional 'MM/DD/YYYY' string; only rows whose "Date Filed" is
            strictly later are kept.
        end: optional 'MM/DD/YYYY' string; only rows whose "Date Filed" is
            strictly earlier are kept.
        phantomjs_path: location of the PhantomJS executable (defaults to the
            path that used to be hard-coded here).

    Returns:
        (results, summary): `results` has one row per cloture vote, `summary`
        has one row per "<type>-<measure>" label and one column per quarter of
        the two calendar years starting at the year found on the page.

    Raises:
        ValueError: if no four-digit year can be found on the page.
    """
    import requests
    import time
    import re
    import pandas as pd
    from datetime import date as date_type, datetime
    from lxml import html
    from selenium import webdriver

    #date formatting (dependent on if date given)
    datestart = datetime.strptime(start, '%m/%d/%Y').date() if start else None
    dateend = datetime.strptime(end, '%m/%d/%Y').date() if end else None

    url = "https://www.senate.gov/legislative/cloture/" + str(congress) + ".htm"

    #to make this work you would need to download the PhantomJS executable and pass its path
    # NOTE(review): webdriver.PhantomJS and find_elements_by_xpath only exist in
    # older Selenium releases - confirm the installed version before upgrading.
    driver = webdriver.PhantomJS(executable_path=phantomjs_path)
    try:
        # get web page
        driver.get(url)
        # execute script to scroll down the page
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);var lenOfPage=document.body.scrollHeight;return lenOfPage;")
        # sleep for 5s
        time.sleep(5)

        #first four-digit number in the page's content text is used as the base year
        webpage = requests.get(url)
        tree = html.fromstring(webpage.content)
        date_str = str(tree.xpath("//table//tr//td[@class='contenttext']/text()[normalize-space()]"))
        year_match = re.search(r'\d{4}',date_str)
        if year_match is None:
            raise ValueError("No four-digit year found on " + url)
        yr = int(year_match.group())

        #read the table one column at a time
        columns = {}
        Vote_Number = []
        for col in range(1,8):
            cells = [clean(el.text.replace("\n",", ")) for el in driver.find_elements_by_xpath("//tr[count(td[@colspan])<=0][count(td[@style])=0]//td["+str(col)+"]")]
            if col == 6:
                #the vote column carries "<tally> No. <n>": keep the tail as the vote
                #number and the head as the tally
                # NOTE(review): when a cell has no "No." the slices below fall back
                # to the last character / drop the last character - kept as-is.
                Vote_Number = [x[x.find("No."):] for x in cells]
                for pos, cell in enumerate(cells):
                    parts = cell.split(",")
                    if len(parts) > 1:
                        cells[pos] = clean(str([clean(s[:s.find("No.")]) for s in parts]))
                    elif "No." in cell:
                        cells[pos] = clean(cell[:cell.find("No.")])
            #the first three columns may start with one empty cell; drop it so all
            #columns line up (guarded so an empty column no longer raises IndexError)
            if col <= 3 and cells and cells[0] == "":
                cells.pop(0)
            columns[col] = cells
    finally:
        #always release the browser process, even when scraping fails
        driver.quit()

    #zip results together and make into one dataframe
    cols = ["Date Filed","Measure","Subject","Filed By","Date of Motion","Vote","Vote No.","Result"]
    zipped = list(zip(columns[1],columns[2],columns[3],columns[4],columns[5],columns[6],Vote_Number,columns[7]))
    results = pd.DataFrame(zipped,columns=cols)

    #remove footnotes which mess up date format (a run of 3+ digits means a
    #footnote digit was glued to the day number)
    results["Date Filed"] = [v[:-1] if re.search(r'[0-9]{3,}',v) else v for v in results["Date Filed"]]

    #address "reconsidered": a row whose "Date of Motion" holds two comma-separated
    #values is split into the first vote (kept in place) and the second vote (new row)
    n_doubles = int(results["Date of Motion"].astype(str).str.contains(',').sum())
    for _ in range(n_doubles):
        mask = results["Date of Motion"].astype(str).str.contains(",")
        i = results.index[mask][0]
        row = results.loc[i]

        first = row.copy()
        second = row.copy()    #keeps the full subject text

        #remove misc info from the first vote's subject
        subj_parts = str(row["Subject"]).split(",")
        first["Subject"] = clean(str(subj_parts[0]) + (subj_parts[1] if len(subj_parts)==3 else ""))

        for field in ("Date of Motion","Vote","Vote No.","Result"):
            parts = str(row[field]).split(",")
            first[field] = clean(parts[0])
            second[field] = clean(parts[1])

        results.loc[i] = first
        #DataFrame.append no longer exists in pandas 2.x; concat a one-row frame that
        #carries the same index label so sort_index puts it next to the first vote
        results = pd.concat([results, second.to_frame().T])
        results = results.sort_index(axis=0)
        results.reset_index(drop=True,inplace=True)

    #replace dates with datetimes (whole-column assignment instead of chained .loc writes)
    for col_name in ("Date Filed","Date of Motion"):
        parsed = []
        for row_no, value in enumerate(results[col_name]):
            if value is not None and len(str(value)) > 2:
                try:
                    value = datetime.strptime(value, '%b %d').date()
                except (ValueError, TypeError):
                    print("Error","\n Row = ", row_no)
            parsed.append(value)
        results[col_name] = parsed

    #replace default years with proper (based on index order): rows before the first
    #place where a date is followed by a later date get yr+1, the rest get yr
    # NOTE(review): this relies on the rows being listed newest first - confirm.
    for col_name in ("Date Filed","Date of Motion"):
        values = list(results[col_name])
        yr_change = 0    #no rollover found -> every row belongs to yr
        previous = None
        for pos, value in enumerate(values):
            if not isinstance(value, date_type):
                continue    #skip blanks/unparsed cells instead of comparing them
            if previous is not None and previous < value:
                yr_change = pos
                break
            previous = value
        results[col_name] = [
            value.replace(year=(yr+1 if pos < yr_change else yr)) if isinstance(value, date_type) else None
            for pos, value in enumerate(values)
        ]

    #filter by given dates (both bounds are exclusive)
    if datestart is not None:
        results = results[results["Date Filed"]>datestart]
    if dateend is not None:
        results = results[results["Date Filed"]<dateend]
    results = results.copy()    #detach from the unfiltered frame before adding columns

    #add in legislation type column
    leg_type = []
    for subject, measure in zip(results["Subject"],results["Measure"]):
        if "motion to proceed" in str(subject):
            leg_type.append("Motion to Proceed")
        elif "PN" in measure:
            leg_type.append("Nomination")
        elif "motion to proceed" not in measure:
            leg_type.append("Legislation")
        else:
            leg_type.append("ERROR: Hand Input Required")

    results.insert(loc=2,column="Legislation Type",value=leg_type)

    #BEGINNING TO COMPILE SUMMARY DATA
    month_quarters = {1:1,2:1,3:1,4:2,5:2,6:2,7:3,8:3,9:3,10:4,11:4,12:4}
    results["Quarter"] = [month_quarters.get(d.month) if isinstance(d, date_type) else None for d in results["Date Filed"]]
    results["Year"] = [d.year if isinstance(d, date_type) else None for d in results["Date Filed"]]

    filters = ["Legislation","Nomination","Motion to Proceed"]
    measures = ["Motions Filed","Votes","Voted Inv","Votes Failed","Reconsidered"]

    cols = ["Q"+str(q)+"-"+str(y) for y in (yr,yr+1) for q in range(1,5)]
    index = [name+"-"+measure for name in filters for measure in measures]
    summary = pd.DataFrame(columns=cols,index=index)

    is_recon = results["Subject"].str.contains("recons",na=False)
    #for each filter, get summary data
    for name in filters:
        #"Legislation" counts everything that is not a nomination
        if name == "Legislation":
            type_mask = results["Legislation Type"]!="Nomination"
        else:
            type_mask = results["Legislation Type"]==name
        for year in (yr,yr+1):
            for qtr in range(1,5):
                #filter full df with one combined mask
                in_period = type_mask & (results["Quarter"]==qtr) & (results["Year"]==year)
                filtered = results[in_period]
                n_recon = int((in_period & is_recon).sum())
                n_invoked = len(filtered[filtered["Result"]=="I"])
                n_failed = len(filtered[filtered["Result"]=="F"])
                #count results
                label = "Q"+str(qtr)+"-"+str(year)
                summary.loc[name+"-Motions Filed",label] = len(filtered) - n_recon
                summary.loc[name+"-Reconsidered",label] = n_recon
                summary.loc[name+"-Voted Inv",label] = n_invoked
                summary.loc[name+"-Votes Failed",label] = n_failed
                summary.loc[name+"-Votes",label] = n_invoked + n_failed

    #still need to fix so it doesn't tally unnecessarily if there is ONLY one year in question
    return(results,summary)


In [None]:
# Reporting window shared by every scraper below; dates are 'MM/DD/YYYY' strings.
congress=116
start = "01/01/2019"
end = "06/30/2020"


# Run each scraper once for the window above. The calls that unpack two names
# keep a raw table and a summary table; the others keep a single table.
# df = workingdays("Senate",start,end)
# df_2 = workingdays("House of Representatives",start,end)
amendments_results,amendments_summary = amendments_senate(start,end)
conference_res = conference(start,end)
reported_bills = reportedbills(start,end)
unrephouse_results,unrephouse_summary = unrep("House of Representatives",start,end)
unrepsen_results,unrepsen_summary = unrep("Senate",start,end)
# cloture takes the Congress number first, so every argument is passed by keyword.
cloture_results,cloture_summary = cloture(congress=congress,start=start,end=end)

In [None]:
# Output folder for this quarter's raw pulls (Windows path, trailing separator included).
dir_ = "D:\\OneDrive - Bipartisan Policy Center\\Congress\\HCI\\2020Q2\\raw\\"
# Frames to export and, in the same order, the file-name stem used for each one.
to_download = [amendments_results,amendments_summary,conference_res,reported_bills,unrephouse_results,unrephouse_summary,unrepsen_results,unrepsen_summary,cloture_results,cloture_summary]
to_download_names = ["amendments_results","amendments_summary","conference","reported_bills","unrephouse_results","unrephouse_summary","unrepsen_results","unrepsen_summary","cloture_results","cloture_summary"]

from datetime import datetime
# Walk both lists in lockstep instead of indexing them by position; each file is
# written as <stem><today's ISO date>.csv.
for frame, stem in zip(to_download, to_download_names):
    frame.to_csv(dir_+stem+str(datetime.now().date())+".csv")
    
# df.to_csv(dir_+"WD-Senate-"+str(datetime.now().date())+".csv",index="False")
# df_2.to_csv(dir_+"WD-House-"+str(datetime.now().date())+".csv",index="False")

In [None]:
from PyQt5.QtCore import QDateTime, Qt, QTimer,pyqtSlot,QUrl,QDir
from PyQt5 import QtCore
from PyQt5.QtWidgets import (QApplication, QCheckBox, QComboBox, QDateTimeEdit,
        QDial, QDialog, QGridLayout, QGroupBox, QHBoxLayout, QLabel, QLineEdit,
        QProgressBar, QPushButton, QRadioButton, QScrollBar, QSizePolicy,
        QSlider, QSpinBox, QStyleFactory, QTableWidget, QTabWidget, QTextEdit,
        QVBoxLayout, QWidget,QMessageBox,QApplication, QWidget, QInputDialog, QLineEdit, QFileDialog)
from PyQt5.QtGui import QIcon,QValidator
import re 


def _write_autofit_sheets(writer, frames, index, empty_width=None):
    """Write each frame to its own sheet and size every data column to its longest cell.

    `frames` maps sheet name -> DataFrame. When `index` is true the frame's index
    is written too and the data columns are shifted right by the number of index
    levels. `empty_width`, when given, replaces the width of a column whose header
    and cells are all empty.
    """
    for sheetname, df in frames.items():  # loop through `dict` of dataframes
        df.to_excel(writer, sheet_name=sheetname, index=index)  # send df to writer
        worksheet = writer.sheets[sheetname]  # pull worksheet object
        offset = df.index.nlevels if index else 0
        for idx, col in enumerate(df):  # loop through all columns
            series = df[col]
            max_len = max((
                series.astype(str).map(len).max(),  # len of largest item
                len(str(series.name))  # len of column name/header
                )) + 1  # adding a little extra space
            if empty_width is not None and max_len == 1:
                max_len = empty_width
            worksheet.set_column(idx+offset, idx+offset, max_len)  # set column width


def getall_summary(start,end):
    """Run every scraper for the given window and write two workbooks to ~/Downloads.

    `start` and `end` are 'MM/DD/YYYY' strings handed unchanged to the scrapers.
    HCISummary-<MM-YYYY>.xlsx receives the summary tables (index included) and
    HCIRaw-<MM-YYYY>.xlsx receives the raw tables (index dropped), one sheet per
    data set. Returns None.
    """
    import pandas as pd
    from datetime import datetime
    import re

    # Collect all data first so that no workbook is opened if a scraper fails.
    # NOTE(review): unrep_house, unrep_senate and a two-argument workingdays are
    # not defined in the visible part of this file (the driver cell calls
    # unrep(chamber, start, end) instead) - confirm these names exist.
    UNresults,UNsummary = unrep_house(start,end)
    UNSresults,UNSsummary = unrep_senate(start,end)
    AMresults,AMsummary = amendments_senate(start,end)
    # cloture's first positional parameter is the Congress number, so the dates
    # must be passed by keyword (the default Congress is used).
    CLresults,CLsummary = cloture(start=start,end=end)
    WDresults,WDsummary = workingdays(start,end)
    reported = reportedbills(start,end)
    conf = conference(start,end)

    downloads = QDir.homePath()+"/Downloads/"
    stamp = datetime.now().strftime("-%m-%Y")

    # Summary workbook; the context manager saves and closes the file
    # (ExcelWriter.save() is gone from current pandas).
    out = re.sub("/","\\\\",(downloads+'HCISummary'+stamp+'.xlsx'))
    summary_frames = {'Conference': conf,
                      'House Unreported': UNsummary,
                      'Senate Unreported': UNSsummary,
                      'Senate Amendments': AMsummary,
                      'Cloture': CLsummary,
                      'Working Days': WDsummary,
                      'Reported Bills': reported}
    # Fixed widths for column A (the index column) of each summary sheet.
    index_widths = {'Conference': 1,
                    'House Unreported': 30,
                    'Senate Unreported': 30,
                    'Senate Amendments': 30,
                    'Cloture': 19,
                    'Working Days': 11,
                    'Reported Bills': 70}
    with pd.ExcelWriter(out, engine='xlsxwriter') as writer:
        _write_autofit_sheets(writer, summary_frames, index=True)
        for sheetname, width in index_widths.items():
            writer.sheets[sheetname].set_column('A:A',width)

    # Raw workbook; the reported-bills index becomes an unnamed first column.
    out = re.sub("/","\\\\",(downloads+'HCIRaw'+stamp+'.xlsx'))
    reported.reset_index(level=0, inplace=True)
    reported.rename(columns={"index": ""}, inplace=True)
    raw_frames = {'Conference': conf,
                  'House Unreported': UNresults,
                  'Senate Unreported': UNSresults,
                  'Senate Amendments': AMresults,
                  'Cloture': CLresults,
                  'Working Days': WDresults,
                  'Reported Bills': reported}
    with pd.ExcelWriter(out, engine='xlsxwriter') as writer:
        _write_autofit_sheets(writer, raw_frames, index=False, empty_width=10)

class WidgetGallery(QDialog):
    """Dialog with one start/end-date panel per HCI data set and download buttons.

    Every panel has two free-text date fields (placeholder 'MM/DD/YYYY') and one
    or more buttons. "Raw" buttons call OpenClick, "Summary" buttons call
    OpenClick2; both write CSV files into the user's Downloads folder and then
    show a message box naming the file.
    """

    def __init__(self, parent=None):
        super(WidgetGallery, self).__init__(parent)
        self.originalPalette = QApplication.palette()

        self.createTopLeftGroupBox()
        self.createTopRightGroupBox()
        self.createBottomLeftGroupBox()
        self.createBottomRightGroupBox()
        self.createTopRightGroupBox2()
        self.createBottomLeftGroupBox2()

        self.createTopTopGroupBox()

        mainLayout = QGridLayout()

        mainLayout.addWidget(self.topTopGroupBox,0,0,1,2)
        mainLayout.addWidget(self.topRightGroupBox2,1,0,1,2)
        mainLayout.addWidget(self.topLeftGroupBox, 2, 0,1,2)
        mainLayout.addWidget(self.topRightGroupBox, 4, 1)
        mainLayout.addWidget(self.bottomLeftGroupBox, 3, 0)
        mainLayout.addWidget(self.bottomRightGroupBox, 3, 1)
        mainLayout.addWidget(self.bottomLeftGroupBox2,4,0)
        mainLayout.setRowStretch(1, 1)
        mainLayout.setRowStretch(2, 1)
        mainLayout.setColumnStretch(0, 1)
        mainLayout.setColumnStretch(1, 1)
        self.setLayout(mainLayout)

        QApplication.setStyle(QStyleFactory.create("Fusion"))
        self.setWindowIcon(QIcon("C:\\Users\\rorey\\AppData\\Local\\Programs\\Python\\Python38-32\\Scripts\\iconx.ico"))
        self.setWindowTitle("HCI Data Pull")

    def _clickHandler(self, handler, ind, startEdit, endEdit):
        """Return a no-argument slot that calls `handler` with the current field texts.

        Built in its own scope so that each button keeps its own handler/ind
        instead of sharing the loop variables of the caller.
        """
        return lambda: handler(ind=ind, start=startEdit.text(), end=endEdit.text())

    def _createDateRangeGroupBox(self, title, buttons):
        """Build a titled panel: start/end date fields followed by one button per entry.

        `buttons` is a sequence of (caption, handler, ind); the buttons are laid
        out top to bottom in the given order, starting at grid row 4.
        """
        groupBox = QGroupBox(title)

        startEdit = QLineEdit()
        startEdit.setPlaceholderText('MM/DD/YYYY')
        startLabel = QLabel("Start Date:")

        endEdit = QLineEdit()
        endEdit.setPlaceholderText('MM/DD/YYYY')
        endLabel = QLabel("End Date:")

        layout = QGridLayout()
        layout.addWidget(startLabel, 0, 0, 1, 2)
        layout.addWidget(startEdit, 1, 0, 1, 2)
        layout.addWidget(endLabel, 2, 0, 1, 2)
        layout.addWidget(endEdit, 3, 0)

        created = []  # keep Python references alive until the layout is installed
        for row, (caption, handler, ind) in enumerate(buttons, start=4):
            button = QPushButton(caption)
            button.setDefault(True)
            button.clicked.connect(self._clickHandler(handler, ind, startEdit, endEdit))
            layout.addWidget(button, row, 0)
            created.append(button)

        groupBox.setLayout(layout)
        return groupBox

    def createTopTopGroupBox(self):
        """Panel that downloads every data set at once (ind 20)."""
        self.topTopGroupBox = self._createDateRangeGroupBox("All Summary Data", [
            ("Download All Data", self.OpenClick, 20),
        ])

    def createTopLeftGroupBox(self):
        """Panel for unreported bills: House is ind 10, Senate is ind 12."""
        self.topLeftGroupBox = self._createDateRangeGroupBox("Unreported Bills", [
            ("Get Raw House Data", self.OpenClick, 10),
            ("Get Raw Senate Data", self.OpenClick, 12),
            ("Get Summary House Data", self.OpenClick2, 10),
            ("Get Summary Senate Data", self.OpenClick2, 12),
        ])

    def createTopRightGroupBox(self):
        """Panel for Senate amendments (ind 4)."""
        self.topRightGroupBox = self._createDateRangeGroupBox("Senate Amendments", [
            ("Get Raw Senate Amendment Data", self.OpenClick, 4),
            ("Get Summary Senate Amendment Data", self.OpenClick2, 4),
        ])

    def createBottomLeftGroupBox(self):
        """Panel for cloture motions (ind 2)."""
        self.bottomLeftGroupBox = self._createDateRangeGroupBox("Cloture", [
            ("Get Raw Cloture Data", self.OpenClick, 2),
            ("Get Summary Cloture Data", self.OpenClick2, 2),
        ])

    def createBottomRightGroupBox(self):
        """Panel for working days (ind 0)."""
        self.bottomRightGroupBox = self._createDateRangeGroupBox("Working Days", [
            ("Get Raw Working Days Data", self.OpenClick, 0),
            ("Get Summary Working Days Data", self.OpenClick2, 0),
        ])

    def createTopRightGroupBox2(self):
        """Panel for reported bills (ind 14); it has no separate summary button."""
        self.topRightGroupBox2 = self._createDateRangeGroupBox("Reported Bills", [
            ("Get Reported Bills Data", self.OpenClick, 14),
        ])

    def createBottomLeftGroupBox2(self):
        """Panel for conference data (ind 6)."""
        self.bottomLeftGroupBox2 = self._createDateRangeGroupBox("Conference", [
            ("Get Raw Conference Data", self.OpenClick, 6),
            ("Get Summary Conference Data", self.OpenClick2, 6),
        ])

    def OpenClick(self,ind,start,end):
        """Download the raw table selected by `ind` (or everything for ind 20).

        `start` and `end` are the date strings typed by the user and are passed
        unchanged to the scraper. A message box naming the written file(s) is
        shown afterwards. Raises ValueError for an `ind` that is not wired up.
        """
        import re
        from datetime import datetime
        downloads = QDir.homePath()+"/Downloads/"

        if ind == 20:
            getall_summary(start,end)
            stamp = datetime.now().strftime("-%m-%Y")
            popup = QMessageBox()
            popup.setText(str("The files have been downloaded to "+downloads+'HCIRaw'+stamp+'.xlsx'+" and "+downloads+'HCISummary'+stamp+'.xlsx'))
            popup.exec_()
            return

        # ind -> (file stem, deferred scraper call); lambdas keep the calls lazy so
        # only the requested scraper runs.
        # NOTE(review): workingdays(start,end), unrep_house and unrep_senate are not
        # defined in the visible part of this file - confirm they exist.
        sources = {
            0: ("workingdays", lambda: workingdays(start,end)[0]),
            # cloture takes the Congress number first, so dates go by keyword
            2: ("cloture", lambda: cloture(start=start,end=end)[0]),
            4: ("senate_amendments", lambda: amendments_senate(start,end)[0]),
            6: ("conference", lambda: conference(start,end)),
            10: ("house_unreportedbills", lambda: unrep_house(start,end)[0]),
            12: ("senate_unreportedbills", lambda: unrep_senate(start,end)[0]),
            14: ("reportedbills", lambda: reportedbills(start,end)),
        }
        if ind not in sources:
            raise ValueError("Unknown data set index: " + str(ind))
        name, fetch = sources[ind]

        frame = fetch()
        if ind == 14:
            # reported bills keep their labels in the index; turn it into a column
            # so it survives the index-less export below
            frame.reset_index(inplace=True)

        target = downloads+name+".csv"
        popup = QMessageBox()
        popup.setText(str("The raw data file has been downloaded to "+target))
        frame.to_csv(re.sub("/","\\\\",target),index=None,header=True)
        popup.exec_()

    def OpenClick2(self,ind,start,end):
        """Download the summary table selected by `ind` as <name>SUMMARY.csv.

        `ind == 100` does nothing. `start` and `end` are passed unchanged to the
        scraper. Raises ValueError for any other `ind` that is not wired up.
        """
        import re
        if ind == 100:
            return

        # NOTE(review): workingdays(start,end), unrep_house and unrep_senate are not
        # defined in the visible part of this file - confirm they exist.
        sources = {
            0: ("workingdays", lambda: workingdays(start,end)[1]),
            # cloture takes the Congress number first, so dates go by keyword
            2: ("cloture", lambda: cloture(start=start,end=end)[1]),
            4: ("senate_amendments", lambda: amendments_senate(start,end)[1]),
            6: ("conference", lambda: conference(start,end)),
            10: ("house_unreportedbills", lambda: unrep_house(start,end)[1]),
            12: ("senate_unreportedbills", lambda: unrep_senate(start,end)[1]),
        }
        if ind not in sources:
            raise ValueError("Unknown data set index: " + str(ind))
        name, fetch = sources[ind]

        frame = fetch()
        # the message now names the file that is actually written
        target = downloads = QDir.homePath()+"/Downloads/"+name+"SUMMARY.csv"
        popup = QMessageBox()
        popup.setText(str("The summary file has been downloaded to "+target))
        frame.to_csv(re.sub("/","\\\\",target),header=True)
        popup.exec_()

import sys

# Create the Qt application, show the download dialog and block in the Qt
# event loop until the window is closed.
app = QApplication(sys.argv)
gallery = WidgetGallery()
gallery.show()
app.exec_()