In [14]:
# Imported here so this cell also runs on its own, before the main import cell.
import os

# The current user's "downloads" folder.
userDownloadPath = os.path.join(os.path.expanduser('~'), 'downloads')
# Sub-folder used for the downloaded reports. The empty last component makes
# os.path.join append the platform's path separator, so the value keeps its
# trailing separator without hard-coding backslashes.
ogDownloadPath = os.path.join(userDownloadPath, 'OGData', '')

In [58]:
import os, shutil, time, re
from enum import Enum
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException

class ResultView(Enum):
    """Result layouts a download run can be configured with."""

    # Master-file header uses the OperatorName / OperatorNo columns.
    Operator = 1
    # Master-file header uses the FieldName / FieldNo columns.
    Field = 2
    
class OGDataDownload:
    """Run production-data queries in Chrome and merge the CSV downloads.

    Creating an instance clears the download folder, writes the header row of
    the master CSV file and opens the query page in a Chrome browser driven by
    Selenium. :meth:`download` then submits one query per
    (district, year, month) combination, appends each downloaded report to the
    master file and returns a per-combination row-count summary.
    """

    # chromedriver location used when the constructor is given no explicit path.
    DEFAULT_CHROME_EXE_PATH = "C:\\Users\\spunna\\Downloads\\chromedriver_win32\\chromedriver.exe"

    def __init__(self, downloads_folder, result_view, districts, years, months,
                 chrome_exe_path=None):
        """Store the configuration, reset the download folder and open the page.

        Args:
            downloads_folder: Folder Chrome downloads into; the master file is
                written there too. Every file already in it is deleted.
            result_view: ``ResultView`` member selecting the header columns of
                the master file.
            districts: District labels, selected by visible text on the form.
            years: Year labels (strings), selected by visible text.
            months: Month labels (strings), selected by visible text.
            chrome_exe_path: Optional path of the chromedriver executable;
                defaults to ``DEFAULT_CHROME_EXE_PATH``.
        """
        # define variables
        self.ogDownloadPath = downloads_folder
        self.chromeExePath = chrome_exe_path or self.DEFAULT_CHROME_EXE_PATH
        self.webUrl = 'http://webapps2.rrc.texas.gov/EWA/productionQueryAction.do'
        self.downloadFile = 'ProductionDataQuery_Operator_ReportCsv.csv'
        self.masterFile = 'ProductionDataquery_Operator_All.csv'
        # os.path.join gives the same result as plain concatenation when the
        # folder ends with a separator, and inserts one when it does not.
        self.downloadedFilePath = os.path.join(self.ogDownloadPath, self.downloadFile)
        self.masterFilePath = os.path.join(self.ogDownloadPath, self.masterFile)
        self.result_view = result_view
        self.years = years
        self.months = months
        self.districts = districts
        # Read by close_alert_and_get_its_text(); must exist before its first call.
        self.accept_next_alert = True

        # open web page
        self.prepare_download_folder()
        self.driver = self.open_webpage()

    def open_webpage(self):
        """Start Chrome with the download folder configured and load the page.

        Returns:
            The Selenium Chrome driver, positioned on ``self.webUrl``.
        """
        # set downloads folder as default download path for Chrome browser
        options = webdriver.ChromeOptions()
        prefs = {
            "profile.default_content_settings.popups": 0,
            # Use this instance's folder (not a notebook global) and make sure
            # it ends with a path separator.
            "download.default_directory": os.path.join(self.ogDownloadPath, ''),
            "directory_upgrade": True,
        }
        options.add_experimental_option("prefs", prefs)
        # NOTE(review): recent Selenium 4 releases expect a Service object
        # instead of a positional driver path - confirm against the installed
        # Selenium version.
        driver = webdriver.Chrome(self.chromeExePath, options=options)

        # open web page
        driver.get(self.webUrl)
        time.sleep(5)  # fixed wait for the page to load
        return driver

    def prepare_download_folder(self):
        """Empty the download folder and start a fresh master file.

        The folder is created when missing. Only files (and links) are
        removed; sub-directories are left alone. The master file is then
        overwritten with a single header row whose name/number columns depend
        on ``self.result_view``.
        """
        os.makedirs(self.ogDownloadPath, exist_ok=True)

        # clear downloads folder
        for entry in os.listdir(self.ogDownloadPath):
            entry_path = os.path.join(self.ogDownloadPath, entry)
            if os.path.isfile(entry_path) or os.path.islink(entry_path):
                os.unlink(entry_path)

        if self.result_view == ResultView.Operator:
            view_fields = 'OperatorName, OperatorNo,'
        else:
            view_fields = 'FieldName, FieldNo,'

        # write header to the master file
        with open(self.masterFilePath, "w") as master:
            master.write('District, Year, Month,' + view_fields
                         + ' Oil(BBL), Casinghead(MCF), GWGas(MCF), Condensate(BBL)\n')

    def prepare_options(self):
        """Return every (district, year, month) combination to query.

        Returns:
            A list of 3-tuples, ordered by district, then year, then month.
        """
        options = [(d, y, m)
                   for d in self.districts
                   for y in self.years
                   for m in self.months]
        print('Total files for district, year, month combinations:', len(options))
        return options

    def download(self):
        """Query every combination and summarise how many rows each produced.

        The browser is closed when the run ends, including when a query raises.

        Returns:
            A ``pandas.DataFrame`` with the columns ``District``, ``Year``,
            ``Month`` and ``Count``, one row per combination.
        """
        options = self.prepare_options()
        print('Processing options:')
        columns = ['District', 'Year', 'Month', 'Count']
        # Collect plain records and build the frame once: DataFrame.append was
        # removed in pandas 2.0 and copied the whole frame on every call.
        records = []
        try:
            for district, year, month in options:
                count = self.download_data(district, year, month)
                records.append({'District': district,
                                'Year': year,
                                'Month': month,
                                'Count': count})
        finally:
            self.driver.quit()
        return pd.DataFrame(records, columns=columns)

    def _select_option(self, by, locator, text):
        """Click a drop-down, choose the option with visible ``text``, click again."""
        # The element is located anew for each step so that a re-render of the
        # page between steps cannot leave a stale reference behind.
        self.driver.find_element(by, locator).click()
        Select(self.driver.find_element(by, locator)).select_by_visible_text(text)
        self.driver.find_element(by, locator).click()

    def download_data(self, district, year, month):
        """Submit one query and merge its download into the master file.

        Args:
            district: District label to select.
            year: Year label, used for both the start and the end year.
            month: Month label, used for both the start and the end month.

        Returns:
            The number of rows appended to the master file, or ``0`` when the
            result page offers no Download button.
        """
        # NOTE(review): the second "initial view" radio button is always
        # chosen, independent of self.result_view - confirm this is intended.
        self.driver.find_element(
            By.XPATH, "(//input[@name='searchArgs.initialViewArg'])[2]").click()
        self._select_option(By.NAME, "searchArgs.startMonthArg", month)
        self._select_option(By.NAME, "searchArgs.startYearArg", year)
        self._select_option(By.NAME, "searchArgs.endMonthArg", month)
        self._select_option(By.NAME, "searchArgs.endYearArg", year)
        self._select_option(By.ID, "district", district)
        self.driver.find_element(By.NAME, "submit").click()
        time.sleep(2)  # wait of two seconds for results

        has_result, download_button = self.get_download_button()
        if not has_result:
            # Nothing to download for this combination: report zero rows
            # rather than an implicit None.
            return 0

        # click Download
        download_button.click()
        time.sleep(2)  # wait of two seconds for download
        # go back and search again
        self.driver.find_element(By.LINK_TEXT, "Search Criteria").click()
        prefix = district + ',' + year + ',' + month
        return self.append_content(prefix)

    def get_download_button(self):
        """Look up the Download button on the current page.

        Returns:
            ``(True, element)`` when the button exists, ``(False, None)``
            otherwise.
        """
        try:
            btn = self.driver.find_element(
                by=By.XPATH, value='.//input[@name="unused" and @value="Download"]')
        except NoSuchElementException:
            return False, None
        return True, btn

    def is_element_present(self, how, what):
        """Return True when an element matching locator ``how``/``what`` exists."""
        try:
            self.driver.find_element(by=how, value=what)
        except NoSuchElementException:
            return False
        return True

    def is_alert_present(self):
        """Return True when switching to an alert succeeds."""
        try:
            self.driver.switch_to.alert
        except NoAlertPresentException:
            return False
        return True

    def close_alert_and_get_its_text(self):
        """Close the current alert and return its text.

        The alert is accepted when ``self.accept_next_alert`` is true and
        dismissed otherwise; the flag is reset to ``True`` afterwards.
        """
        try:
            alert = self.driver.switch_to.alert
            alert_text = alert.text
            if self.accept_next_alert:
                alert.accept()
            else:
                alert.dismiss()
            return alert_text
        finally:
            self.accept_next_alert = True

    def append_content(self, prefix):
        """Append the downloaded report's data rows to the master file.

        The first eight lines and the last line of the downloaded file are
        skipped; ``prefix`` is put in front of every remaining line. The
        downloaded file is deleted afterwards.

        Args:
            prefix: Text prepended to each data line.

        Returns:
            The number of lines appended to the master file.
        """
        # NOTE(review): confirm that the report preamble is exactly eight
        # lines long, and that data rows begin with a separator - prefix and
        # row are joined without one.
        with open(self.downloadedFilePath, "r") as downloaded:
            prefixed_rows = [prefix + line for line in downloaded.readlines()[8:-1]]

        with open(self.masterFilePath, "a") as master:
            master.writelines(prefixed_rows)

        # delete downloaded file
        os.unlink(self.downloadedFilePath)
        return len(prefixed_rows)
    

In [None]:
%%time
# Year labels 1993-2018.
# NOTE(review): currently unused - the run below passes ['2019'] explicitly.
years = [str(y) for y in range(1993, 2019, 1)]
# Month labels, passed to the query form as visible option text.
# 'Nov' was missing from this list, so November was never queried.
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# District labels, passed to the query form as visible option text.
districts = ['01', '02', '03', '04', '05', '06', '6E', '7B', '7C', '08', '8A', '09', '10']
# Constructing the downloader clears the download folder and opens the browser.
operators = OGDataDownload(ogDownloadPath, ResultView.Field, districts, ['2019'], months)
# One row per (district, year, month) with the number of rows downloaded.
df = operators.download()
df

In [60]:
df

Unnamed: 0,District,Year,Month,Count
0,1,2019,Jan,1445
1,1,2019,Feb,1445
