In [None]:
import time
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from slugify import slugify

In [None]:
from libtech_lib.generic.commons import logger_fetch
from libtech_lib.wrappers.sn import driverInitialize, driverFinalize
from libtech_lib.generic.aws import get_aws_parquet, upload_s3
from libtech_lib.generic.html_functions import get_dataframe_from_html
from libtech_lib.rayatubarosa.models import RBCrawler, RBLocation
from libtech_lib.generic.api_interface import api_get_tag_id


In [None]:
# Shared logger for the notebook; it is passed explicitly to the crawler call further down.
logger = logger_fetch()

In [None]:
# Load villages.csv from the current working directory.
# NOTE(review): the name `df` is rebound further down when the scraped tables
# are concatenated, and no visible cell reads this frame — confirm it is still needed.
df = pd.read_csv('villages.csv')
#df

In [None]:
class Crawler():
    """Selenium crawler class for crawling the Rythu Bharosa website.

    Attributes:
        driver: browser driver returned by ``driverInitialize(timeout=3)``.
        vars: scratch dictionary; ``wait_for_window`` expects the caller to
            have stored the earlier window handles under ``"window_handles"``.
        district_url, payment_url: report URLs kept on the instance; no
            method of this class reads them.
        census_parquet_village_filename: path handed to ``get_aws_parquet``
            by ``read_census_parquet``.
    """
    def __init__(self):
        self.driver = driverInitialize(timeout=3)
        self.vars = {}
        self.district_url = 'https://ysrrythubharosa.ap.gov.in/RBApp/Reports/RBDistrictPaymentAbstract'
        self.payment_url = 'https://ysrrythubharosa.ap.gov.in/RBApp/Reports/PaymentvillReport'
        # /data/locations/ap_census/village_all_ap_villages/
        self.census_parquet_village_filename = "data/locations/ap_census/all_ap_villages/part-00000-9270a97e-3293-45b6-b1fd-2fed8304fc12-c000.snappy.parquet"

    def teardown_method(self):
        """Hand the driver to ``driverFinalize`` to tear the crawler down."""
        driverFinalize(self.driver)

    def read_census_parquet(self, logger):
        """Read the census parquet file, log its head and return it.

        Args:
            logger: logger used to report the first rows of the frame.

        Returns:
            Whatever ``get_aws_parquet`` returned for
            ``census_parquet_village_filename`` (previously the frame was
            loaded and then discarded).
        """
        dataframe = get_aws_parquet(self.census_parquet_village_filename)
        logger.info(dataframe.head())
        return dataframe

    def wait_for_window(self, timeout=2):
        """Sleep for ``timeout`` milliseconds, then return a newly opened window handle.

        The handles currently reported by the driver are compared with the
        list stored in ``self.vars["window_handles"]``, which must be set
        before calling (a missing key raises ``KeyError``).

        Args:
            timeout: wait in milliseconds (the value is divided by 1000
                before sleeping). Sub-second values are honoured instead of
                being rounded down to no wait at all.

        Returns:
            One handle that was not present before, or ``None`` when the
            number of open windows has not grown.
        """
        # No round(): round(timeout / 1000) turned anything below 500 ms into 0.
        time.sleep(max(timeout, 0) / 1000)
        wh_now = self.driver.window_handles
        wh_then = self.vars["window_handles"]
        if len(wh_now) > len(wh_then):
            return set(wh_now).difference(set(wh_then)).pop()
        return None

    def login_portal(self, logger, user='', password=''):
        """Open the login page, type the credentials and click the login button.

        Args:
            logger: logger used for progress messages.
            user: user name typed into the text input (default: empty, as before).
            password: password typed into the password input (default: empty,
                as before). It is never written to the log in clear text.
        """
        url = 'https://ysrrythubharosa.ap.gov.in/RBApp/RB/Login'
        logger.info('Fetching URL[%s]' % url)
        self.driver.get(url)
        time.sleep(3)

        # find_element(By.XPATH, ...) replaces find_element_by_xpath, which
        # newer Selenium releases no longer provide.
        elem = self.driver.find_element(By.XPATH, '//input[@type="text"]')
        logger.info('Entering User[%s]' % user)
        elem.send_keys(user)

        elem = self.driver.find_element(By.XPATH, '//input[@type="password"]')
        # Log only a mask of the same length so the secret stays out of log files.
        logger.info('Entering Password[%s]' % ('*' * len(password)))
        elem.send_keys(password)

        login_button = '(//button[@type="button"])[2]'

        elem = self.driver.find_element(By.XPATH, login_button)
        logger.info('Clicking Login Button')
        # NOTE(review): the 15 s pause before the click presumably leaves time
        # for manual input in the browser (e.g. a captcha) — confirm.
        time.sleep(15)
        elem.click()

    def print_current_window_handles(self, logger, event_name=None):
        """Debug helper: log every window handle of the driver with its index."""
        handles = self.driver.window_handles
        logger.info(f"Printing current window handles after {event_name}")
        for index, handle in enumerate(handles):
            logger.info(f"{index}-{handle}")

    def fetch_eligibility_report(self, logger):
        """Log in to the portal and return the driver.

        Only the login is performed here; navigating to the Publication
        Eligibility Report page is left to the caller.

        Returns:
            The logged-in ``self.driver``.
        """
        self.login_portal(logger)
        return self.driver
        


In [None]:
# Creating the crawler calls driverInitialize(timeout=3) in its constructor.
my_crawler = Crawler()

In [None]:
# Log in through the portal and keep the returned driver for the cells below.
driver = my_crawler.fetch_eligibility_report(logger)

In [None]:
# Navigate the driver returned by the login step to the Publication Eligibility Report page.
url = 'https://ysrrythubharosa.ap.gov.in/RBApp/Reports/PublicationEligibilityReport'
driver.get(url)

In [None]:
# Wrap the dropdown found at this absolute XPath in a Select helper so its
# options can be inspected. find_element(By.XPATH, ...) is used because
# find_element_by_xpath is no longer available in newer Selenium releases.
elem = Select(driver.find_element(By.XPATH, '/html/body/section/div/div/div[2]/div/div[2]/div[2]/div[2]/div/select'))
elem

In [None]:
# Print the `value` attribute of every option in the dropdown, one per line.
for option in elem.options:
    option_value = option.get_attribute('value')
    print(option_value)

In [None]:
# Visible label of the second option (index 1) of the dropdown.
elem.options[1].text

In [None]:
# `value` attribute of the same second option (index 1).
elem.options[1].get_attribute('value')

## Fetch the village names and codes, then scrape the report for each village

In [None]:
# Select helper around the village dropdown. find_element(By.XPATH, ...) is
# used because find_element_by_xpath is no longer available in newer Selenium releases.
village_select = Select(driver.find_element(By.XPATH, '/html/body/section/div/div/div[2]/div/div[2]/div[1]/div[3]/div/select'))

In [None]:
# Scrape the report table for every village / land type / status combination.
# Every scraped page is appended to `villageDFs`; in addition each village
# gets one CSV in `data_dir` holding all of its pages.
villageDFs = []
# NOTE(review): the directory is not created here — it must already exist.
data_dir = '~/tmp/RB/PublicationEligibilityReport'
block_name = 'G. Madugula'

# Absolute XPaths of the form controls on the report page.
LAND_SELECT_XPATH = '/html/body/section/div/div/div[2]/div/div[2]/div[2]/div[2]/div/select'
STATUS_SELECT_XPATH = '/html/body/section/div/div/div[2]/div/div[2]/div[2]/div[3]/div/select'
SUBMIT_XPATH = '/html/body/section/div/div/div[2]/div/div[2]/div[3]/div/input'


def _option_pairs(select):
    """Return ``(value, text)`` for every option of ``select`` except the first.

    The pairs are read up front so later page updates cannot invalidate the
    option elements while we iterate. The first option is skipped, as the
    original loop did (presumably a placeholder entry — TODO confirm).
    """
    return [(opt.get_attribute('value'), opt.text) for opt in select.options][1:]


for i, (village_code, village_name) in enumerate(_option_pairs(village_select), start=1):
    logger.info(f'Selecting Village[{i}] = [{village_code},{slugify(village_name)}]')
    try:
        village_select.select_by_value(village_code)
        time.sleep(3)
        land_select = Select(driver.find_element(By.XPATH, LAND_SELECT_XPATH))
        land_options = _option_pairs(land_select)
    except Exception as e:
        logger.error(f'Exception during select ofVillage[{village_code},{slugify(village_name)}] - EXCEPT[{type(e)}, {e}]')
        logger.warning(f'Skipping Village[{village_code}]')
        # Actually skip: falling through would reuse the previous village's dropdown.
        continue

    village_csv = f'{data_dir}/{block_name}_{village_name}-{village_code}.csv'
    village_frames = []

    for land_code, land_type in land_options:
        try:
            land_select.select_by_value(land_code)
            status_select = Select(driver.find_element(By.XPATH, STATUS_SELECT_XPATH))
            status_options = _option_pairs(status_select)
        except Exception as e:
            logger.error(f'Exception during select land type[{land_code}] - EXCEPT[{type(e)}, {e}]')
            continue

        for status_code, status in status_options:
            try:
                status_select.select_by_value(status_code)
                driver.find_element(By.XPATH, SUBMIT_XPATH).click()
            except Exception as e:
                logger.error(f'Exception during select status[{status_code}] - EXCEPT[{type(e)}, {e}]')
                continue

            # NOTE(review): a clickable swal2 confirm button is treated as
            # "nothing to scrape" (presumably a no-data dialog — confirm);
            # it is dismissed and the combination is skipped.
            try:
                WebDriverWait(driver, 2).until(EC.element_to_be_clickable((By.XPATH, "//button[@class='swal2-confirm swal2-styled']"))).click()
                logger.info(f'Skipping for of Village[{village_code}, {village_name}]')
                continue
            except Exception as e:
                logger.info(f'Moving ahead with Village[{village_code},{slugify(village_name)}] - EXCEPT[{type(e)}, {e}]')

            # Walk through the result pages until the "next" link is disabled or absent.
            while True:
                try:
                    WebDriverWait(driver, 3).until(EC.element_to_be_clickable((By.XPATH, "//input[@class='btn btn-primary']")))
                    logger.info('Found Data')
                    page_html = driver.page_source
                except Exception as e:
                    logger.error(f'When reading HTML source land_type[{land_type}] of Village[{village_code}, {village_name}] - EXCEPT[{type(e)}, {e}]')
                    # Without fresh HTML there is nothing valid to parse.
                    break

                try:
                    page_df = pd.read_html(page_html, attrs={'id': 'tblapproval'})[0]
                except ValueError as e:
                    # read_html raises ValueError when no matching table is found.
                    logger.error(f'No table for land_type[{land_type}] of Village[{village_code}, {village_name}] - EXCEPT[{type(e)}, {e}]')
                    break

                page_df['land_type'] = land_type
                page_df['status'] = status
                page_df['village_name_tel'] = village_name
                page_df['village_code'] = village_code
                page_df['block_name'] = block_name
                villageDFs.append(page_df)
                village_frames.append(page_df)
                # Rewrite the village file with everything gathered so far, so
                # later pages extend it instead of replacing earlier ones.
                pd.concat(village_frames, ignore_index=True).to_csv(village_csv, index=False)
                logger.info(f'Adding the table for village[{village_code}] and type[{land_type}]')

                try:
                    next_link = WebDriverWait(driver, 3).until(EC.element_to_be_clickable((By.LINK_TEXT, '›')))
                    parent = next_link.find_element(By.XPATH, '..')
                    logger.info(f'parent[{parent.get_attribute("class")}] elem[{next_link.get_attribute("class")}]')
                    if 'disabled' in parent.get_attribute("class"):
                        logger.info('Disabled so end here!')
                        break
                    next_link.click()
                    time.sleep(5)
                except Exception as e:
                    logger.info('No pagination here!')
                    break



In [None]:
# Stack every scraped page into a single frame with a fresh 0..n-1 index.
df = pd.concat(villageDFs, ignore_index=True)

In [None]:
# Write the combined frame to all.csv inside data_dir, without the index column.
df.to_csv(f'{data_dir}/all.csv', index=False)