In [None]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import requests
import json
import gc
import os

from selenium import webdriver
import chromedriver_binary
from webdriver_manager.firefox import GeckoDriverManager
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.firefox.options import Options

In [None]:
# Create the cache/output directory tree used by every cell below.
# `exist_ok=True` makes the cell safe to re-run and avoids the check-then-create
# race of testing `os.path.exists` first; intermediate directories ('Outputs')
# are created automatically by `os.makedirs`.
os.makedirs('Outputs/FSCMS_Caches', exist_ok=True)

# Prefix for every cache/output file; callers concatenate a filename directly
# onto this string, so the trailing slash is required.
output_dir = 'Outputs/FSCMS_Caches/'

In [None]:
def open_fscms(driver):
    """Load the FSCMS landing page and click the tree-view node `ctl00_TreeView1t5`.

    The original author named that node the "advanced search" button; the
    search helpers in this notebook call this function before touching the
    search form.

    Parameters
    ----------
    driver : Selenium WebDriver used to drive the browser.

    Returns
    -------
    None
    """
    driver.get('http://fscms/default.aspx')
    driver.find_element(By.ID, "ctl00_TreeView1t5").click()

def get_results(driver):
    """Scrape every cell of the paged search-results grid shown by `driver`.

    The page numbers are read from the pager drop-down. Page 1 is not
    selected explicitly (it is taken to be the page already on screen);
    every other page is selected by its visible text before scraping.

    For each `td` of each result row the stored value is
    * the `href` of the cell's first `<a>` if the cell contains a link, or
    * the `textContent` of the cell's `<nobr>` child otherwise.

    Parameters
    ----------
    driver : Selenium WebDriver positioned on a search-results page.

    Returns
    -------
    dict
        Maps ``'<page>_<level>'`` to the scraped value, where ``level`` is
        the cell's ``level`` attribute.
        NOTE(review): `format_results` splits these keys into three
        '_'-separated parts, so `level` presumably looks like
        '<row>_<field>' -- confirm against the live page.

    Notes
    -----
    No explicit wait is performed after switching pages, and a missing pager
    or grid element is not handled here.
    """
    pager_xpath = '''//*[@id="ctl00$ContentPlaceHolder1$gridSearchResults_pager"]/select'''

    # The pager's option texts are the available page numbers.
    pager = Select(driver.find_element(By.XPATH, pager_xpath))
    page_numbers = [int(option.get_attribute('textContent')) for option in pager.options]

    scraped = {}

    for page_number in page_numbers:
        page_label = str(page_number)

        if page_number != 1:
            # Look the pager up again before switching pages rather than
            # reusing the handle obtained above.
            Select(driver.find_element(By.XPATH, pager_xpath)).select_by_visible_text(page_label)

        grid = driver.find_element(By.ID, "G_ctl00xContentPlaceHolder1xgridSearchResults")
        grid_rows = grid.find_elements(By.CLASS_NAME, "ctl00xContentPlaceHolder1xgridSearchResults-no")

        for grid_row in grid_rows:
            for cell in grid_row.find_elements(By.TAG_NAME, 'td'):
                has_link = len(cell.find_elements(By.TAG_NAME, 'a')) > 0
                cell_key = page_label + '_' + cell.get_attribute('level')

                if has_link:
                    scraped[cell_key] = cell.find_element(By.TAG_NAME, 'a').get_attribute('href')
                else:
                    scraped[cell_key] = cell.find_element(By.TAG_NAME, 'nobr').get_attribute('textContent')

    return scraped


In [None]:
def search_open_nonsenior_cases(driver):
    """Run one search with status value "2053" and return its scraped results.

    Only the status filter is set here; no program, borough or development
    filter is applied, and nothing is written to disk.

    Parameters
    ----------
    driver : Selenium WebDriver used to drive the browser.

    Returns
    -------
    dict
        Whatever `get_results` returns for the resulting results grid.
    """
    # Go to the search form.
    open_fscms(driver)

    # Status filter, then submit.
    Select(driver.find_element(By.NAME, "ctl00$ContentPlaceHolder1$drpStatus")).select_by_value("2053")
    driver.find_element(By.NAME, "ctl00$ContentPlaceHolder1$btnSearch").click()

    return get_results(driver)


def _search_by_development(driver, status_value, cache_filename, program_value=None):
    """Sweep every borough/development combination and scrape each search.

    Shared implementation behind `search_open_senior_cases`,
    `search_closed_senior_cases` and `search_rej_nonsenior_cases`, which
    previously carried three copies of this loop.

    Parameters
    ----------
    driver : Selenium WebDriver used to drive the browser.
    status_value : str
        Option value to select in the status drop-down.
    cache_filename : str
        File name (appended to the module-level `output_dir`) that receives
        a JSON dump of the accumulated results after every search.
    program_value : str or None
        Option value to select in the programs drop-down; when None the
        programs drop-down is left untouched.

    Returns
    -------
    dict
        Maps each development's visible option text to the dict returned by
        `get_results` for that search.
        NOTE(review): the key is the option text only, so if the same text
        appears under more than one borough the later result replaces the
        earlier one.
    """
    results_by_dev = {}

    # Navigate to the search form.
    open_fscms(driver)

    # Fixed filters for the whole sweep.
    status_box = driver.find_element(By.NAME, "ctl00$ContentPlaceHolder1$drpStatus")
    Select(status_box).select_by_value(status_value)

    if program_value is not None:
        program_box = driver.find_element(By.XPATH, '//*[@id="ctl00_ContentPlaceHolder1_drpPrograms"]')
        Select(program_box).select_by_value(program_value)

    for borough_id in ['1', '2', '3', '4', '5']:
        borough_box = driver.find_element(By.XPATH, '//*[@id="ctl00_ContentPlaceHolder1_drpBorough"]')
        Select(borough_box).select_by_value(borough_id)

        # Snapshot the development names offered once this borough is selected.
        dev_box = driver.find_element(By.XPATH, '//*[@id="ctl00_ContentPlaceHolder1_drpDevelopment"]')
        dev_values = [option.text for option in Select(dev_box).options]

        for dev in dev_values:
            # The drop-down is looked up again on every iteration instead of
            # reusing the handle from before the previous search.
            dev_box = driver.find_element(By.XPATH, '//*[@id="ctl00_ContentPlaceHolder1_drpDevelopment"]')
            Select(dev_box).select_by_visible_text(dev)

            driver.find_element(By.NAME, "ctl00$ContentPlaceHolder1$btnSearch").click()

            results_by_dev[dev] = get_results(driver)

            # Checkpoint after every search so an interrupted run keeps
            # everything collected so far.
            with open(output_dir+cache_filename, 'w') as file:
                json.dump(results_by_dev, file)

    return results_by_dev


def search_open_senior_cases(driver):
    """Sweep all boroughs/developments with status "2053" and program '1'.

    Results are checkpointed to `output_dir + '_results_cache.json'` after
    every search.

    Returns
    -------
    dict
        ``{development option text: get_results(...) dict}``.
    """
    return _search_by_development(driver, "2053", '_results_cache.json', program_value='1')


def search_closed_cases(driver):
    """Search status "2054" once per entry of the worker drop-down.

    For every option in the worker drop-down the option is selected and the
    search submitted. If an element with id `ctl00_lblErrors` is present
    afterwards, the string 'ERROR -- TOO MANY RESULTS' is stored for that
    worker; otherwise the scraped results from `get_results` are stored.
    The accumulated dict is dumped to `output_dir + '_rej_results_cache.json'`
    after every worker.

    NOTE(review): `search_closed_senior_cases` and
    `search_rej_nonsenior_cases` write to the same cache file, so running
    more than one of them overwrites the previous cache.

    Parameters
    ----------
    driver : Selenium WebDriver used to drive the browser.

    Returns
    -------
    dict
        ``{worker option text: results dict or the error string}``.
    """
    results_dict = {}

    open_fscms(driver)

    status_box = driver.find_element(By.NAME, "ctl00$ContentPlaceHolder1$drpStatus")
    Select(status_box).select_by_value("2054")

    # Snapshot the worker names before the loop starts submitting searches.
    person_box = driver.find_element(By.XPATH, '//*[@id="ctl00_ContentPlaceHolder1_drpWorker"]')
    person_list = [person.text for person in Select(person_box).options]

    for person in person_list:
        person_box = driver.find_element(By.XPATH, '//*[@id="ctl00_ContentPlaceHolder1_drpWorker"]')
        Select(person_box).select_by_visible_text(person)

        driver.find_element(By.NAME, "ctl00$ContentPlaceHolder1$btnSearch").click()

        # Presence of the error label is recorded instead of scraping.
        if driver.find_elements(By.XPATH, '//*[@id="ctl00_lblErrors"]'):
            results_dict[person] = 'ERROR -- TOO MANY RESULTS'
        else:
            results_dict[person] = get_results(driver)

        with open(output_dir+'_rej_results_cache.json', 'w') as file:
            json.dump(results_dict, file)

    return results_dict


def search_closed_senior_cases(driver):
    """Sweep all boroughs/developments with status "2054" and program '1'.

    Results are checkpointed to `output_dir + '_rej_results_cache.json'`
    after every search.
    NOTE(review): this is the same cache file used by `search_closed_cases`
    and `search_rej_nonsenior_cases` -- confirm that sharing it is intended.

    Returns
    -------
    dict
        ``{development option text: get_results(...) dict}``.
    """
    return _search_by_development(driver, "2054", '_rej_results_cache.json', program_value='1')


def search_rej_nonsenior_cases(driver):
    """Sweep all boroughs/developments with status "2054" and no program filter.

    Results are checkpointed to `output_dir + '_rej_results_cache.json'`
    after every search.

    Returns
    -------
    dict
        ``{development option text: get_results(...) dict}``.
    """
    return _search_by_development(driver, "2054", '_rej_results_cache.json')

def get_additional_case_info(driver, case_number_list):
    """Open each case's form page and read its referral type, name and date.

    For every case number the page
    ``http://fscms/forms/default.aspx?case_id=<case>`` is loaded and three
    values are read: the text of the selected referral-source option, the
    `value` of the referral-name input, and the text of a `<strong>` element
    in the page header (stored as the referral date).

    The accumulated dict is dumped to
    `output_dir + '_addtl_info_cache.json'` after every case; nothing is
    written when `case_number_list` is empty.

    Parameters
    ----------
    driver : Selenium WebDriver used to drive the browser.
    case_number_list : iterable
        Case identifiers; each is passed through `str()` to build the URL
        and used unchanged as the key of the returned dict.

    Returns
    -------
    dict
        ``{case: {'Ref_Type': ..., 'Ref_Name': ..., 'Ref_Date': ...}}``.
    """
    collected = {}

    for case_number in case_number_list:
        driver.get('http://fscms/forms/default.aspx?case_id=' + str(case_number))

        # Referral type: text of whichever option is currently selected.
        type_dropdown = Select(driver.find_element(By.XPATH, '//*[@id="ctl00_ContentPlaceHolder1_igTabCaseAssesment_ctl02_drdReferralSource_drpReferralSource"]'))
        referral_type = type_dropdown.first_selected_option.get_attribute('textContent')

        # Referral name: current value of the text input.
        name_input = driver.find_element(By.XPATH, '//*[@id="ctl00_ContentPlaceHolder1_igTabCaseAssesment_ctl02_drdReferralSource_txtReferralName"]')
        referral_name = name_input.get_attribute('value')

        # Referral date: text of the header's <strong> element.
        date_label = driver.find_element(By.XPATH, '//*[@id="ctl00_ContentPlaceHolder1_PageHeader_FormViewMain"]/tbody/tr/td/table/tbody/tr/td[3]/strong')
        referral_date = date_label.text

        collected[case_number] = {
            'Ref_Type': referral_type,
            'Ref_Name': referral_name,
            'Ref_Date': referral_date,
        }

        # Checkpoint after every case.
        with open(output_dir+'_addtl_info_cache.json', 'w') as cache:
            json.dump(collected, cache)

    return collected
    

In [None]:
# Live pull (disabled): the commented-out lines below would start Firefox and
# re-run `search_rej_nonsenior_cases` against FSCMS.
#options = Options()
#options.binary_location = r"C:\Users\sluggk\AppData\Local\Mozilla Firefox\firefox.exe"

#driver = webdriver.Firefox(executable_path=GeckoDriverManager().install(), options=options)

#rejected_nonsenior = search_rej_nonsenior_cases(driver)

# Instead, reload previously scraped results from the JSON cache on disk.
# NOTE(review): `search_closed_cases`, `search_closed_senior_cases` and
# `search_rej_nonsenior_cases` all write to this same file, so its contents
# depend on which of them ran last.
with open(output_dir+'_rej_results_cache.json', 'r') as reader: 
    rejected_nonsenior = json.load(reader)

In [None]:
def format_results(results_dict):
    """Regroup a flat `get_results` dict into one record per case.

    Keys of `results_dict` must have the form ``'<page>_<row>_<field>'``.
    Cells sharing the same ``<page>_<row>`` are collected into one record,
    with the numeric ``<field>`` translated to a column name (0 -> 'URL',
    1 -> 'Case_ID', ... 14 -> 'Status'). Records are then re-keyed by their
    'Case_ID' value; if two rows share a Case_ID the later one wins.

    Parameters
    ----------
    results_dict : dict
        Flat mapping of cell keys to scraped values.

    Returns
    -------
    dict
        ``{Case_ID: {column name: value, ...}}``.

    Raises
    ------
    ValueError
        If a key does not split into exactly three '_'-separated parts, or
        its field part is not an integer.
    KeyError
        If a field number is outside 0-14, or a row has no 'Case_ID' cell.
    """
    # Position in this list is the field number used in the cell keys.
    column_names = dict(enumerate([
        'URL', 'Case_ID', 'Last', 'First', 'Borough',
        'Development', 'Program', 'Acct', 'Worker', 'Supv',
        '_u1', '_u2', '_u3', '_u4', 'Status',
    ]))

    # First pass: gather the cells of each grid row under '<page>_<row>'.
    rows = {}
    for cell_key, cell_value in results_dict.items():
        page_no, row_no, field_no = cell_key.split('_')
        column = column_names[int(field_no)]
        rows.setdefault(page_no + '_' + row_no, {})[column] = cell_value

    # Second pass: index the completed records by case id.
    return {record['Case_ID']: record for record in rows.values()}
    

In [None]:
# Turn every cached search result into {Case_ID: record} form, skipping the
# searches that were recorded as the 'ERROR -- TOO MANY RESULTS' marker.
dict_list = [
    format_results(search_result)
    for search_result in rejected_nonsenior.values()
    if search_result != 'ERROR -- TOO MANY RESULTS'
]

# Merge the per-search dicts into one; on a repeated Case_ID the later search wins.
rejected_complete = {}
for formatted in dict_list:
    rejected_complete.update(formatted)

# One row per case, one column per field.
rejected = pd.DataFrame(rejected_complete).transpose()

In [None]:
# Show the assembled case table as this cell's output for a visual check.
rejected

In [None]:
# Re-pull searches for a fixed list of workers, one search per
# worker x borough x development, checkpointing to
# '_closed_results_cache_addtl.json' after every search.
# NOTE(review): the original header called these "closed nonsenior" pulls, but
# the status selected here is "2055" while the closed/rejected search
# functions above use "2054" -- confirm which status is intended.
driver = webdriver.Chrome(executable_path='chromedriver.exe')

results_dict = {}

open_fscms(driver)

Select(driver.find_element(By.NAME, "ctl00$ContentPlaceHolder1$drpStatus")).select_by_value("2055")

person_list = [
    'Sandra Bradley',
    'Sonia Brock',
    'Johnson Ebiotu',
    'Saron Mullings',
    'Noemi Ocasio',
    'Faosat Oki',
    'Manike Pervorfi',
    'Robert Pressley',
    'Eries Torres',
    'Luis Torres',
    'Sharon Walker',
]

for person in person_list:
    for borough_id in ['1', '2', '3', '4', '5']:
        Select(driver.find_element(By.XPATH, '//*[@id="ctl00_ContentPlaceHolder1_drpBorough"]')).select_by_value(borough_id)

        # Development names offered once this borough is selected.
        dev_values = [
            option.text
            for option in Select(driver.find_element(By.XPATH, '//*[@id="ctl00_ContentPlaceHolder1_drpDevelopment"]')).options
        ]

        for dev in dev_values:
            # Both drop-downs are looked up and set again before every search.
            Select(driver.find_element(By.XPATH, '//*[@id="ctl00_ContentPlaceHolder1_drpDevelopment"]')).select_by_visible_text(dev)
            Select(driver.find_element(By.XPATH, '//*[@id="ctl00_ContentPlaceHolder1_drpWorker"]')).select_by_visible_text(person)

            driver.find_element(By.NAME, "ctl00$ContentPlaceHolder1$btnSearch").click()

            # Presence of the error label is recorded instead of scraping.
            too_many = len(driver.find_elements(By.XPATH, '//*[@id="ctl00_lblErrors"]')) > 0
            result_key = f'{person}_{borough_id}_{dev}'
            if too_many:
                results_dict[result_key] = 'ERROR -- TOO MANY RESULTS'
            else:
                results_dict[result_key] = get_results(driver)

            with open(output_dir+'_closed_results_cache_addtl.json', 'w') as file:
                json.dump(results_dict, file)



In [None]:
# Firefox start-up (disabled). With these lines commented out, `driver` must
# already exist from an earlier cell (the Chrome driver created in the
# re-pull cell above) for the call below to run.
#options = Options()
#options.binary_location = r"C:\Users\sluggk\AppData\Local\Mozilla Firefox\firefox.exe"

#driver = webdriver.Firefox(executable_path=GeckoDriverManager().install(), options=options)
# Visit every case page listed in `rejected` and collect its referral details.
rejected_addtl = get_additional_case_info(driver, list(rejected['Case_ID']))

In [None]:
# Merge several additional-info cache files into one dict and save it.
# Files are read in the order listed; when a case id appears in more than one
# file, the entry from the later file replaces the earlier one.
master_dict = {}
cache_filenames = ('_addtl_info_cache.json', 'KEEP_addtl_info_cache.json',
                   'KEEP2_addtl_info_cache.json', 'KEEP3_addtl_info_cache.json',
                   'KEEP4_addtl_info_cache.json')

for cache_filename in cache_filenames:
    with open(output_dir+cache_filename, 'r') as reader:
        temp_dict = json.load(reader)

    for key, value in temp_dict.items():
        # Each record is stored wrapped in a one-element list.
        # NOTE(review): the reason for the wrapping is not visible in this
        # notebook -- confirm with whatever consumes the merged file.
        master_dict[key] = [value]

# Write the merged result once, after every input has been read. Previously
# this ran inside the loop, rewriting the file after each input and leaving a
# partial merge on disk if a later input failed to load.
with open(output_dir+'_addtl_info_for_closed_cases.json', 'w') as writer:
    json.dump(master_dict, writer)

In [None]:
def get_value(case_id, col_name, dict_name):
    """Return `dict_name[case_id][col_name]` from a two-level mapping.

    Parameters
    ----------
    case_id : key of the outer mapping.
    col_name : key of the inner mapping.
    dict_name : mapping of case ids to per-case mappings.

    Raises
    ------
    KeyError
        If either key is missing.
    """
    case_record = dict_name[case_id]
    return case_record[col_name]

    
# Copy the three referral fields from `rejected_addtl` onto `rejected`,
# looked up by each row's Case_ID. `get_value` raises KeyError for any
# case id that is missing from `rejected_addtl`.
for col in ('Ref_Type', 'Ref_Name', 'Ref_Date'):
    rejected[col] = rejected['Case_ID'].apply(get_value, args=(col, rejected_addtl))

In [None]:
# Export the final table to CSV in the cache directory.
# The date in the file name is hard-coded, so re-running overwrites this file.
rejected.to_csv(output_dir+'rejected_06142022.csv')