In [68]:
from __future__ import print_function

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
from datetime import datetime, timedelta
from time import time, sleep
import urllib
from urllib3.exceptions import MaxRetryError
from pprint import pprint
import smtplib, ssl
import os
import threading
import pickle

In [71]:
def click_back():
    button = driver.find_element_by_xpath("//button[contains(text(), 'Back')]")
    button.click()

def delete_marquee():
    marquee = driver.find_element_by_tag_name('marquee')
    driver.execute_script("arguments[0].parentNode.removeChild(arguments[0]);", marquee)

def try_loop(fn, check=lambda x: True, max_wait=10):
    thread_return = []
    def thread_try():
        while True:
            try:
                output = fn()
                if check(output):
                    thread_return.append(output)
                    break
            except NoSuchElementException:
                sleep(0.5)
    
    t = threading.Thread(target=thread_try)
    t.daemon = True
    t.start()
    t.join(timeout=max_wait)
    if not len(thread_return):
        raise TimeoutError()
    return thread_return.pop()

def get_time():
    now = datetime.now()
    return now.strftime("%Y/%m/%d %H:%M:%S")

def read_PM25():
    pm25s = try_loop(lambda: driver.find_elements_by_tag_name("td"), lambda cells: len(cells))
    if len(pm25s) < 5:
        print(get_time(), '      No data')
        return {}
    min_PM25 = pm25s[2].text
    max_PM25 = pm25s[3].text
    avg_PM25 = pm25s[4].text
    return {
        'min_PM25': min_PM25,
        'max_PM25': max_PM25,
        'avg_PM25': avg_PM25,
    }

def read_each_month(month_data):
    cells = try_loop(lambda: driver.find_elements_by_class_name('green_Ava'), lambda cells: len(cells))
    for i in range(len(cells)):
        cell = cells[i]
        day = cell.text
        print(get_time(), '   ', day)
        if day not in month_data:
            cell.click()
            month_data[day] = read_PM25()
            try_loop(click_back)
        cells = try_loop(lambda: driver.find_elements_by_class_name('green_Ava'), lambda cells: len(cells))
    month_data['done'] = True

def read_each_site(site_data):
    for row_index in 3, 4:
        rows = try_loop(
            lambda: driver.find_elements_by_tag_name("tr"),
            lambda rows: len(rows) and rows[row_index].find_element_by_tag_name("td").text in ['2019', '2018']
        )
        year = rows[row_index].find_element_by_tag_name("td").text
        for column_index in range(1, 13):
            column = rows[row_index].find_elements_by_tag_name("td")[column_index]
            if column.text == 'NA':
                continue
            print(get_time(), ' ', year, column_index)
            if (year, column_index) not in site_data:
                site_data[year, column_index] = {'done': False}
            if not site_data[year, column_index]['done']:
                column.click()
                read_each_month(site_data[year, column_index])
                try_loop(click_back)
            rows = try_loop(
                lambda: driver.find_elements_by_tag_name("tr"),
                lambda rows: len(rows) and rows[row_index].find_element_by_tag_name("td").text in ['2019', '2018']
            )
    site_data['done'] = True

def change_entry():
    # change the number of entries
    n_entries = driver.find_element_by_xpath("//select")
    option300 = n_entries.find_elements_by_tag_name('option')[1]
    driver.execute_script("arguments[0].value = '300';", option300) 
    driver.find_element_by_xpath("//select").send_keys('25')
    sleep(0.5)
    assert driver.find_element_by_xpath("//select").get_attribute('value') == '300'

In [81]:
driver.quit()

In [5]:
# all_data = {}

In [53]:
click_back()

NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//button[contains(text(), 'Back')]"}
  (Session info: headless chrome=91.0.4472.114)


In [70]:
while True:
    try:
        link = 'https://app.cpcbccr.com/ccr/#/caaqm-dashboard-all/caaqm-landing/caaqm-data-availability'

        options = Options()
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        driver = webdriver.Chrome(options=options)

        r = driver.get(link)
        print(get_time(), 'Loaded link')
        param_search = try_loop(lambda: driver.find_element_by_xpath("//a[contains(text(), 'Search by Parameter Name')]"))
        param_search.click()
        print(get_time(), 'Switched to parameters')
        button = try_loop(lambda: driver.find_element_by_xpath("//button[contains(text(), 'Submit')]"))
        button.click()
        delete_marquee()

        try_loop(change_entry)
        def get_stations():
            stations = driver.find_elements_by_tag_name("tr")[1:]
            assert len(stations) == 292
            return stations
        stations = try_loop(get_stations)
        for i in range(292):
            station = stations[i]
            # get location info
            state = station.find_elements_by_tag_name("td")[1].text
            city = station.find_elements_by_tag_name("td")[2].text
            site = station.find_elements_by_tag_name("td")[3].text
            print(get_time(), state, city, site)

            # click - action
            if (state, city, site) not in all_data:
                all_data[state, city, site] = {'done': False}
            if not all_data[state, city, site]['done']:
                station.find_element_by_tag_name("i").click()
                read_each_site(all_data[state, city, site])
                try_loop(click_back)
            with open(f'data/{"_".join([state, city, site])}.p', 'wb') as f:
                pickle.dump(all_data[state, city, site], f)

            try_loop(change_entry)
            stations = try_loop(get_stations)
    except (TimeoutError, ElementClickInterceptedException):
        driver.quit()
        print(get_time(), 'Reset')

2021/06/21 20:27:55 Loaded link
2021/06/21 20:27:56 Switched to parameters
2021/06/21 20:27:58 Kerala Thiruvananthapuram Plammoodu, Thiruvananthapuram - Kerala PCB
2021/06/21 20:27:58 Madhya Pradesh Mandideep Sector-D Industrial Area, Mandideep - MPPCB
2021/06/21 20:27:59 Odisha Talcher Talcher Coalfields,Talcher - OSPCB
2021/06/21 20:28:00 Maharashtra Chandrapur MIDC Khutala, Chandrapur - MPCB
2021/06/21 20:28:00   2019 1
2021/06/21 20:28:00   2019 2
2021/06/21 20:28:00   2019 3
2021/06/21 20:28:00   2019 4
2021/06/21 20:28:00   2019 5
2021/06/21 20:28:00   2019 6
2021/06/21 20:28:00   2019 7
2021/06/21 20:28:00   2019 8
2021/06/21 20:28:00   2019 9
2021/06/21 20:28:00   2019 10
2021/06/21 20:28:00   2019 11
2021/06/21 20:28:00   2019 12
2021/06/21 20:28:01   2018 1
2021/06/21 20:28:01   2018 2
2021/06/21 20:28:01   2018 3
2021/06/21 20:28:01   2018 4
2021/06/21 20:28:01   2018 5
2021/06/21 20:28:01   2018 6
2021/06/21 20:28:01     1
2021/06/21 20:28:01     2
2021/06/21 20:28:01     3

Exception in thread Thread-6613:
Traceback (most recent call last):
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-65-587490863bc8>", line 14, in thread_try
    output = fn()
  File "<ipython-input-65-587490863bc8>", line 34, in <lambda>
    pm25s = try_loop(lambda: driver.find_elements_by_tag_name("td"), lambda cells: len(cells))
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 546, in find_elements_by_tag_name
    return self.find_elements(by=By.TAG_NAME, value=name)
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 1007, in find_elements
    'value': value})['value'] or []
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remo

2021/06/21 20:29:07 Loaded link
2021/06/21 20:29:08 Switched to parameters
2021/06/21 20:29:10 Kerala Thiruvananthapuram Plammoodu, Thiruvananthapuram - Kerala PCB
2021/06/21 20:29:11 Madhya Pradesh Mandideep Sector-D Industrial Area, Mandideep - MPPCB
2021/06/21 20:29:11 Odisha Talcher Talcher Coalfields,Talcher - OSPCB
2021/06/21 20:29:12 Maharashtra Chandrapur MIDC Khutala, Chandrapur - MPCB
2021/06/21 20:29:13   2019 1
2021/06/21 20:29:13   2019 2
2021/06/21 20:29:13   2019 3
2021/06/21 20:29:13   2019 4
2021/06/21 20:29:13   2019 5
2021/06/21 20:29:13   2019 6
2021/06/21 20:29:13   2019 7
2021/06/21 20:29:13   2019 8
2021/06/21 20:29:13   2019 9
2021/06/21 20:29:13   2019 10
2021/06/21 20:29:13   2019 11
2021/06/21 20:29:13   2019 12
2021/06/21 20:29:13   2018 1
2021/06/21 20:29:13   2018 2
2021/06/21 20:29:13   2018 3
2021/06/21 20:29:13   2018 4
2021/06/21 20:29:13   2018 5
2021/06/21 20:29:13   2018 6
2021/06/21 20:29:13   2018 7
2021/06/21 20:29:13   2018 8
2021/06/21 20:29:14

Exception in thread Thread-6870:
Traceback (most recent call last):
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-65-587490863bc8>", line 14, in thread_try
    output = fn()
  File "<ipython-input-65-587490863bc8>", line 34, in <lambda>
    pm25s = try_loop(lambda: driver.find_elements_by_tag_name("td"), lambda cells: len(cells))
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 546, in find_elements_by_tag_name
    return self.find_elements(by=By.TAG_NAME, value=name)
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 1007, in find_elements
    'value': value})['value'] or []
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remo

2021/06/21 20:30:23 Reset
2021/06/21 20:30:36 Loaded link
2021/06/21 20:30:46 Reset


Exception in thread Thread-6871:
Traceback (most recent call last):
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/connection.py", line 170, in _new_conn
    (self._dns_host, self.port), self.timeout, **extra_kw
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/util/connection.py", line 96, in create_connection
    raise err
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/util/connection.py", line 86, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 706, in urlopen
    chunked=chunked,
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 394, in _make_request
    conn.request(method, url, **httplib_request_kw)
  File "/Users/ruoyulan

2021/06/21 20:30:58 Loaded link
2021/06/21 20:30:59 Switched to parameters
2021/06/21 20:31:01 Kerala Thiruvananthapuram Plammoodu, Thiruvananthapuram - Kerala PCB
2021/06/21 20:31:02 Madhya Pradesh Mandideep Sector-D Industrial Area, Mandideep - MPPCB
2021/06/21 20:31:02 Odisha Talcher Talcher Coalfields,Talcher - OSPCB
2021/06/21 20:31:03 Maharashtra Chandrapur MIDC Khutala, Chandrapur - MPCB
2021/06/21 20:31:03   3 1


Exception in thread Thread-6883:
Traceback (most recent call last):
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-65-587490863bc8>", line 14, in thread_try
    output = fn()
  File "<ipython-input-65-587490863bc8>", line 48, in <lambda>
    cells = try_loop(lambda: driver.find_elements_by_class_name('green_Ava'), lambda cells: len(cells))
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 580, in find_elements_by_class_name
    return self.find_elements(by=By.CLASS_NAME, value=name)
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 1007, in find_elements
    'value': value})['value'] or []
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/w

2021/06/21 20:31:13 Reset
2021/06/21 20:31:28 Loaded link
2021/06/21 20:31:29 Switched to parameters
2021/06/21 20:31:31 Kerala Thiruvananthapuram Plammoodu, Thiruvananthapuram - Kerala PCB
2021/06/21 20:31:32 Madhya Pradesh Mandideep Sector-D Industrial Area, Mandideep - MPPCB
2021/06/21 20:31:32 Odisha Talcher Talcher Coalfields,Talcher - OSPCB
2021/06/21 20:31:33 Maharashtra Chandrapur MIDC Khutala, Chandrapur - MPCB
2021/06/21 20:31:33   2019 1
2021/06/21 20:31:33   2019 2
2021/06/21 20:31:33   2019 3
2021/06/21 20:31:33   2019 4
2021/06/21 20:31:33   2019 5
2021/06/21 20:31:33   2019 6
2021/06/21 20:31:33   2019 7
2021/06/21 20:31:34   2019 8
2021/06/21 20:31:34   2019 9
2021/06/21 20:31:34   2019 10
2021/06/21 20:31:34   2019 11
2021/06/21 20:31:34   2019 12
2021/06/21 20:31:34   2018 1
2021/06/21 20:31:34   2018 2
2021/06/21 20:31:34   2018 3
2021/06/21 20:31:34   2018 4
2021/06/21 20:31:34   2018 5
2021/06/21 20:31:34   2018 6
2021/06/21 20:31:34   2018 7
2021/06/21 20:31:34   

Exception in thread Thread-6956:
Traceback (most recent call last):
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-65-587490863bc8>", line 14, in thread_try
    output = fn()
  File "<ipython-input-65-587490863bc8>", line 34, in <lambda>
    pm25s = try_loop(lambda: driver.find_elements_by_tag_name("td"), lambda cells: len(cells))
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 546, in find_elements_by_tag_name
    return self.find_elements(by=By.TAG_NAME, value=name)
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 1007, in find_elements
    'value': value})['value'] or []
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remo

2021/06/21 20:31:50 Reset
2021/06/21 20:32:04 Loaded link
2021/06/21 20:32:05 Switched to parameters
2021/06/21 20:32:07 Kerala Thiruvananthapuram Plammoodu, Thiruvananthapuram - Kerala PCB
2021/06/21 20:32:07 Madhya Pradesh Mandideep Sector-D Industrial Area, Mandideep - MPPCB
2021/06/21 20:32:08 Odisha Talcher Talcher Coalfields,Talcher - OSPCB
2021/06/21 20:32:09 Maharashtra Chandrapur MIDC Khutala, Chandrapur - MPCB
2021/06/21 20:32:09   2019 1
2021/06/21 20:32:09   2019 2
2021/06/21 20:32:09   2019 3
2021/06/21 20:32:09   2019 4
2021/06/21 20:32:09   2019 5
2021/06/21 20:32:09   2019 6
2021/06/21 20:32:09   2019 7
2021/06/21 20:32:09   2019 8
2021/06/21 20:32:09   2019 9
2021/06/21 20:32:09   2019 10
2021/06/21 20:32:09   2019 11
2021/06/21 20:32:09   2019 12
2021/06/21 20:32:09   2018 1
2021/06/21 20:32:09   2018 2
2021/06/21 20:32:09   2018 3
2021/06/21 20:32:09   2018 4
2021/06/21 20:32:09   2018 5
2021/06/21 20:32:09   2018 6
2021/06/21 20:32:09   2018 7
2021/06/21 20:32:10   

Exception in thread Thread-7055:
Traceback (most recent call last):
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-65-587490863bc8>", line 14, in thread_try
    output = fn()
  File "<ipython-input-65-587490863bc8>", line 34, in <lambda>
    pm25s = try_loop(lambda: driver.find_elements_by_tag_name("td"), lambda cells: len(cells))
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 546, in find_elements_by_tag_name
    return self.find_elements(by=By.TAG_NAME, value=name)
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 1007, in find_elements
    'value': value})['value'] or []
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remo

2021/06/21 20:32:32 Reset
2021/06/21 20:32:45 Loaded link
2021/06/21 20:32:46 Switched to parameters
2021/06/21 20:32:48 Kerala Thiruvananthapuram Plammoodu, Thiruvananthapuram - Kerala PCB
2021/06/21 20:32:49 Madhya Pradesh Mandideep Sector-D Industrial Area, Mandideep - MPPCB
2021/06/21 20:32:49 Odisha Talcher Talcher Coalfields,Talcher - OSPCB
2021/06/21 20:32:50 Maharashtra Chandrapur MIDC Khutala, Chandrapur - MPCB
2021/06/21 20:32:51   2019 1
2021/06/21 20:32:51   2019 2
2021/06/21 20:32:52   2019 3
2021/06/21 20:32:52   2019 4
2021/06/21 20:32:52   2019 5
2021/06/21 20:32:52   2019 6
2021/06/21 20:32:52   2019 7
2021/06/21 20:32:52   2019 8
2021/06/21 20:32:52   2019 9
2021/06/21 20:32:52   2019 10
2021/06/21 20:32:52   2019 11
2021/06/21 20:32:52   2019 12
2021/06/21 20:32:52   2018 1
2021/06/21 20:32:52   2018 2
2021/06/21 20:32:52   2018 3
2021/06/21 20:32:52   2018 4
2021/06/21 20:32:52   2018 5
2021/06/21 20:32:52   2018 6
2021/06/21 20:32:52   2018 7
2021/06/21 20:32:52   

Exception in thread Thread-7093:
Traceback (most recent call last):
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-65-587490863bc8>", line 14, in thread_try
    output = fn()
  File "<ipython-input-65-587490863bc8>", line 34, in <lambda>
    pm25s = try_loop(lambda: driver.find_elements_by_tag_name("td"), lambda cells: len(cells))
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 546, in find_elements_by_tag_name
    return self.find_elements(by=By.TAG_NAME, value=name)
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 1007, in find_elements
    'value': value})['value'] or []
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remo

2021/06/21 20:33:03 Reset
2021/06/21 20:33:16 Loaded link
2021/06/21 20:33:18 Switched to parameters
2021/06/21 20:33:19 Kerala Thiruvananthapuram Plammoodu, Thiruvananthapuram - Kerala PCB
2021/06/21 20:33:20 Madhya Pradesh Mandideep Sector-D Industrial Area, Mandideep - MPPCB
2021/06/21 20:33:21 Odisha Talcher Talcher Coalfields,Talcher - OSPCB
2021/06/21 20:33:21 Maharashtra Chandrapur MIDC Khutala, Chandrapur - MPCB
2021/06/21 20:33:21   3 1


Exception in thread Thread-7105:
Traceback (most recent call last):
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-65-587490863bc8>", line 14, in thread_try
    output = fn()
  File "<ipython-input-65-587490863bc8>", line 48, in <lambda>
    cells = try_loop(lambda: driver.find_elements_by_class_name('green_Ava'), lambda cells: len(cells))
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 580, in find_elements_by_class_name
    return self.find_elements(by=By.CLASS_NAME, value=name)
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 1007, in find_elements
    'value': value})['value'] or []
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/w

2021/06/21 20:33:32 Reset
2021/06/21 20:33:46 Loaded link
2021/06/21 20:33:47 Switched to parameters
2021/06/21 20:33:57 Reset


Exception in thread Thread-7108:
Traceback (most recent call last):
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/connection.py", line 170, in _new_conn
    (self._dns_host, self.port), self.timeout, **extra_kw
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/util/connection.py", line 96, in create_connection
    raise err
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/util/connection.py", line 86, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 706, in urlopen
    chunked=chunked,
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 394, in _make_request
    conn.request(method, url, **httplib_request_kw)
  File "/Users/ruoyulan

2021/06/21 20:34:11 Loaded link
2021/06/21 20:34:12 Switched to parameters
2021/06/21 20:34:14 Kerala Thiruvananthapuram Plammoodu, Thiruvananthapuram - Kerala PCB
2021/06/21 20:34:14 Madhya Pradesh Mandideep Sector-D Industrial Area, Mandideep - MPPCB
2021/06/21 20:34:15 Odisha Talcher Talcher Coalfields,Talcher - OSPCB
2021/06/21 20:34:15 Maharashtra Chandrapur MIDC Khutala, Chandrapur - MPCB
2021/06/21 20:34:16   2019 1
2021/06/21 20:34:16   2019 2
2021/06/21 20:34:16   2019 3
2021/06/21 20:34:16   2019 4
2021/06/21 20:34:16   2019 5
2021/06/21 20:34:16   2019 6
2021/06/21 20:34:16   2019 7
2021/06/21 20:34:16   2019 8
2021/06/21 20:34:16   2019 9
2021/06/21 20:34:16   2019 10
2021/06/21 20:34:16   2019 11
2021/06/21 20:34:16   2019 12
2021/06/21 20:34:16   2018 1
2021/06/21 20:34:16   2018 2
2021/06/21 20:34:16   2018 3
2021/06/21 20:34:16   2018 4
2021/06/21 20:34:16   2018 5
2021/06/21 20:34:16   2018 6
2021/06/21 20:34:16   2018 7
2021/06/21 20:34:16   2018 8
2021/06/21 20:34:16

Exception in thread Thread-7241:
Traceback (most recent call last):
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-65-587490863bc8>", line 14, in thread_try
    output = fn()
  File "<ipython-input-65-587490863bc8>", line 62, in <lambda>
    rows = try_loop(lambda: driver.find_elements_by_tag_name("tr"), lambda rows: len(rows))
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 546, in find_elements_by_tag_name
    return self.find_elements(by=By.TAG_NAME, value=name)
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 1007, in find_elements
    'value': value})['value'] or []
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/

2021/06/21 20:34:51 Reset
2021/06/21 20:35:05 Loaded link
2021/06/21 20:35:07 Switched to parameters
2021/06/21 20:35:17 Reset


Exception in thread Thread-7244:
Traceback (most recent call last):
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/connection.py", line 170, in _new_conn
    (self._dns_host, self.port), self.timeout, **extra_kw
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/util/connection.py", line 96, in create_connection
    raise err
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/util/connection.py", line 86, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 706, in urlopen
    chunked=chunked,
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 394, in _make_request
    conn.request(method, url, **httplib_request_kw)
  File "/Users/ruoyulan

2021/06/21 20:35:31 Loaded link
2021/06/21 20:35:41 Reset


Exception in thread Thread-7245:
Traceback (most recent call last):
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/connection.py", line 170, in _new_conn
    (self._dns_host, self.port), self.timeout, **extra_kw
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/util/connection.py", line 96, in create_connection
    raise err
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/util/connection.py", line 86, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 706, in urlopen
    chunked=chunked,
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 394, in _make_request
    conn.request(method, url, **httplib_request_kw)
  File "/Users/ruoyulan

2021/06/21 20:35:55 Loaded link
2021/06/21 20:35:56 Switched to parameters
2021/06/21 20:35:58 Kerala Thiruvananthapuram Plammoodu, Thiruvananthapuram - Kerala PCB
2021/06/21 20:35:59 Madhya Pradesh Mandideep Sector-D Industrial Area, Mandideep - MPPCB
2021/06/21 20:35:59 Odisha Talcher Talcher Coalfields,Talcher - OSPCB
2021/06/21 20:36:00 Maharashtra Chandrapur MIDC Khutala, Chandrapur - MPCB
2021/06/21 20:36:01 Maharashtra Nagpur Opp GPO Civil Lines, Nagpur - MPCB
2021/06/21 20:36:01   3 1


Exception in thread Thread-7259:
Traceback (most recent call last):
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/Users/ruoyulan/miniconda3/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-65-587490863bc8>", line 14, in thread_try
    output = fn()
  File "<ipython-input-65-587490863bc8>", line 48, in <lambda>
    cells = try_loop(lambda: driver.find_elements_by_class_name('green_Ava'), lambda cells: len(cells))
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 580, in find_elements_by_class_name
    return self.find_elements(by=By.CLASS_NAME, value=name)
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 1007, in find_elements
    'value': value})['value'] or []
  File "/Users/ruoyulan/miniconda3/lib/python3.7/site-packages/selenium/w

2021/06/21 20:36:11 Reset


KeyboardInterrupt: 

In [80]:
for i in range(4, 292, 8):
    print(f'START={i} END={i + 8} python CPCB_scraping.py &> logs/{i}.log &')

START=4 END=12 python CPCB_scraping.py &> logs/4.log &
START=12 END=20 python CPCB_scraping.py &> logs/12.log &
START=20 END=28 python CPCB_scraping.py &> logs/20.log &
START=28 END=36 python CPCB_scraping.py &> logs/28.log &
START=36 END=44 python CPCB_scraping.py &> logs/36.log &
START=44 END=52 python CPCB_scraping.py &> logs/44.log &
START=52 END=60 python CPCB_scraping.py &> logs/52.log &
START=60 END=68 python CPCB_scraping.py &> logs/60.log &
START=68 END=76 python CPCB_scraping.py &> logs/68.log &
START=76 END=84 python CPCB_scraping.py &> logs/76.log &
START=84 END=92 python CPCB_scraping.py &> logs/84.log &
START=92 END=100 python CPCB_scraping.py &> logs/92.log &
START=100 END=108 python CPCB_scraping.py &> logs/100.log &
START=108 END=116 python CPCB_scraping.py &> logs/108.log &
START=116 END=124 python CPCB_scraping.py &> logs/116.log &
START=124 END=132 python CPCB_scraping.py &> logs/124.log &
START=132 END=140 python CPCB_scraping.py &> logs/132.log &
START=140 END=148