In [92]:
import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import Select

os.environ['WDM_LOG_LEVEL'] = '0'


class browser:
    """Thin wrapper that builds and tears down a Chrome WebDriver.

    Parameters
    ----------
    URL : str or None
        Address associated with this browser; stored on ``self.url`` only.
    headless : bool
        When true, Chrome is started with ``--headless``.
    no_image : bool
        When true, image loading is disabled through a Chrome preference.
    """

    def __init__(self, URL=None, headless=True, no_image=True) -> None:
        self.url = URL
        self.headless = headless
        self.no_image = no_image
        self.select_elements = {}

    def setup(self):
        """Create the Chrome driver on ``self.driver`` using the stored flags."""
        options = Options()
        if self.headless:
            options.add_argument('--headless')
        if self.no_image:
            # Value 2 blocks images in Chrome's content settings.
            prefs = {"profile.managed_default_content_settings.images": 2}
            options.add_experimental_option("prefs", prefs)
        # The driver is created unconditionally: previously it was only built
        # inside the ``no_image`` branch, so ``no_image=False`` left the object
        # without a ``driver`` attribute.
        self.driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
        print('starting the browser')

    def close(self):
        """Quit the browser if one was started; do nothing otherwise."""
        driver = getattr(self, 'driver', None)
        if driver is not None:
            driver.quit()


In [1]:
import os
import re
import time

import pandas as pd
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager

In [135]:
def remove_none(L):
    """Return a new list holding the items of ``L`` that are not ``None``, in order."""
    kept = []
    for item in L:
        if item is None:
            continue
        kept.append(item)
    return kept

class delhi_hc_search:
    """Selenium-driven scraper for the Delhi High Court case-status search.

    Construct the object, call ``start_scraping()`` to crawl every result page
    (the parsed rows end up in ``self.scraped_data``), then call ``close()`` to
    quit the browser.
    """

    def __init__(self,case_type,case_year,case_no=None,headless=True,no_image=True,delay=1,ret=False):
        """
        The Delhi High court website provides four ways to look for case data:
        1. Using Case type
        2. Using petitioner and respondents information
        3. Using Advocate information
        4. Using diary no. information

        This object provides a way to look for the bulk data using case type.

        case_type: int, index of the wanted entry in the case-type dropdown
                   (it is passed to ``select_by_index``; see ``case_type_options``)
        case_year: int, year of registering the case in yyyy
        case_no: int or None, case no. registered in the high court
        headless: bool, run Chrome without a visible window
        no_image: bool, disable image loading in Chrome
        delay: number of seconds slept between form interactions
        ret: bool, when true ``start_scraping`` returns the list of parsed pages

        Raises AssertionError when ``case_no`` is set but is not an int, or
        when ``case_year`` is not a four-digit int.
        """
        # Validate first so that a bad argument never yields a half-built object.
        if case_no:
            assert type(case_no) == int, "Case no. must be an integer"
        assert type(case_year) == int and len(str(case_year)) == 4, \
            "Case year must be an integer in YYYY format"

        self.url = "http://delhihighcourt.nic.in/case.asp"
        self.case_type = case_type
        self.case_no = case_no
        self.case_year = str(case_year)
        # Previously this flag was never stored, so start_scraping failed with
        # a NameError when it tried to read it.
        self.ret = ret

        # Browser configuration (see setup()).
        self.headless = headless
        self.no_image = no_image

        # XPaths that tell the scraper where each piece of the search form
        # lives on the page.
        self.elements = {}
        self.elements['case_type'] = "//*[@id='InnerPageContent']/form[1]/select[1]"
        self.elements['case_type_elem'] = "//*[@id='InnerPageContent']/form[1]/select[1]"
        self.elements['case_no'] = "//*[@id='InnerPageContent']/form[1]/input[1]"
        self.elements['case_year_elem'] = "//*[@id='c_year']"
        self.elements['captcha_text_input'] = "//*[@id='inputdigit']"
        self.elements['captcha_text_value'] = "//*[@id='InnerPageContent']/form[1]/label[4]"
        self.elements['search_but'] = "//*[@id='InnerPageContent']/form[1]/button"

        # Keeping a global name for case_type_select element
        self.case_type_select = None
        self.case_type_options = None
        self.delay = delay

    def _delay(self):
        """Sleep for ``self.delay`` seconds, usually to avoid overloading the server."""
        time.sleep(self.delay)

    def get_case_type_options(self):
        """Store the dropdown entries as ``(index, text)`` pairs in ``self.case_type_options``."""
        case_type_select = Select(self.driver.find_element_by_xpath(self.elements['case_type']))
        self.case_type_options = [(ind,opt.text) for ind, opt in enumerate(case_type_select.options)]

    def get_search_page(self):
        """Start the browser, open the search form and read the case-type options."""
        self.setup()
        self.driver.get(self.url)
        self.get_case_type_options()

    def get_captcha_text(self):
        """Read the captcha value shown on the form into ``self.captcha_text_val``."""
        label = self.driver.find_element_by_xpath(self.elements['captcha_text_value'])
        # The value is taken as the third space-separated token of the label text.
        self.captcha_text_val = label.text.split(" ")[2]

    def set_case_type(self):
        """Select the dropdown entry whose index is ``self.case_type``."""
        case_type_select = Select(self.driver.find_element_by_xpath(self.elements['case_type']))
        case_type_select.select_by_index(self.case_type)
        print(f'Selecting case type {self.case_type_options[self.case_type][1]}')

    def set_case_no(self):
        """Type the case number into the form; skipped when no case number was given."""
        if self.case_no:
            case_no_input = self.driver.find_element_by_xpath(self.elements['case_no'])
            case_no_input.send_keys(self.case_no)
            print(f'Selecting case no {self.case_no}')

    def set_case_year(self):
        """Select the year option whose value equals ``self.case_year``."""
        case_year_select = Select(self.driver.find_element_by_xpath(self.elements['case_year_elem']))
        case_year_select.select_by_value(self.case_year)

    def set_captcha_text(self):
        """Type the previously read captcha value into its input box."""
        captcha_text = self.driver.find_element_by_xpath(self.elements['captcha_text_input'])
        captcha_text.send_keys(self.captcha_text_val)

    def set_param(self):
        """Open the search page and fill in every search field, pausing between steps."""
        self.get_search_page()
        self._delay()
        self.set_case_type()
        self._delay()
        self.set_case_no()
        self._delay()
        self.set_case_year()
        self._delay()
        self.get_captcha_text()
        self.set_captcha_text()

    def get_search_results(self):
        """Fill in the form, submit it and record the number of hits."""
        self.set_param()
        search_but = self.driver.find_element_by_xpath(self.elements['search_but'])
        search_but.click()
        self.get_search_count()

    def _collect_nav_links(self):
        """Return the href of every 'archivelink' element on the current page."""
        return [element.get_attribute('href')
                for element in self.driver.find_elements_by_class_name('archivelink')]

    def _refresh_valid_links(self):
        """Recompute ``self.valid_links`` as the not-yet-visited, non-None links."""
        self.valid_links = remove_none(list(set(self.page_not_visited) - set(self.page_visited)))

    def start_scraping(self):
        """Run the search and crawl every page reachable through the pagination links.

        Side effects: sets ``page_visited``, ``page_not_visited``, ``valid_links``
        and ``scraped_data`` (one DataFrame row per parsed case; an empty
        DataFrame when no page was parsed).

        Returns the list of per-page dicts when the object was created with
        ``ret=True``, otherwise ``None``.

        TODO: for each application follow its orders link and save each order
        and judgement as pdf.
        """
        self.get_search_results()
        print(f'Total search results: {self.search_count}')
        self.page_visited = []
        self.page_not_visited = []

        # Seed the frontier with the pagination links of the first result page.
        # NOTE(review): the first result page itself is only parsed if a later
        # page links back to it - confirm that this is intended.
        self.page_not_visited.extend(self._collect_nav_links())
        self._refresh_valid_links()

        data = []
        while self.valid_links:
            link = self.valid_links[0]
            self.driver.get(link)
            self.page_visited.append(link)
            # Get case status data
            data.append(self.parse_status_html(self.driver.page_source))
            # Get other navigation links
            for nav_link in self._collect_nav_links():
                if nav_link not in self.page_visited:
                    self.page_not_visited.append(nav_link)
            self._refresh_valid_links()

        # pd.concat raises on an empty list, so an empty crawl gets an empty frame.
        if data:
            self.scraped_data = pd.concat([pd.DataFrame(d).T for d in data])
        else:
            self.scraped_data = pd.DataFrame()
        return data if self.ret else None

    def clean_text(self,text):
        """Drop non-breaking spaces and newlines, collapse runs of spaces/tabs, and trim."""
        text = text.replace(u'\xa0', u'').replace('\n','')
        text = re.sub(' +',' ',re.sub('\t+','\t',text))
        return text.strip()

    def merge_alternate_list(self,lst1, lst2):
        """Interleave two lists as lst1[0], lst2[0], lst1[1], lst2[1], ...

        When one list is longer its surplus items are kept at the end, so a
        page with an odd number of rows no longer loses its last row.
        """
        merged = []
        for idx in range(max(len(lst1), len(lst2))):
            if idx < len(lst1):
                merged.append(lst1[idx])
            if idx < len(lst2):
                merged.append(lst2[idx])
        return merged

    def parse_status_html(self,html):
        """Parse one case-status result page.

        Returns a dict keyed ``n_0``, ``n_1``, ... (row order on the page);
        each value is a dict with the keys sr_no, diary_no, case_status1,
        petitioner, respondent, advocate, court_no, case_status2 and
        judgement_date.
        """
        soup = bs(html,'html5lib')
        li_odd = soup.find_all('li',{"class":"clearfix odd"})
        li_even = soup.find_all('li',{"class":"clearfix even"})
        li = self.merge_alternate_list(li_odd,li_even)
        case_status_data = {}
        for i, row in enumerate(li):
            # One lookup per row instead of repeating the same query per field.
            spans = row.select('span',attrs={'class':re.compile('^title*')})

            s0 = self.clean_text(spans[0].get_text())
            sr_no = s0.replace(".","")

            s1 = self.clean_text(spans[1].get_text())
            case_status1 = re.findall(r'\[.*?\]',s1)[0]
            diary_no = s1.split(case_status1)[0].replace('\t','')

            s2 = self.clean_text(spans[2].get_text())
            advocate = s2.split('Advocate :')[-1]
            petitioner,respondent = s2.split('Advocate :')[0].split('Vs.')

            s3 = self.clean_text(spans[3].get_text())
            try:
                court_no,case_status2,judgement_date = re.findall(r'^\D*(\d)\s(\D*)\s(\d+/\d+/\d+)',s3)[0]
            except IndexError:
                # The pattern did not match this row; leave the fields blank.
                court_no,case_status2,judgement_date = "","",""

            case_status_data[f'n_{i}'] = {
                'sr_no': sr_no,
                'diary_no': diary_no,
                'case_status1': case_status1,
                'petitioner': petitioner,
                'respondent': respondent,
                'advocate': advocate,
                'court_no': court_no,
                'case_status2': case_status2,
                'judgement_date': judgement_date,
            }
        return case_status_data

    def get_order_page_links(self,html):
        """Return the absolute URLs behind the order/judgement buttons in ``html``.

        Buttons without an ``onclick`` attribute are skipped.
        """
        soup = bs(html,'html5lib')
        links = []
        for elem in soup.find_all('button',{'class':'button pull-right'}):
            onclick = elem.get('onclick')
            if not onclick:
                continue
            onclick = str(onclick)
            # Every occurrence of the attribute's last character is removed
            # (presumably the quote around the URL - confirm against the page),
            # then the JS prefix is replaced by the site root.
            links.append(onclick.replace(onclick[-1],'').replace('location.href=','http://delhihighcourt.nic.in/'))
        return links

    def get_search_count(self):
        """Read the number of hits from the result page into ``self.search_count``."""
        raw = self.driver.find_element_by_xpath("//*[@id='InnerPageContent']/span").text
        # The count is whatever follows the last ':' in the span text.
        self.search_count = int(raw.split(":")[-1].strip())

    def parse_orders_page(self,html):
        """Parse the orders page that is reached from a case's status row.

        The Delhi High court website essentially has a case status page and a
        button which gives details of all the orders related to that case.
        This method turns that orders page into structured data.

        Returns a dict keyed ``n_1``, ``n_2``, ...; each value holds sr_no,
        case_no, date_of_order and corrigendum.

        NOTE(review): only ``li`` elements with class "clearfix odd" are read
        here, unlike parse_status_html which also reads "clearfix even" -
        confirm that the orders page has no even rows.
        """
        soup = bs(html,'html5lib')
        li = soup.find_all('li',{"class":"clearfix odd"})

        orders = {}
        for i, row in enumerate(li):
            spans = row.select('span',attrs={'class':re.compile('^title*')})
            s0 = self.clean_text(spans[0].get_text())
            s1 = self.clean_text(spans[1].get_text())
            s2 = self.clean_text(spans[2].get_text())
            s3 = self.clean_text(spans[3].get_text())

            orders[f'n_{i+1}'] = {
                'sr_no': s0.replace(".",""),
                'case_no': s1,
                'date_of_order': s2,
                'corrigendum': s3,
            }

        return orders

    def setup(self):
        """Create the Chrome driver on ``self.driver`` using the stored flags."""
        options = Options()
        if self.headless:
            options.add_argument('--headless')

        # NOTE(review): several of these switches relax browser security
        # (web security, sandbox, certificate checks); keep them only if the
        # target site really needs them.
        for flag in ("--disable-xss-auditor",
                     "--disable-web-security",
                     "--allow-running-insecure-content",
                     "--no-sandbox",
                     "--disable-setuid-sandbox",
                     "--disable-webgl",
                     "--ignore-certificate-errors",
                     "--disable-popup-blocking"):
            options.add_argument(flag)

        if self.no_image:
            # Value 2 blocks images in Chrome's content settings.
            prefs = {"profile.managed_default_content_settings.images": 2}
            options.add_experimental_option("prefs", prefs)
        # The driver is created unconditionally: previously it was only built
        # inside the ``no_image`` branch, so ``no_image=False`` left the object
        # without a ``driver`` attribute.
        self.driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
        print('Browser setup complete')

    def close(self):
        """Quit the browser if one was started; do nothing otherwise."""
        driver = getattr(self, 'driver', None)
        if driver is not None:
            driver.quit()

In [136]:
delhi.close()

In [137]:
# ret=True makes start_scraping() hand back the list of parsed pages; the
# cells below assign that return value to `data` and iterate over it.
delhi = delhi_hc_search(case_type=4,case_no=None,case_year=2020,headless=False,delay=0,ret=True)

In [134]:
data = delhi.start_scraping()

[WDM] - Current google-chrome version is 90.0.4430
[WDM] - Get LATEST driver version for 90.0.4430






[WDM] - Driver [/Users/saurabhkarn/.wdm/drivers/chromedriver/mac64/90.0.4430.24/chromedriver] found in cache


Browser setup complete
Selecting case type BAIL APPLN. - [BAILA]
Total search results: 4244


KeyboardInterrupt: 

In [122]:
x = pd.concat([pd.DataFrame(d).T for d in data])

In [131]:
len(x)

4236

In [130]:
len(x.drop_duplicates())

4236

In [119]:
pd.concat([pd.DataFrame(data[0]).T,pd.DataFrame(data[1]).T])

Unnamed: 0,sr_no,diary_no,case_status1,petitioner,respondent,advocate,court_no,case_status2,judgement_date
n_0,65,BAIL APPLN.939/2020,[DISPOSED OFF],DEEPAK DABAS,STATE (GOVT. OF NCT OF DELHI),SHIVEK TREHAN,0.0,DISPOSED OFF on,19/05/2020
n_1,66,BAIL APPLN.938/2020,[DISPOSED OFF],ROSHINI BISWAS,GOVT. OF NCT OF DELHI & ANR.,ANUNAYA MEHTA,0.0,DISPOSED OFF on,19/05/2020
n_2,67,BAIL APPLN.937/2020,[DISPOSED OFF],ANIL YADAV @ SAMEER,THE STATE OF N.C.T OF DELHI,NARESH KUMAR TALWAR,0.0,DISPOSED OFF on,04/06/2020
n_3,68,BAIL APPLN.936/2020,[DISPOSED OFF],GHANSHYAM,STATE (NCT OF DELHI),RAJAN BAJAJ,0.0,DISPOSED OFF on,10/07/2020
n_4,69,BAIL APPLN.935/2020,[DISPOSED OFF],PRADEEP @ DEEPAK @ BHATTA,THE STATE (NCT OF DELHI),R K GIRI,0.0,DISPOSED OFF on,22/07/2020
n_5,70,BAIL APPLN.934/2020,[DISPOSED OFF],DILSHER AZAD,STATE (GOVT. OF NCT OF DELHI),VISHAL RAJ SEHIJPAL,0.0,DISPOSED OFF on,01/06/2020
n_6,71,BAIL APPLN.933/2020,[DISPOSED OFF],RAKESH SHARMA @ KALU,STATE,JATAN SINGH,0.0,DISPOSED OFF on,02/06/2020
n_7,72,BAIL APPLN.932/2020,[DISPOSED OFF],MOHAN RAI ARORA @ MONU,GOVT. OF NCT OF DELHI,SHRI SINGH,0.0,DISPOSED OFF on,21/05/2020
n_0,17,BAIL APPLN.984/2020,[DISPOSED OFF],SAGAR YADAV,STATE OF NCT OF DELHI,ASHOK CHHIKARA,0.0,DISPOSED OFF on,22/05/2020
n_1,18,BAIL APPLN.983/2020,[DISPOSED OFF],SURAJ,STATE (GNCT OF DELHI),HEMANT GULATI,0.0,DISPOSED OFF on,28/05/2020


In [107]:
import pandas as pd
dat = pd.DataFrame(data)

In [None]:
# Re-key the scraped records by their 'name' entry; each item is mutated
# because pop() removes that entry from it.
# NOTE(review): the dicts built by parse_status_html are keyed 'n_<i>' and
# carry no 'name' entry, so pop('name') looks like it would raise KeyError
# on that data - confirm what `data` is expected to hold here.
new_dict = {}
for item in data:
    name = item.pop('name')
    new_dict[name] = item


In [71]:
n_x = delhi.page_not_visited
v_x = delhi.page_visited

In [78]:
n_x[0]

['http://delhihighcourt.nic.in/dhc_case_status_list_new.asp?ayear=&pyear=&SNo=1&SRecNo=8&dno=&dyear=&ctype_29=BAIL%20APPLN.&cno=&cyear=2020&party=&adv=',
 'http://delhihighcourt.nic.in/dhc_case_status_list_new.asp?ayear=&pyear=&SNo=1&SRecNo=16&dno=&dyear=&ctype_29=BAIL%20APPLN.&cno=&cyear=2020&party=&adv=',
 'http://delhihighcourt.nic.in/dhc_case_status_list_new.asp?ayear=&pyear=&SNo=1&SRecNo=24&dno=&dyear=&ctype_29=BAIL%20APPLN.&cno=&cyear=2020&party=&adv=',
 'http://delhihighcourt.nic.in/dhc_case_status_list_new.asp?ayear=&pyear=&SNo=1&SRecNo=32&dno=&dyear=&ctype_29=BAIL%20APPLN.&cno=&cyear=2020&party=&adv=',
 'http://delhihighcourt.nic.in/dhc_case_status_list_new.asp?ayear=&pyear=&SNo=1&SRecNo=40&dno=&dyear=&ctype_29=BAIL%20APPLN.&cno=&cyear=2020&party=&adv=',
 'http://delhihighcourt.nic.in/dhc_case_status_list_new.asp?ayear=&pyear=&SNo=1&SRecNo=48&dno=&dyear=&ctype_29=BAIL%20APPLN.&cno=&cyear=2020&party=&adv=',
 'http://delhihighcourt.nic.in/dhc_case_status_list_new.asp?ayear=&pyea

In [19]:
delhi.get_search_count()

In [20]:
delhi.search_count

4244

In [41]:
pages = [element.get_attribute('href') for element in delhi.driver.find_elements_by_class_name('archivelink')]

In [None]:
def get_valid_links(page_visited,page_not_visited):
    """Return the distinct links of ``page_not_visited`` that are absent from ``page_visited``.

    The result is built from a set difference, so its order is not defined.
    """
    seen = set(page_visited)
    pending = set(page_not_visited)
    return list(pending - seen)

In [38]:
pages

['http://delhihighcourt.nic.in/dhc_case_status_list_new.asp?ayear=&pyear=&SNo=1&SRecNo=8&dno=&dyear=&ctype_29=BAIL%20APPLN.&cno=&cyear=2020&party=&adv=',
 'http://delhihighcourt.nic.in/dhc_case_status_list_new.asp?ayear=&pyear=&SNo=1&SRecNo=16&dno=&dyear=&ctype_29=BAIL%20APPLN.&cno=&cyear=2020&party=&adv=',
 'http://delhihighcourt.nic.in/dhc_case_status_list_new.asp?ayear=&pyear=&SNo=1&SRecNo=24&dno=&dyear=&ctype_29=BAIL%20APPLN.&cno=&cyear=2020&party=&adv=',
 'http://delhihighcourt.nic.in/dhc_case_status_list_new.asp?ayear=&pyear=&SNo=1&SRecNo=32&dno=&dyear=&ctype_29=BAIL%20APPLN.&cno=&cyear=2020&party=&adv=',
 'http://delhihighcourt.nic.in/dhc_case_status_list_new.asp?ayear=&pyear=&SNo=1&SRecNo=40&dno=&dyear=&ctype_29=BAIL%20APPLN.&cno=&cyear=2020&party=&adv=',
 'http://delhihighcourt.nic.in/dhc_case_status_list_new.asp?ayear=&pyear=&SNo=1&SRecNo=48&dno=&dyear=&ctype_29=BAIL%20APPLN.&cno=&cyear=2020&party=&adv=',
 'http://delhihighcourt.nic.in/dhc_case_status_list_new.asp?ayear=&pyea

In [36]:
# find_elements_* returns a list of WebElements; a list has no get_attribute(),
# so the elements are displayed directly.
delhi.driver.find_elements_by_class_name('page-navigation')

[<selenium.webdriver.remote.webelement.WebElement (session="f3114641946ac7c5250f8519379e074c", element="5b7351fd-a19c-481f-bd42-f2430f998410")>]

In [32]:
delhi.driver.find_element_by_xpath("//*[@id='InnerPageContent']/div")

<selenium.webdriver.remote.webelement.WebElement (session="f3114641946ac7c5250f8519379e074c", element="5b7351fd-a19c-481f-bd42-f2430f998410")>

In [30]:
soup.find_all('div',{'class':'page-navigation'})

[<div class="page-navigation">
 Page : 
 1
      
 
     <a class="archivelink" href="dhc_case_status_list_new.asp?ayear=&amp;pyear=&amp;SNo=1&amp;SRecNo=8&amp;dno=&amp;dyear=&amp;ctype_29=BAIL APPLN.&amp;cno=&amp;cyear=2020&amp;party=&amp;adv=">
     2</a>
 
      
 
     <a class="archivelink" href="dhc_case_status_list_new.asp?ayear=&amp;pyear=&amp;SNo=1&amp;SRecNo=16&amp;dno=&amp;dyear=&amp;ctype_29=BAIL APPLN.&amp;cno=&amp;cyear=2020&amp;party=&amp;adv=">
     3</a>
 
      
 
     <a class="archivelink" href="dhc_case_status_list_new.asp?ayear=&amp;pyear=&amp;SNo=1&amp;SRecNo=24&amp;dno=&amp;dyear=&amp;ctype_29=BAIL APPLN.&amp;cno=&amp;cyear=2020&amp;party=&amp;adv=">
     4</a>
 
      
 
     <a class="archivelink" href="dhc_case_status_list_new.asp?ayear=&amp;pyear=&amp;SNo=1&amp;SRecNo=32&amp;dno=&amp;dyear=&amp;ctype_29=BAIL APPLN.&amp;cno=&amp;cyear=2020&amp;party=&amp;adv=">
     5</a>
 
      
 
     <a class="archivelink" href="dhc_case_status_list_new.asp?ayear=&amp;pye

In [17]:
delhi.case_type_options

[(0, 'ARB. A. (COMM.) - [ARB]'),
 (1, 'ARB.A. - [AAP]'),
 (2, 'ARB.P. - [AA]'),
 (3, 'AW - [AW]'),
 (4, 'BAIL APPLN. - [BAILA]'),
 (5, 'C.O. - [XOBJ]'),
 (6, 'C.O. - [CO]'),
 (7, 'C.R.P. - [CR]'),
 (8, 'C.REF.(O) - [CRO]'),
 (9, 'C.RULE - [CRULE]'),
 (10, 'CA - [CAV]'),
 (11, 'CA - [CAA]'),
 (12, 'CA - [CAA]'),
 (13, 'CAVEAT(CO.) - [CAVE]'),
 (14, 'CC - [CC]'),
 (15, 'CC(ARB.) - [CCR]'),
 (16, 'CCP(CO.) - [CCPCO]'),
 (17, 'CCP(O) - [CCPO]'),
 (18, 'CCP(REF) - [CCPRF]'),
 (19, 'CEAC - [CEAC]'),
 (20, 'CEAR - [CEAR]'),
 (21, 'CF - [CF]'),
 (22, 'CHAT.A.C. - [CHAC]'),
 (23, 'CHAT.A.REF - [CHAR]'),
 (24, 'CM APPL. - [CM2]'),
 (25, 'CM APPL. - [CM1]'),
 (26, 'CM(M) - [CMM]'),
 (27, 'CMI - [CMI]'),
 (28, 'CMI - [CMI]'),
 (29, 'CO.A(SB) - [COASB]'),
 (30, 'CO.A(SB) - [CO.A]'),
 (31, 'CO.APP. - [COA]'),
 (32, 'CO.APPL. - [CA]'),
 (33, 'CO.APPL.(C) - [CA(C)]'),
 (34, 'CO.APPL.(M) - [CA(M)]'),
 (35, 'CO.EX. - [CO.EX]'),
 (36, 'CO.PET. - [CP]'),
 (37, 'CONT.APP.(C) - [CCA]'),
 (38, 'CONT.CAS(C) -

In [67]:
# Take the HTML of the page currently shown in the browser, parse its case
# rows, then collect the target URL of every order/judgement button.
html = delhi.driver.page_source
dat = delhi.parse_status_html(html)
soup = bs(html,'html5lib')
oj_details_elem = soup.find_all('button',{'class':'button pull-right'})
# For each button: remove every occurrence of the last character of its
# onclick text and swap the 'location.href=' prefix for the site root.
oj_details = [str(elem.get('onclick')).replace(str(elem.get('onclick'))[-1],'').replace('location.href=','http://delhihighcourt.nic.in/')
                for elem in oj_details_elem]

In [106]:
search_count = delhi.driver.find_element_by_xpath("//*[@id='InnerPageContent']/span").text
search_count = int(search_count.split(":")[-1].lstrip().rstrip())

In [6]:
i=0
delhi.driver.execute_script("window.open('{}');".format(oj_details[i]))

In [7]:
window_list = delhi.driver.window_handles

In [15]:
delhi.driver.switch_to.window(window_list[1])

In [18]:
p1 = delhi.driver.page_source
delhi.driver.switch_to.window(window_list[0])
p2 = delhi.driver.page_source

In [22]:
delhi.driver.switch_to.window(window_list[1])

In [23]:
p2 = delhi.driver.page_source

In [43]:
len(li)

22

In [9]:
# Open every collected order/judgement URL through a JavaScript window.open call.
for i in range(len(oj_details)):
    delhi.driver.execute_script("window.open('{}');".format(oj_details[i]))

In [10]:
x = delhi.driver.window_handles

In [12]:
delhi.driver.switch_to.window(x[1])

In [25]:
# switch_to_window() is deprecated; use switch_to.window() as the other cells do.
sr_no = 6+1
delhi.driver.switch_to.window(x[sr_no-4])

  delhi.driver.switch_to_window(x[sr_no-4])


In [36]:
# Rebuild the row list by hand, using the same calls as
# delhi_hc_search.parse_status_html, so single rows can be inspected below.
soup = bs(html,'html5lib')
li_odd = soup.find_all('li',{"class":"clearfix odd"})
li_even = soup.find_all('li',{"class":"clearfix even"})
li = delhi.merge_alternate_list(li_odd,li_even)

In [43]:
i=5
s0 = delhi.clean_text(li[i].select('span',attrs={'class':re.compile('^title*')})[0].get_text())
sr_no = s0.replace(".","")

In [44]:
sr_no

'6'

In [32]:
s1 = delhi.clean_text(li[i].select('span',attrs={'class':re.compile('^title*')})[1].get_text())

In [33]:
s1

'ARB.A.6/2020\t[PENDING]\t Order(s) Judgement(s)'

In [1101]:
s1 = delhi.clean_text(li[i].select('span',attrs={'class':re.compile('^title*')})[1].get_text())

In [1102]:
s1

'ARB.A.6/2020\t[PENDING]\t Order(s) Judgement(s)'