# Importing Libraries

In [3]:
#selenium libraries
import selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException, ElementNotInteractableException, ElementClickInterceptedException

import time as t
import pandas as pd
import re

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC

import requests
import urllib.parse
import os
import shutil
pd.set_option("max_rows", None)

In [4]:
def get_drive_launch(url):
    """
    This function initiates the Chrome browser's driver with the url passed as a parameter and returns the driver instance.
    Parameters:
        url - url of the website
    return:
        driver - driver instance of the chrome browser    
    """
    #creating driver instance
    driver = webdriver.Chrome('./driver/chromedriver.exe')
    driver.maximize_window()
    
    #defining implicit wait
    driver.implicitly_wait(10)
    
    #launching the url
    driver.get(url)
    
    return driver

## 1.Scrape the details of most viewed videos on YouTube from Wikipedia:  
Url= https://en.wikipedia.org/wiki/List_of_most-viewed_YouTube_videos.  

You need to find following details:  

A) Rank 

B) Name 

C) Artist 

D) Upload date 

E) Views

In [94]:
def scrape_webTable_data(url, table_name = None, drop_last = False, rename = None, drop_first = False):
    """
    This function scrapes data from web table in the html page from the url passed in.
    parameter-
    url: url of the webpage
    table_name: Name(caption) of the web table in the web page. Defaults to None. 
                If None, it fetches the 1st web table in the page.
                If a table caption is given it fetches the 1st table from the page that matches the caption
    drop_last: Boolean value. If true drops the last record.
    rename: A dictionary. The old column name and new column names can be sent as a dictionary
    drop_first: Boolean value. If ture drops the first record.
    
    Returns-
    DataFrame: Returns a pandas dataframe with data from the web table
    """
    if table_name:
        table_MN = pd.read_html(url, match=table_name)[0]
    else:
        table_MN = pd.read_html(url)[0]
        
    df = table_MN
    
    if drop_last:
        table_MN = table_MN[:-1].copy()    
    if drop_first:
        table_MN = table_MN[1:].copy()
    if rename:
        table_MN = table_MN.rename(columns=rename)
    df = table_MN
    
    return df


In [86]:
url = "https://en.wikipedia.org/wiki/List_of_most-viewed_YouTube_videos"
data = scrape_webTable_data(url,"Top 30 most-viewed YouTube videos", drop_last = True, rename = {'No.': 'Rank'})
data['Video name'] = data['Video name'].apply(lambda x: x.split('[')[0])
data.drop(['Note','Unnamed: 6'], axis = 1,inplace = True)
data

Unnamed: 0,Rank,Video name,Uploader,Views (billions),Upload date
0,1.0,"""Baby Shark Dance""",Pinkfong Kids' Songs & Stories,8.44,"June 17, 2016"
1,2.0,"""Despacito""",Luis Fonsi,7.32,"January 12, 2017"
2,3.0,"""Shape of You""",Ed Sheeran,5.29,"January 30, 2017"
3,4.0,"""Johny Johny Yes Papa""",LooLoo Kids,5.24,"October 8, 2016"
4,5.0,"""See You Again""",Wiz Khalifa,5.08,"April 6, 2015"
5,6.0,"""Masha and the Bear – Recipe for Disaster""",Get Movies,4.43,"January 31, 2012"
6,7.0,"""Uptown Funk""",Mark Ronson,4.16,"November 19, 2014"
7,8.0,"""Gangnam Style""",Psy,4.05,"July 15, 2012"
8,9.0,"""Learning Colors – Colorful Eggs on a Farm""",Miroshka TV,3.97,"February 27, 2018"
9,10.0,"""Bath Song""",Cocomelon – Nursery Rhymes,3.95,"May 2, 2018"


## 2. Scrape the details team India’s international fixtures from bcci.tv.
Url = https://www.bcci.tv/.

You need to find following details:

A) Match title (I.e. 1st ODI)

B) Series

C) Place

D) Date

E) Time

Note: - From bcci.tv home page you have reach to the international fixture page through code

In [13]:
def scrape_bcci_fixture(choice):
    """
    This function will scrape the Fixture details of the Indian team from the BCCI website.
    Parameter- 
    choice: 'International' or 'Domestic'
    returns-
    webdriver: the selenium webdriver instance incase it should be passed on and used.
    dataframe: the scrapped data in the form of a dataframe
    """
    url = 'https://www.bcci.tv/'
    d = get_drive_launch(url)
    url = d.find_element_by_xpath("//div[contains(text(),'"+choice+"')]/following-sibling::div//ul//a[contains(text(),'Fixtures')]").get_attribute('href')
    d.quit()
    d = get_drive_launch(url)

    match_titles = []
    series_lst = []
    places = []
    dates = []
    times = []

    titles_elemns = d.find_elements_by_xpath("//strong[@class='fixture__name fixture__name--with-margin']")
    series_elemns = d.find_elements_by_xpath("//div[@class='fixture__format-strip']")
    places_elemns = d.find_elements_by_xpath("//strong[@class='fixture__name fixture__name--with-margin']/following-sibling::span")
    dates_elemns = d.find_elements_by_xpath("//span[@class='fixture__date']")
    months_elemns = d.find_elements_by_xpath("//span[@class='fixture__month']")
    years_elemns = d.find_elements_by_xpath("//h3[@class='event-list__date js-date']")
    times_elemns = d.find_elements_by_xpath("//span[@class='fixture__time']")

    for title, series, place, date, month, time in zip(titles_elemns,series_elemns,places_elemns,dates_elemns,months_elemns, times_elemns):
        match_titles.append(title.text.strip())
        series_lst.append(series.text.strip().replace('\n',' Match - '))
        places.append(place.text.strip())
        yr = ''
        for i in years_elemns:
            if month.text.lower() in i.text.lower():
                yr = i.text.strip()
                break
        dates.append(date.text.strip()+" "+yr)
        times.append(time.text.strip())

    data = pd.DataFrame({'Match Title':match_titles, 'Series':series_lst, 'Place':places, 'Date':dates, 'Time':times})
    return d, data

In [14]:
d, df = scrape_bcci_fixture('International')
d.quit()
df

Unnamed: 0,Match Title,Series,Place,Date,Time
0,Final,TEST Match - ICC WORLD TEST CHAMPIONSHIP FINAL,"The Ageas Bowl, Southampton",18 June 2021,15:30 IST
1,1st Test,TEST Match - ENGLAND V INDIA 2021,"Trent Bridge, Nottingham",04 August 2021,15:30 IST
2,2nd Test,TEST Match - ENGLAND V INDIA 2021,"Lord's, London",12 August 2021,15:30 IST
3,3rd Test,TEST Match - ENGLAND V INDIA 2021,"Headingley, Leeds",25 August 2021,15:30 IST
4,4th Test,TEST Match - ENGLAND V INDIA 2021,"The Oval, London",02 September 2021,15:30 IST
5,5th Test,TEST Match - ENGLAND V INDIA 2021,"Old Trafford, Manchester",10 September 2021,15:30 IST


## 3. Scrape the details of selenium exception from guru99.com.

Url = https://www.guru99.com/

You need to find following details:

A) Name

B) Description

Note: - From guru99 home page you have to reach to selenium exception handling page through code.

In [102]:
def scrape_guru(topic, sub_topic):
    """
    This function scrapes data from the 'guru99' website based on the topic and the subtopic passed. 
    This function internally calls scrape_webTable_data() function to scrape the data from the web table in the site.
    parameter-
    topic: String. Topic to scrape from.
    sub_topic: String. Sub topic to scrap.
    """
    url = 'https://www.guru99.com/'
    d = get_drive_launch(url)
    d.find_element_by_xpath("//a[contains(text(),'"+topic+"')]").click()
    t.sleep(2)

    d.find_element_by_xpath("//a[@title='"+sub_topic+"']").click()
    t.sleep(2)
    url = d.current_url
    df = scrape_webTable_data(url,rename = {0:'Name', 1:'Description'}, drop_first = True)
    
    return d,df

In [103]:
d,data = scrape_guru('Selenium', 'Selenium Exception Handling (Common Exceptions List)')
d.quit()
data

Unnamed: 0,Name,Description
1,ElementNotVisibleException,This type of Selenium exception occurs when an...
2,ElementNotSelectableException,This Selenium exception occurs when an element...
3,NoSuchElementException,This Exception occurs if an element could not ...
4,NoSuchFrameException,This Exception occurs if the frame target to b...
5,NoAlertPresentException,This Exception occurs when you switch to no pr...
6,NoSuchWindowException,This Exception occurs if the window target to ...
7,StaleElementReferenceException,This Selenium exception occurs happens when th...
8,SessionNotFoundException,The WebDriver is acting after you quit the bro...
9,TimeoutException,Thrown when there is not enough time for a com...
10,WebDriverException,This Exception takes place when the WebDriver ...


## 4. Scrape the details of State-wise GDP of India from statisticstime.com.

Url = http://statisticstimes.com/

You have to find following details:

A) Rank

B) State

C) GSDP(18-19)

D) GSDP(17-18)

E) Share(2017)

F) GDP($ billion)

Note: - From statisticstimes home page you have to reach to economy page through code.

In [87]:
def scrape_statewise_gdp_india(url):
    d = get_drive_launch(url)
    d.find_element_by_xpath("//button[contains(text(),'Economy')]").click()
    d.find_element_by_xpath("//button[contains(text(),'Economy')]/following-sibling::div//a[contains(text(),'India')]").click()
    t.sleep(2)
    frame0 = d.find_element_by_xpath("//iframe[@id='aswift_2']")
    d.switch_to.frame(frame0)
    frame = d.find_element_by_xpath("//iframe[@id='ad_iframe']")
    d.switch_to.frame(frame)
    d.find_element_by_xpath("//div[@id='mys-wrapper']//div[@id='dismiss-button']").click()
    t.sleep(2)
    d.switch_to.default_content()
    d.find_element_by_xpath("//a[contains(text(),'GDP of Indian states')]").click()

    t.sleep(2)
    url = d.current_url
    df = pd.read_html(url,match='GSDP')[0]
    df.drop(df.columns[-2:], axis=1, inplace = True)
    df.columns = ['Rank','State','GSDP(19-20)','GSDP(18-19)','Share(2018)','GDP($ billion)']
    df = df.iloc[:-1]
    df['Rank'] = df['Rank'].astype(int)
    d.quit()
    return df

In [88]:
url = 'http://statisticstimes.com/'
data = scrape_statewise_gdp_india(url)
data

Unnamed: 0,Rank,State,GSDP(19-20),GSDP(18-19),Share(2018),GDP($ billion)
0,1,Maharashtra,-,2632792,13.88%,398.145
1,2,Tamil Nadu,1845853,1630208,8.59%,246.529
2,3,Uttar Pradesh,1687818,1584764,8.35%,239.656
3,4,Gujarat,-,1502899,7.92%,227.276
4,5,Karnataka,1631977,1493127,7.87%,225.798
5,6,West Bengal,1253832,1089898,5.75%,164.820
6,7,Rajasthan,1020989,942586,4.97%,142.543
7,8,Andhra Pradesh,972782,862957,4.55%,130.501
8,9,Telangana,969604,861031,4.54%,130.210
9,10,Madhya Pradesh,906672,809592,4.27%,122.431


## 5. Scrape the details of trending repositories on Github.com.

Url = https://github.com/

You have to find the following details:

A) Repository title

B) Repository description

C) Contributors count

D) Language used

In [18]:
def scrape_trending_github_repo(url):
    repo_names = []
    repo_desc = []
    contribs = []
    Lang = []
    d = get_drive_launch(url)

    d.find_element_by_xpath("//summary[contains(text(),'Explore')]").click()
    try:
        d.find_element_by_xpath("//a[contains(text(),'Trending')]").click()
    except ElementNotInteractableException:
        d.find_element_by_xpath("//summary[contains(text(),'Explore')]").click()
        d.find_element_by_xpath("//a[contains(text(),'Trending')]").click()

    repo_elems = d.find_elements_by_xpath("//h1[@class='h3 lh-condensed']//a")

    for i in range(len(repo_elems)):
        repo_elems = d.find_elements_by_xpath("//h1[@class='h3 lh-condensed']//a")
        repo = repo_elems[i]
        repo_names.append(repo.text)
        repo.click()
        t.sleep(2)
        try:
            des_elemn = d.find_element_by_xpath("//article[@class='markdown-body entry-content container-lg']")
            repo_desc.append(des_elemn.text)
        except NoSuchElementException:
            repo_desc.append("-")

        try:
            contributors = d.find_element_by_xpath("//a[contains(text(),'Contributors')]/span")
            contribs.append(contributors.text)
        except NoSuchElementException:
            contribs.append("-")

        try:
            lang = d.find_element_by_xpath("//h2[contains(text(),'Languages')]/following-sibling::ul")
            Lang.append(','.join(re.sub(r'[^a-zA-Z]',' ',lang.text).split()))

        except NoSuchElementException:
            Lang.append("-")

        d.back()
        t.sleep(2)

    data = pd.DataFrame({'Repository title':repo_names, 'Repository description':repo_desc, 'Contributors count':contribs, 'Lanuage used':Lang})
    return d,data

In [20]:
url = 'https://github.com/'
dr,df = scrape_trending_github_repo(url)
dr.quit()
df

Unnamed: 0,Repository title,Repository description,Contributors count,Lanuage used
0,nvbn / thefuck,"The Fuck\nThe Fuck is a magnificent app, inspi...",156,Python
1,raydium-io / raydium-ui,Raydium UI\nBuild Setup\n# install dependencie...,3,"TypeScript,Vue,Other"
2,thedevdojo / wave,Introduction\nWave is a Software as a Service ...,2,"PHP,Blade,Shell"
3,chrisleekr / binance-trading-bot,Binance Trading Bot\nAutomated Binance trading...,5,"JavaScript,CSS,HTML,Dockerfile"
4,rust-lang / rust,The Rust Programming Language\nThis is the mai...,3255,"Rust,Python,JavaScript,Makefile,C,Shell,Other"
5,microsoft / PowerToys,Microsoft PowerToys\nDownloads & Release notes...,190,"C,C,PowerShell,C,Batchfile,HLSL,Python"
6,DIGITALCRIMINAL / OnlyFans,OnlyFans DataScraper (Python 3.9.X)\nMandatory...,21,"Python,Other"
7,521xueweihan / HelloGitHub,中文 | English\n分享 GitHub 上有趣、入门级的开源项目。\n兴趣是最好的老...,11,Python
8,tldr-pages / tldr,What is tldr-pages?\nThe tldr-pages project is...,1339,"Markdown,Other"
9,AFLplusplus / LibAFL,"LibAFL, the fuzzer library.\nAdvanced Fuzzing ...",10,"Rust,C,C,Shell"


## 6. Scrape the details of top 100 songs on billiboard.com.

Url = https://www.billboard.com/

You have to find the following details:

A) Song name

B) Artist name

C) Last week rank

D) Peak rank

E) Weeks on board

Note: - From the home page you have to click on the charts option then hot 100-page link through code.

In [18]:
def scrape_top_songs(url):
    songs = []
    artists = []
    lst_wks = []
    pks = []
    wobs = []

    d = get_drive_launch(url)
    d.find_element_by_xpath("//div[@class='header__sticky__child']//a[contains(text(),'Charts')]").click()
    t.sleep(1)
    d.find_element_by_xpath("//div[contains(text(),'THE HOT 1OO')]/../..").click()

    song_elmns = d.find_elements_by_xpath("//span[@class='chart-element__information__song text--truncate color--primary']")
    artist_elmns = d.find_elements_by_xpath("//span[@class='chart-element__information__artist text--truncate color--secondary']")
    lst_wk_rnk_elmns = d.find_elements_by_xpath("//div[@class='chart-element__meta text--center color--secondary text--last']")
    pk_elmns = d.find_elements_by_xpath("//div[@class='chart-element__meta text--center color--secondary text--peak']")
    wob_elmns = d.find_elements_by_xpath("//div[@class='chart-element__meta text--center color--secondary text--week']")

    for song, artist, lst_wk, pk, wob in zip(song_elmns,artist_elmns,lst_wk_rnk_elmns,pk_elmns,wob_elmns):
        songs.append(song.text.strip())
        artists.append(artist.text.strip())
        lst_wks.append(lst_wk.text.strip())
        pks.append(pk.text.strip())
        wobs.append(wob.text.strip())

    data = pd.DataFrame({'Song name':songs, 'Artist name':artists, 'Last week rank':lst_wks, 'Peak rank':pks, 'Weeks on board':wobs})
    return d,data

In [26]:
url = 'https://www.billboard.com/'
driver, df = scrape_top_songs(url)
driver.quit()
pd.set_option("max_rows", None)
df

Unnamed: 0,Song name,Artist name,Last week rank,Peak rank,Weeks on board
0,Rapstar,Polo G,1,1,2
1,Leave The Door Open,Silk Sonic (Bruno Mars & Anderson .Paak),3,1,7
2,Peaches,Justin Bieber Featuring Daniel Caesar & Giveon,4,1,5
3,Montero (Call Me By Your Name),Lil Nas X,2,1,4
4,Levitating,Dua Lipa Featuring DaBaby,6,5,29
5,Save Your Tears,The Weeknd,5,4,19
6,Astronaut In The Ocean,Masked Wolf,10,7,10
7,Kiss Me More,Doja Cat Featuring SZA,7,7,2
8,Up,Cardi B,8,1,11
9,Drivers License,Olivia Rodrigo,9,1,15


## 7. Scrape the details of Data science recruiters from naukri.com.

Url = https://www.naukri.com/

You have to find the following details:

A) Name

B) Designation

C) Company

D) Skills they hire for

E) Location

Note: - From naukri.com homepage click on the recruiters option and the on the search pane type Data science and 
click on search. All this should be done through code

In [111]:
def get_naukri_recruiters_details(url, search_key):
    names = []
    designs = []
    comps = []
    skills = []
    locations = []
    d = get_drive_launch(url)
    main_window = d.current_window_handle
    d.find_element_by_xpath("//div[contains(text(),'Recruiters')]").click()
    windows = d.window_handles
    d.switch_to.window(windows[1])
    t.sleep(2)
    d.find_element_by_xpath("//input[@placeholder='Skills, Designations, Companies']").send_keys(search_key)
    d.find_element_by_xpath("//button[@type='submit']").click()

    name_elems = d.find_elements_by_xpath("//p[@class='highlightable']/a[1]")

    for name_ele in name_elems:

        name = name_ele.text
        try:
            names.append(d.find_element_by_xpath("//a[@title='"+name+"']/../a").text)
        except NoSuchElementException:
            try:
                names.append(d.find_element_by_xpath("//a[@title='"+name+" ']/../a").text)
            except NoSuchElementException:
                names.append("-")

        try:
            designs.append(d.find_element_by_xpath("//a[@title='"+name+"']/../span[1]").text)
        except NoSuchElementException:
            try:
                designs.append(d.find_element_by_xpath("//a[@title='"+name+" ']/../span[1]").text)
            except NoSuchElementException:
                designs.append("-")

        try:
            comps.append(d.find_element_by_xpath("//a[@title='"+name+"']/../a[2]").text)
        except NoSuchElementException:
            try:
                comps.append(d.find_element_by_xpath("//a[@title='"+name+" ']/../a[2]").text)
            except NoSuchElementException:
                comps.append("-")

        try:
            skills.append(d.find_element_by_xpath("//a[@title='"+name+"']/../../following-sibling::div[@class='hireSec highlightable'][1]").text)
        except NoSuchElementException:
            try:
                skills.append(d.find_element_by_xpath("//a[@title='"+name+" ']/../../following-sibling::div[@class='hireSec highlightable'][1]").text)
            except NoSuchElementException:
                skills.append("-")

        try:
            locations.append(d.find_element_by_xpath("//a[@title='"+name+"']/../span[2]").text)
        except NoSuchElementException:
            try:
                locations.append(d.find_element_by_xpath("//a[@title='"+name+" ']/../span[2]").text)
            except NoSuchElementException:
                locations.append("-")
    df = pd.DataFrame({'Name':names, 'Designation':designs, 'Company':comps, 'Skills they hire for':skills, 'Location':locations})
    return d,df

In [112]:
url = 'https://www.naukri.com/'
d, data = get_naukri_recruiters_details(url, 'Data science')
d.quit()
data

Unnamed: 0,Name,Designation,Company,Skills they hire for,Location
0,Aakash Harit,HR Manager,Data Science Network,"Classic ASP Developer, Internet Marketing Prof...",Delhi
1,shravan Kumar Gaddam,Company Recruiter,Shore Infotech India Pvt. Ltd,".Net, Java, Data Science, Linux Administration...",Hyderabad / Secunderabad
2,Talent Acquisition Executive,Recruitment Professional,XenonStack,"Web Designing, html5, Angular.js, seo, hadoop,...",Chandigarh
3,Anik Agrawal,Company Recruiter,Enerlytics Software Solutions Pvt Ltd,"Mean Stack, javascript, angularjs, mongodb, We...",Ahmedabad
4,MARSIAN Technologies LLP,Company HR,MARSIAN Technologies LLP,"Data Science, Artificial Intelligence, Machine...",Pune
5,subhas patel,Founder CEO,LibraryXProject,"Hadoop, Spark, Digital Strategy, Data Architec...",UK - (london)
6,Abhishek - Only Analytics Hiring - India and,Recruitment Lead Consultant,Apidel Technologies Division of Transpower,"Analytics, Business Intelligence, Business Ana...",Vadodara / Baroda
7,Institute for Financial Management and Resear,Programme Manager,IFMR,Data Science,Chennai
8,Balu Ramesh,HR Administrator,Techvantage Systems Pvt Ltd,"Machine Learning, algorithms, Go Getter, Compu...",Trivandrum
9,Asif Lucknowi,Director,Weupskill- Live Wire India,"Technical Training, Software Development, Pres...",Indore


## 8. Scrape the details of Highest selling novels.

Url = https://www.theguardian.com/news/datablog/2012/aug/09/best-selling-books-all-time-fifty-shades-grey-compare/

You have to find the following details:

A) Book name

B) Author name

C) Volumes sold

D) Publisher

E) Genre

In [28]:
def get_highest_selling_novels(url):
    web_table_name = 'Top 100 best selling books of all time'
    data = pd.read_html(url,match = web_table_name)[0]
    return data

In [30]:
url = 'https://www.theguardian.com/news/datablog/2012/aug/09/best-selling-books-all-time-fifty-shades-grey-compare/'
df = get_highest_selling_novels(url)
#pd.set_option("max_rows", None)
df = df.iloc[:-1]
df

Unnamed: 0,Rank,Title,Author,Volume Sales,Publisher,Genre
0,1,"Da Vinci Code,The","Brown, Dan",5094805,Transworld,"Crime, Thriller & Adventure"
1,2,Harry Potter and the Deathly Hallows,"Rowling, J.K.",4475152,Bloomsbury,Children's Fiction
2,3,Harry Potter and the Philosopher's Stone,"Rowling, J.K.",4200654,Bloomsbury,Children's Fiction
3,4,Harry Potter and the Order of the Phoenix,"Rowling, J.K.",4179479,Bloomsbury,Children's Fiction
4,5,Fifty Shades of Grey,"James, E. L.",3758936,Random House,Romance & Sagas
5,6,Harry Potter and the Goblet of Fire,"Rowling, J.K.",3583215,Bloomsbury,Children's Fiction
6,7,Harry Potter and the Chamber of Secrets,"Rowling, J.K.",3484047,Bloomsbury,Children's Fiction
7,8,Harry Potter and the Prisoner of Azkaban,"Rowling, J.K.",3377906,Bloomsbury,Children's Fiction
8,9,Angels and Demons,"Brown, Dan",3193946,Transworld,"Crime, Thriller & Adventure"
9,10,Harry Potter and the Half-blood Prince:Childre...,"Rowling, J.K.",2950264,Bloomsbury,Children's Fiction


## 9. Scrape the details most watched tv series of all time from imdb.com.

Url = https://www.imdb.com/list/ls095964455/

You have to find the following details:

A) Name

B) Year span

C) Genre

D) Run time

E) Rating

F) Votes

In [31]:
def scrape_most_watched_tvSeries(url):
    names = []
    years = []
    genres = []
    runtimes = []
    ratings = []
    votes = []

    d = get_drive_launch(url)

    name_elms = d.find_elements_by_xpath("//h3[@class='lister-item-header']/a")
    year_elms = d.find_elements_by_xpath("//h3[@class='lister-item-header']/span[@class='lister-item-year text-muted unbold']")
    gnr_elms = d.find_elements_by_xpath("//span[@class='genre']")
    runtm_elms = d.find_elements_by_xpath("//span[@class='runtime']")
    rat_elms = d.find_elements_by_xpath("//div[@class='ipl-rating-star small']//span[@class='ipl-rating-star__rating']")
    vote_elms = d.find_elements_by_xpath("//span[@name='nv']")

    for name,year,gnr,runtm,rate,vote in zip(name_elms,year_elms,gnr_elms,runtm_elms,rat_elms,vote_elms):
        names.append(name.text)
        years.append(year.text)
        genres.append(gnr.text)
        runtimes.append(runtm.text)
        ratings.append(rate.text)
        votes.append(vote.text)

    data = pd.DataFrame({'Name':names, 'Year span':years, 'Genre':genres, 'Run tile':runtimes, 'Rating':ratings, 'Votes':votes})
    return d, data

In [32]:
url = 'https://www.imdb.com/list/ls095964455/'
driver, df = scrape_most_watched_tvSeries(url)
driver.quit()
df

Unnamed: 0,Name,Year span,Genre,Run tile,Rating,Votes
0,Game of Thrones,(2011–2019),"Action, Adventure, Drama",57 min,9.3,1801515
1,Stranger Things,(2016– ),"Drama, Fantasy, Horror",51 min,8.7,846878
2,The Walking Dead,(2010–2022),"Drama, Horror, Thriller",44 min,8.2,867462
3,13 Reasons Why,(2017–2020),"Drama, Mystery, Thriller",60 min,7.6,260065
4,The 100,(2014–2020),"Drama, Mystery, Sci-Fi",43 min,7.6,221042
5,Orange Is the New Black,(2013–2019),"Comedy, Crime, Drama",59 min,8.1,281227
6,Riverdale,(2017– ),"Crime, Drama, Mystery",45 min,6.9,123215
7,Grey's Anatomy,(2005– ),"Drama, Romance",41 min,7.6,257522
8,The Flash,(2014– ),"Action, Adventure, Drama",43 min,7.7,311128
9,Arrow,(2012–2020),"Action, Adventure, Crime",42 min,7.5,410713


## 10. Details of Datasets from UCI machine learning repositories.

Url = https://archive.ics.uci.edu/

You have to find the following details:

A) Dataset name

B) Data type

C) Task

D) Attribute type

E) No of instances

F) No of attribute

G) Year

Note: - from the home page you have to go to the Show All Dataset page through code.

In [35]:
def get_uci_datasets_details(url):
    d = get_drive_launch(url)
    d.find_element_by_xpath("//b[contains(text(),'View ALL Data Sets')]").click()
    t.sleep(2)
    url = d.current_url
    d.quit()
    data = pd.read_html(url)[5]
    data.drop(0, inplace = True)
    data.columns = ['Dataset name','Data type','Task','Attribute type','No of instances','No of attribute','Year']
    return data.fillna('-')

In [36]:
url = 'https://archive.ics.uci.edu/'
df = get_uci_datasets_details(url)
df

Unnamed: 0,Dataset name,Data type,Task,Attribute type,No of instances,No of attribute,Year
1,Abalone,Multivariate,Classification,"Categorical, Integer, Real",4177,8,1995
2,Adult,Multivariate,Classification,"Categorical, Integer",48842,14,1996
3,Annealing,Multivariate,Classification,"Categorical, Integer, Real",798,38,-
4,Anonymous Microsoft Web Data,-,Recommender-Systems,Categorical,37711,294,1998
5,Arrhythmia,Multivariate,Classification,"Categorical, Integer, Real",452,279,1998
6,Artificial Characters,Multivariate,Classification,"Categorical, Integer, Real",6000,7,1992
7,Audiology (Original),Multivariate,Classification,Categorical,226,-,1987
8,Audiology (Standardized),Multivariate,Classification,Categorical,226,69,1992
9,Auto MPG,Multivariate,Regression,"Categorical, Real",398,8,1993
10,Automobile,Multivariate,Regression,"Categorical, Integer, Real",205,26,1987
