## Importing Libraries 

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import pandas as pd 
import numpy as np
import time

## Creating Driver Instance

In [2]:
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))


## Navigation to Page

In [3]:
driver.get('https://scrapethissite.com/pages/ajax-javascript/')

## Creating a selenium object to click on hyperlink

In [4]:
link_btn = driver.find_element(By.LINK_TEXT,"2010")
driver.save_screenshot('before.png')
link_btn.click()
driver.implicitly_wait(10)
driver.save_screenshot('after.png')

True

### finding tag by class

In [5]:
table_data = driver.find_element(By.CLASS_NAME,'table')
table_data.tag_name

'table'

In [6]:
type(table_data)

selenium.webdriver.remote.webelement.WebElement

### extracting rows with selenium object and find_element() method

In [7]:
rows = table_data.find_elements(By.TAG_NAME,'tr')

In [8]:
type(rows[0])

selenium.webdriver.remote.webelement.WebElement

In [9]:
rows

[<selenium.webdriver.remote.webelement.WebElement (session="75f703f3afac854bc334fadb54f08017", element="be7577a3-bdc1-481a-9c4d-550078be7e7e")>]

### Creating Python List of data

In [10]:
data = []
for row in rows:
    current_row = []
    for cell in row.find_elements(By.TAG_NAME,'td')[:-1]:
        if cell.text!='\n' and cell.text !=None:
            current_row.append(cell.text)
        
    data.append(current_row)
            

In [11]:
data

[[]]

## Creating Pandas DataFrame  

In [12]:
df = pd.DataFrame(data[1:],columns=['Tilte','Nominations','Awards'])

In [13]:
df

Unnamed: 0,Tilte,Nominations,Awards


In [14]:
df['Year'] = '2010'

In [15]:
df

Unnamed: 0,Tilte,Nominations,Awards,Year


## Seggregating data for all the Available Years 

In [16]:
years = ['201'+str(i) for i in range(1,6)]

for year in years:
    driver.get('https://scrapethissite.com/pages/ajax-javascript/')
    driver.find_element(By.LINK_TEXT,year).click()
    time.sleep(5)
    rows = driver.find_element(By.CLASS_NAME,'table').find_elements(By.TAG_NAME,'tr')
    data = []
    for row in rows:
        current_row = []
        for cell in row.find_elements(By.TAG_NAME,'td')[:-1]: # last column has lot of NA so replacing it by year of awards
            if cell.text!='\n' and cell.text !=None:
                current_row.append(cell.text)
        else:
            current_row.append(year)

        data.append(current_row)
    new_df = pd.DataFrame(data[1:],columns=['Tilte','Nominations','Awards','Year'])
    df = pd.concat([df,new_df],axis=0,ignore_index=True)
  

In [17]:
df.head()

Unnamed: 0,Tilte,Nominations,Awards,Year
0,The Artist,10,5,2011
1,Hugo,11,5,2011
2,The Iron Lady,2,2,2011
3,The Descendants,5,1,2011
4,The Girl with the Dragon Tattoo,5,1,2011


In [18]:
df.tail()

Unnamed: 0,Tilte,Nominations,Awards,Year
69,Bear Story,1,1,2015
70,A Girl in the River: The Price of Forgiveness,1,1,2015
71,Son of Saul,1,1,2015
72,Spectre,1,1,2015
73,Stutterer,1,1,2015


## Saving to a CSV File

In [19]:
df.to_csv('OscarWinningFlims.csv')