## Importing Libraries 

In [63]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import pandas as pd 
import numpy as np
import time

## Creating Driver Instance

In [53]:
# Start a Chrome session using the chromedriver that webdriver_manager installs.
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service)


## Navigation to Page

In [54]:
# Open the demo page whose year links are clicked in the following cell.
page_url = 'https://scrapethissite.com/pages/ajax-javascript/'
driver.get(page_url)

## Locating the "2010" hyperlink with Selenium and clicking it

In [55]:
# implicitly_wait configures how long element lookups poll for a match; it does
# not pause by itself, so it has to be in place before the lookups below.
driver.implicitly_wait(10)
link_btn = driver.find_element(By.LINK_TEXT,"2010")
driver.save_screenshot('before.png')
link_btn.click()
# Block (up to the implicit-wait timeout) until the first data cell exists inside
# the element with class "table", so that 'after.png' and the following cells see
# the loaded rows. Raises NoSuchElementException if no cell appears in time.
driver.find_element(By.CSS_SELECTOR,'.table td')
driver.save_screenshot('after.png')

True

### finding tag by class

In [56]:
# Locate the element carrying the CSS class "table", then show its tag name
# to check that it really is the <table> element.
table_data = driver.find_element(by=By.CLASS_NAME, value='table')
table_data.tag_name

'table'

In [57]:
# Confirm that find_element returned a Selenium WebElement.
type(table_data)

selenium.webdriver.remote.webelement.WebElement

### Extracting rows from the Selenium element with the find_elements() method

In [58]:
# Collect every <tr> inside the table as a list of WebElements.
rows = table_data.find_elements(by=By.TAG_NAME, value='tr')

In [59]:
# Each entry of rows is itself a WebElement, so it can be searched for its cells.
type(rows[0])

selenium.webdriver.remote.webelement.WebElement

In [60]:
# Show the raw list of row elements (one WebElement per <tr> found in the table).
rows

[<selenium.webdriver.remote.webelement.WebElement (session="5cd840178945a40b9cef4d2e8cd6797b", element="54757f28-502b-4dc5-947b-0e5c1f7020c5")>,
 <selenium.webdriver.remote.webelement.WebElement (session="5cd840178945a40b9cef4d2e8cd6797b", element="f58aa0a3-62fa-4656-8ee2-13af8757f18e")>,
 <selenium.webdriver.remote.webelement.WebElement (session="5cd840178945a40b9cef4d2e8cd6797b", element="758b7369-0091-447b-a463-f004c1e114ec")>,
 <selenium.webdriver.remote.webelement.WebElement (session="5cd840178945a40b9cef4d2e8cd6797b", element="ec6a3889-b552-40da-a986-3977df567dd2")>,
 <selenium.webdriver.remote.webelement.WebElement (session="5cd840178945a40b9cef4d2e8cd6797b", element="e6116eae-57f8-43af-b2c0-7d8c1e804d35")>,
 <selenium.webdriver.remote.webelement.WebElement (session="5cd840178945a40b9cef4d2e8cd6797b", element="1fe427c5-e87b-4f05-97be-65b1dbb3d194")>,
 <selenium.webdriver.remote.webelement.WebElement (session="5cd840178945a40b9cef4d2e8cd6797b", element="c61eae3c-e4f3-45c6-95b7-73

### Creating Python List of data

In [24]:
# Turn every table row into a list of its cell texts.
# The last <td> of each row is dropped ([:-1]). The header row contributes an
# empty list, which is kept here so that data[0] stays the header placeholder
# that the DataFrame cell skips with data[1:].
data = []
for row in rows:
    current_row = []
    for cell in row.find_elements(By.TAG_NAME,'td')[:-1]:
        cell_text = cell.text  # read once: each .text access asks the browser again
        # .text is always a string, so only the bare-newline case needs filtering.
        if cell_text != '\n':
            current_row.append(cell_text)

    data.append(current_row)
            

In [25]:
# Inspect the scraped rows; the first entry is empty because the header row yields no kept cells.
data

[[],
 ["The King's Speech", '12', '4'],
 ['Inception', '8', '4'],
 ['The Social Network', '8', '3'],
 ['The Fighter', '7', '2'],
 ['Toy Story 3', '5', '2'],
 ['Alice in Wonderland', '3', '2'],
 ['Black Swan', '5', '1'],
 ['In a Better World', '1', '1'],
 ['The Lost Thing', '1', '1'],
 ['God of Love', '1', '1'],
 ['The Wolfman', '1', '1'],
 ['Strangers No More', '1', '1'],
 ['Inside Job', '1', '1']]

## Creating Pandas DataFrame  

In [26]:
# Build the 2010 frame from every scraped row except data[0], the empty header entry.
# NOTE(review): 'Tilte' looks like a typo for 'Title'; it is kept because the
# all-years cell further down uses the same label when concatenating.
column_names = ['Tilte','Nominations','Awards']
df = pd.DataFrame(data[1:], columns=column_names)

In [27]:
# Display the 2010 frame.
df

Unnamed: 0,Tilte,Nominations,Awards
0,The King's Speech,12,4
1,Inception,8,4
2,The Social Network,8,3
3,The Fighter,7,2
4,Toy Story 3,5,2
5,Alice in Wonderland,3,2
6,Black Swan,5,1
7,In a Better World,1,1
8,The Lost Thing,1,1
9,God of Love,1,1


In [28]:
# Tag every row scraped so far with its award year (stored as a string).
df = df.assign(Year='2010')

In [29]:
# Display the frame again, now including the Year column.
df

Unnamed: 0,Tilte,Nominations,Awards,Year
0,The King's Speech,12,4,2010
1,Inception,8,4,2010
2,The Social Network,8,3,2010
3,The Fighter,7,2,2010
4,Toy Story 3,5,2,2010
5,Alice in Wonderland,3,2,2010
6,Black Swan,5,1,2010
7,In a Better World,1,1,2010
8,The Lost Thing,1,1,2010
9,God of Love,1,1,2010


## Collecting and aggregating data for all the remaining years (2011–2015)

In [69]:
# Scrape the remaining years (2011-2015) and merge them into `df`, which is
# expected to already hold the 2010 rows together with their 'Year' column.
years = ['201'+str(i) for i in range(1,6)]

year_frames = []
for year in years:
    # Load the page again for each year, then click that year's link.
    driver.get('https://scrapethissite.com/pages/ajax-javascript/')
    driver.find_element(By.LINK_TEXT,year).click()
    time.sleep(5)  # fixed pause so the rows requested by the click can appear
    rows = driver.find_element(By.CLASS_NAME,'table').find_elements(By.TAG_NAME,'tr')

    data = []
    # rows[0] is the header row, which never ends up in the frame. Skipping it
    # here also avoids searching it for <td> elements, a lookup that waits out
    # the driver's implicit-wait timeout before coming back empty.
    for row in rows[1:]:
        current_row = []
        for cell in row.find_elements(By.TAG_NAME,'td')[:-1]: # last column has lot of NA so replacing it by year of awards
            cell_text = cell.text  # read once: each .text access asks the browser again
            if cell_text != '\n':
                current_row.append(cell_text)
        # Every row gets the award year as its last column.
        current_row.append(year)
        data.append(current_row)

    # Fail loudly instead of silently producing a frame with a missing year.
    if not data:
        raise RuntimeError('No rows scraped for ' + year + '; the table may not have loaded in time.')
    year_frames.append(pd.DataFrame(data,columns=['Tilte','Nominations','Awards','Year']))

# Concatenate once rather than growing `df` inside the loop. Rows of these years
# that are already in `df` are dropped first, so re-running the cell does not
# duplicate them; ignore_index gives the combined frame one unique 0..n-1 index.
df = pd.concat([df[~df['Year'].isin(years)],*year_frames],axis=0,ignore_index=True)
  

In [70]:
# First rows of the combined frame.
df.head()

Unnamed: 0,Tilte,Nominations,Awards,Year
0,The King's Speech,12,4,2010
1,Inception,8,4,2010
2,The Social Network,8,3,2010
3,The Fighter,7,2,2010
4,Toy Story 3,5,2,2010


In [71]:
# Last rows of the combined frame.
df.tail()

Unnamed: 0,Tilte,Nominations,Awards,Year
11,Bear Story,1,1,2015
12,A Girl in the River: The Price of Forgiveness,1,1,2015
13,Son of Saul,1,1,2015
14,Spectre,1,1,2015
15,Stutterer,1,1,2015


## Saving to a CSV File

In [73]:
# Write the combined frame to disk. index=False leaves out the row index, which
# is only a positional counter and not part of the scraped data.
# NOTE(review): the file name spells "Flims"; kept unchanged in case something reads this path.
df.to_csv('OscarWinningFlims.csv',index=False)