# Web Scraping Wikitable
1.	Find the URL that contains the data you want to extract.
2.	Check the “robots.txt” of the website.
3.	Install and import the necessary libraries.

In [1]:
#Import Libraries
import requests
import pandas as pd
from bs4 import BeautifulSoup

## 4.	Send a GET request to the server.

In [2]:
# Page whose first sortable table is scraped below.
URL = 'https://en.wikipedia.org/wiki/List_of_datasets_for_machine-learning_research'
# requests has no default timeout; set one so the cell cannot hang forever
# on an unresponsive server.
response = requests.get(URL, timeout=30)
# Stop here on a 4xx/5xx reply instead of parsing an error page downstream.
response.raise_for_status()

## 5.	Parse the html data using Beautiful Soup.

In [3]:
# Build the parse tree from the response body with the "html.parser" backend.
soup = BeautifulSoup(markup=response.text, features="html.parser")
# Display the page title as a quick check that the expected page was fetched.
soup.title.text

'List of datasets for machine-learning research - Wikipedia'

## 6.	Write the code to extract the table.

In [4]:
# Locate the first table carrying the "sortable" CSS class.
table = soup.find('table', class_='sortable')
if table is None:
    # Fail with a clear message rather than an AttributeError on None below.
    raise ValueError("No table with class 'sortable' was found on the page")

# Collect the rows once and reuse them for both the header and the body.
tr = table.find_all('tr')

# Read column names from the first row only, so that <th> cells appearing in
# later rows cannot inflate the column count.
headers = [header.text.strip('\n') for header in tr[0].find_all('th')] if tr else []

# One list of cell texts per row. A row without <td> cells (the header row)
# yields an empty list, which shows up as an all-missing row in the frame.
rows = [[col.text.strip('\n') for col in row.find_all('td')] for row in tr]

df = pd.DataFrame(rows, columns=headers)
df.head()

Unnamed: 0,Dataset name,Brief description,Preprocessing,Instances,Format,Default task,Created (updated),Reference,Creator
0,,,,,,,,,
1,FERET (facial recognition technology),11338 images of 1199 individuals in different ...,None.,11338.0,Images,"Classification, face recognition",2003.0,[6][7],United States Department of Defense
2,Ryerson Audio-Visual Database of Emotional Spe...,"7,356 video and audio recordings of 24 profess...",Files labelled with expression. Perceptual val...,7356.0,"Video, sound files","Classification, face recognition, voice recogn...",2018.0,[8][9],S.R. Livingstone and F.A. Russo
3,SCFace,Color images of faces at various angles.,Location of facial features extracted. Coordin...,4160.0,"Images, text","Classification, face recognition",2011.0,[10][11],M. Grgic et al.
4,Yale Face Database,Faces of 15 individuals in 11 different expres...,Labels of expressions.,165.0,Images,Face recognition,1997.0,[12][13],J. Yang et al.


In [5]:
# Cleaning the data
# dropna() returns a new frame and leaves `df` untouched, so assign the result
# back; otherwise the all-missing row is still in `df` when it is written out.
# Re-running this cell is safe: dropping from an already clean frame is a no-op.
df = df.dropna()
df

Unnamed: 0,Dataset name,Brief description,Preprocessing,Instances,Format,Default task,Created (updated),Reference,Creator
1,FERET (facial recognition technology),11338 images of 1199 individuals in different ...,None.,11338,Images,"Classification, face recognition",2003,[6][7],United States Department of Defense
2,Ryerson Audio-Visual Database of Emotional Spe...,"7,356 video and audio recordings of 24 profess...",Files labelled with expression. Perceptual val...,7356,"Video, sound files","Classification, face recognition, voice recogn...",2018,[8][9],S.R. Livingstone and F.A. Russo
3,SCFace,Color images of faces at various angles.,Location of facial features extracted. Coordin...,4160,"Images, text","Classification, face recognition",2011,[10][11],M. Grgic et al.
4,Yale Face Database,Faces of 15 individuals in 11 different expres...,Labels of expressions.,165,Images,Face recognition,1997,[12][13],J. Yang et al.
5,Cohn-Kanade AU-Coded Expression Database,Large database of images with labels for expre...,Tracking of certain facial features.,500+ sequences,"Images, text",Facial expression analysis,2000,[14][15],T. Kanade et al.
6,JAFFE Facial Expression Database,213 images of 7 facial expressions (6 basic fa...,Images are cropped to the facial region. Inclu...,213,"Images, text",Facial expression cognition,1998,[16][17],"Lyons, Kamachi, Gyoba"
7,FaceScrub,Images of public figures scrubbed from image s...,Name and m/f annotation.,107818,"Images, text",Face recognition,2014,[18][19],H. Ng et al.
8,BioID Face Database,Images of faces with eye positions marked.,Manually set eye positions.,1521,"Images, text",Face recognition,2001,[20][21],BioID
9,Skin Segmentation Dataset,Randomly sampled color values from face images.,"B, G, R, values extracted.",245057,Text,"Segmentation, classification",2012,[22][23],R. Bhatt.
10,Bosphorus,3D Face image database.,34 action units and 6 expressions labeled; 24 ...,4652,"Images, text","Face recognition, classification",2008,[24][25],A Savran et al.


## 7.	Store the data in a certain format. 

In [6]:
# Write the frame to a UTF-8 encoded CSV file, leaving out the index column.
df.to_csv(
    path_or_buf='Wikitable.csv',
    encoding='utf-8',
    index=False,
)