In [1]:
# imports the required packages
from bs4 import BeautifulSoup
from html.parser import HTMLParser
import json
import pandas as pd
from pandas.io.json import json_normalize
import re
import requests
from selenium import webdriver
import time
import urllib.request

## Data schema

* Area - Computer Vision
* Task - Image Classification
* Sub-task - Few-Shot Image Classification
* Dataset - Mini-ImageNet - 5-Shot Learning
* Leaderboard

In [2]:
"""
return_headings method

"""
def return_headings(url, heading_tag):
    
    html_doc = requests.get(url).content
    soup = BeautifulSoup(html_doc, 'html.parser')
    
    # searches for all h4 headings
    search_headings = soup.findAll(heading_tag)
    
    # empty area headings list
    headings = []
    
    # appends each area heading to the area_headings list
    for div in search_headings:
        headings.append(div.text)

    # removes the white space from the headings list
    headings = list(map(str.strip, headings))

    # list comphrehension for lower casing each string in the area_headings list
    headings = [x.lower() for x in headings]

    # list comprehension that replaces the white space with an dash in the area_headings list
    headings = [x.replace(" ", "-") for x in headings]
    
    # returns the headings array
    return headings

In [3]:
"""
return_dataset method

"""
def return_dataset(url, dataset_tag):
    
    html_doc = requests.get(url).content
    soup = BeautifulSoup(html_doc, 'html.parser')
    
    #
    search_datasets = soup.findAll('div', attrs = {'class': dataset_tag})
    
    # empty area headings list
    task_datasets = []
    
    # appends each task dataset to the task_datasets list
    for div in search_datasets:
        task_datasets.append(div.text)

    # removes the white space from the task_datasets list
    task_datasets = list(map(str.strip, task_datasets))

    # list comphrension for lower casing each string in the task_datasets list
    task_datasets = [x.lower() for x in task_datasets]

    # list comphrension for replacing the white space with an dash in the task_datasets list
    task_datasets = [x.replace(" ", "-") for x in task_datasets]
    
    # removes the brace in the task_datasets list
    task_datasets = [x.replace("(", "") for x in task_datasets]
    task_datasets = [x.replace(")", "") for x in task_datasets]
    
    # returns the task_datasets list
    return task_datasets

## Extracting the areas

In [4]:
#
url = 'https://paperswithcode.com/sota'
heading_tag = 'h4'

# invokes the return_headings function to return each of the area headings
area_headings = return_headings(url, heading_tag)

print(area_headings)

['computer-vision', 'natural-language-processing', 'medical', 'methodology', 'miscellaneous', 'speech', 'playing-games', 'graphs', 'time-series', 'audio', 'robots', 'music', 'computer-code', 'reasoning', 'knowledge-base', 'adversarial']


## Extracting the tasks

In [5]:
# initialises an empty task headings list
task_headings = []
    
# iterates through each of the area headings
for i in range(len(area_headings)):
    
    url = 'https://paperswithcode.com/area/' + area_headings[i]
    heading_tag = 'h4'
    
    # invokes the return_headings function to return 
    # and append each of the task headings to the task_headings list
    task_headings.append(return_headings(url, heading_tag))
    
# converts the resulting 2d array into a 1d array using list comprehension
# task_headings = [s for S in task_headings for s in S]

# prints the first 10 elements in the task_headings list
print(task_headings[0][0])

semantic-segmentation


## Extracts the sub-tasks

In [6]:
# initialises an empty subtask headings list
subtask_headings = []
    
# iterates through each of the area headings
for i in range(len(area_headings)):
    
    # iterates through each of the corresponding subtask headings
    for j in range(len(task_headings[i])):
        
        url = 'https://paperswithcode.com/area/' + area_headings[i] + '/' + task_headings[i][j] 
        heading_tag = 'h1'
        
        # invokes the return_headings function to return 
        # and append each of the subtask headings to the subtask_headings list
        subtask_headings.append(return_headings(url, heading_tag))
        
# converts the resulting 2d list into a 1d list using list comprehension
subtask_headings = [s for S in subtask_headings for s in S]

# list comprehension for removing duplicate subtask heading
subtask_headings = [ x for x in subtask_headings if "-subtasks" not in x]

In [453]:
print(len(subtask_headings))

1210


In [622]:
# displays the first 5 elements in the subtask_headings list
subtask_headings[:5]

['semantic-segmentation',
 'real-time-semantic-segmentation',
 'scene-segmentation',
 '3d-part-segmentation',
 'weakly-supervised-semantic-segmentation']

## Extracting the dataset relating to each sub-task

In [8]:
#
subtask_datasets = []

# iterates through each 
for i in range(len(subtask_headings)):
    
    #
    url = "https://paperswithcode.com/task/" + subtask_headings[i]
    html = requests.get(url).content
    soup = BeautifulSoup(html, 'html.parser')
    
    print(url)

    # nested for loop
    for link in soup.findAll('a', attrs={'href': re.compile("/sota/" + subtask_headings[i])}):
        
        # 
        subtask_datasets.append(link.get('href'))
        
        # remove duplicates from the subtasks_dataset list
        subtask_datasets  = list(set(subtask_datasets))

#
print(subtask_datasets)
print(len(subtask_datasets))

https://paperswithcode.com/task/semantic-segmentation
https://paperswithcode.com/task/real-time-semantic-segmentation
https://paperswithcode.com/task/scene-segmentation
https://paperswithcode.com/task/3d-part-segmentation
https://paperswithcode.com/task/weakly-supervised-semantic-segmentation
https://paperswithcode.com/task/semi-supervised-semantic-segmentation
https://paperswithcode.com/task/panoptic-segmentation
https://paperswithcode.com/task/unsupervised-semantic-segmentation
https://paperswithcode.com/task/image-classification
https://paperswithcode.com/task/few-shot-image-classification
https://paperswithcode.com/task/semi-supervised-image-classification
https://paperswithcode.com/task/fine-grained-image-classification
https://paperswithcode.com/task/hyperspectral-image-classification
https://paperswithcode.com/task/sequential-image-classification
https://paperswithcode.com/task/unsupervised-image-classification
https://paperswithcode.com/task/document-image-classification
https:

https://paperswithcode.com/task/video-object-tracking
https://paperswithcode.com/task/dynamic-region-segmentation
https://paperswithcode.com/task/video
https://paperswithcode.com/task/video-interlacing
https://paperswithcode.com/task/image-retrieval
https://paperswithcode.com/task/content-based-image-retrieval
https://paperswithcode.com/task/sketch-based-image-retrieval
https://paperswithcode.com/task/multi-label-image-retrieval
https://paperswithcode.com/task/medical-image-retrieval
https://paperswithcode.com/task/image-instance-retrieval
https://paperswithcode.com/task/texture-image-retrieval
https://paperswithcode.com/task/face-image-retrieval
https://paperswithcode.com/task/object-recognition
https://paperswithcode.com/task/3d-object-recognition
https://paperswithcode.com/task/continuous-object-recognition
https://paperswithcode.com/task/depiction-invariant-object-recognition
https://paperswithcode.com/task/action-recognition
https://paperswithcode.com/task/action-recognition-in-vi

https://paperswithcode.com/task/semi-supervised-video-object-segmentation
https://paperswithcode.com/task/unsupervised-video-object-segmentation
https://paperswithcode.com/task/visual-object-tracking
https://paperswithcode.com/task/multiple-object-tracking
https://paperswithcode.com/task/multi-object-tracking
https://paperswithcode.com/task/online-multi-object-tracking
https://paperswithcode.com/task/thermal-infrared-object-tracking
https://paperswithcode.com/task/video-object-tracking
https://paperswithcode.com/task/unsupervised-image-to-image-translation
https://paperswithcode.com/task/synthetic-to-real-translation
https://paperswithcode.com/task/multimodal-unsupervised-image-to-image-translation
https://paperswithcode.com/task/photo-to-caricature-translation
https://paperswithcode.com/task/cartoon-to-real-translation
https://paperswithcode.com/task/gesture-recognition
https://paperswithcode.com/task/hand-gesture-recognition
https://paperswithcode.com/task/hand-gesture-recognition
ht

https://paperswithcode.com/task/sign-language-recognition
https://paperswithcode.com/task/fine-grained-image-recognition
https://paperswithcode.com/task/license-plate-recognition
https://paperswithcode.com/task/image-recognition
https://paperswithcode.com/task/contour-detection
https://paperswithcode.com/task/interactive-segmentation
https://paperswithcode.com/task/infrared-and-visible-image-fusion
https://paperswithcode.com/task/visual-place-recognition
https://paperswithcode.com/task/line-segment-detection
https://paperswithcode.com/task/material-recognition
https://paperswithcode.com/task/multiview-learning
https://paperswithcode.com/task/dense-pixel-correspondence-estimation
https://paperswithcode.com/task/image-quality-estimation
https://paperswithcode.com/task/lipreading
https://paperswithcode.com/task/art-analysis
https://paperswithcode.com/task/fake-image-detection
https://paperswithcode.com/task/scene-flow-estimation
https://paperswithcode.com/task/human-instance-segmentation


https://paperswithcode.com/task/memex-question-answering
https://paperswithcode.com/task/mathematical-question-answering
https://paperswithcode.com/task/language-modelling
https://paperswithcode.com/task/sentence-pair-modeling
https://paperswithcode.com/task/sentiment-analysis
https://paperswithcode.com/task/aspect-based-sentiment-analysis
https://paperswithcode.com/task/multimodal-sentiment-analysis
https://paperswithcode.com/task/twitter-sentiment-analysis
https://paperswithcode.com/task/fine-grained-opinion-analysis
https://paperswithcode.com/task/text-classification
https://paperswithcode.com/task/document-classification
https://paperswithcode.com/task/sentence-classification
https://paperswithcode.com/task/text-categorization
https://paperswithcode.com/task/emotion-classification
https://paperswithcode.com/task/citation-intent-classification
https://paperswithcode.com/task/cross-domain-text-classification
https://paperswithcode.com/task/text-generation
https://paperswithcode.com/t

https://paperswithcode.com/task/negation-detection
https://paperswithcode.com/task/phrase-grounding
https://paperswithcode.com/task/lexical-analysis
https://paperswithcode.com/task/hypernym-discovery
https://paperswithcode.com/task/text-effects-transfer
https://paperswithcode.com/task/dialog-act-classification
https://paperswithcode.com/task/unsupervised-sentence-compression
https://paperswithcode.com/task/nested-mention-recognition
https://paperswithcode.com/task/entity-alignment
https://paperswithcode.com/task/information-retrieval
https://paperswithcode.com/task/abstract-anaphora-resolution
https://paperswithcode.com/task/bridging-anaphora-resolution
https://paperswithcode.com/task/anaphora-resolution
https://paperswithcode.com/task/phrase-vector-embedding
https://paperswithcode.com/task/query-wellformedness
https://paperswithcode.com/task/document-representation
https://paperswithcode.com/task/abstract-argumentation
https://paperswithcode.com/task/entity-resolution
https://paperswi

https://paperswithcode.com/task/graph-representation-learning
https://paperswithcode.com/task/knowledge-graph-embeddings
https://paperswithcode.com/task/knowledge-graph-embedding
https://paperswithcode.com/task/learning-word-embeddings
https://paperswithcode.com/task/document-embedding
https://paperswithcode.com/task/multilingual-word-embeddings
https://paperswithcode.com/task/learning-semantic-representations
https://paperswithcode.com/task/sentence-embeddings-for-biomedical-texts
https://paperswithcode.com/task/learning-representation-of-multi-view-data
https://paperswithcode.com/task/learning-representation-on-graph
https://paperswithcode.com/task/learning-network-representations
https://paperswithcode.com/task/word-embeddings
https://paperswithcode.com/task/learning-word-embeddings
https://paperswithcode.com/task/multilingual-word-embeddings
https://paperswithcode.com/task/transfer-learning
https://paperswithcode.com/task/multi-task-learning
https://paperswithcode.com/task/transfer

https://paperswithcode.com/task/classification-of-variable-stars
https://paperswithcode.com/task/non-intrusive-load-monitoring
https://paperswithcode.com/task/home-activity-monitoring
https://paperswithcode.com/task/air-quality-inference
https://paperswithcode.com/task/modeling-local-geometric-structure
https://paperswithcode.com/task/photometric-redshift-estimation
https://paperswithcode.com/task/detecting-adverts
https://paperswithcode.com/task/advertising
https://paperswithcode.com/task/speech-recognition
https://paperswithcode.com/task/noisy-speech-recognition
https://paperswithcode.com/task/distant-speech-recognition
https://paperswithcode.com/task/robust-speech-recognition
https://paperswithcode.com/task/visual-speech-recognition
https://paperswithcode.com/task/accented-speech-recognition
https://paperswithcode.com/task/sequence-to-sequence-speech-recognition
https://paperswithcode.com/task/english-conversational-speech-recognition
https://paperswithcode.com/task/speaker-verifica

https://paperswithcode.com/task/motion-planning
https://paperswithcode.com/task/visual-navigation
https://paperswithcode.com/task/robotic-grasping
https://paperswithcode.com/task/human-grasp-contact-prediction
https://paperswithcode.com/task/legged-robots
https://paperswithcode.com/task/robot-task-planning
https://paperswithcode.com/task/deformable-object-manipulation
https://paperswithcode.com/task/gesture-generation
https://paperswithcode.com/task/not-found
https://paperswithcode.com/task/marine-robot-navigation
https://paperswithcode.com/task/optimal-motion-planning
https://paperswithcode.com/task/style-transfer
https://paperswithcode.com/task/image-stylization
https://paperswithcode.com/task/style-generalization
https://paperswithcode.com/task/music-genre-transfer
https://paperswithcode.com/task/font-style-transfer
https://paperswithcode.com/task/music-information-retrieval
https://paperswithcode.com/task/music-modeling
https://paperswithcode.com/task/music-auto-tagging
https://pap

In [66]:
# sorts the subtask_datasets list
subtask_datasets = sorted(subtask_datasets)

subtask_datasets

['/sota/3d-face-reconstruction-on-aflw2000-3d',
 '/sota/3d-face-reconstruction-on-florence',
 '/sota/3d-human-pose-estimation-on-chall-h80k',
 '/sota/3d-human-pose-estimation-on-human36m',
 '/sota/3d-medical-imaging-segmentation-on-tcia',
 '/sota/3d-object-classification-on-modelnet40',
 '/sota/3d-object-detection-on-kitti-cars-easy',
 '/sota/3d-object-detection-on-kitti-cars-hard',
 '/sota/3d-object-detection-on-kitti-cars-moderate',
 '/sota/3d-object-detection-on-kitti-cyclists',
 '/sota/3d-object-detection-on-kitti-cyclists-easy',
 '/sota/3d-object-detection-on-kitti-cyclists-hard',
 '/sota/3d-object-detection-on-kitti-pedestrians',
 '/sota/3d-object-detection-on-kitti-pedestrians-easy',
 '/sota/3d-object-detection-on-kitti-pedestrians-hard',
 '/sota/3d-object-detection-on-nyu-depth-v2',
 '/sota/3d-object-detection-on-sun-rgbd',
 '/sota/3d-object-recognition-on-modelnet40',
 '/sota/3d-object-reconstruction-on-data3dr2n2',
 '/sota/3d-part-segmentation-on-shapenet-part',
 '/sota/3d-re

In [294]:
# empty data list
data = []

#
for i in range(20):
    
    #
    url = "https://paperswithcode.com" + subtask_datasets[i]  
    html_doc = requests.get(url).content
    soup = BeautifulSoup(html_doc, 'html.parser')

    # extracts the json data from the evaluation table on each page 
#     data.append(subtask_datasets[i])
    data.append(json.loads(soup.find('script', id = 'evaluation-table-data').text))
    
    print(url)
    
# data

https://paperswithcode.com/sota/3d-face-reconstruction-on-aflw2000-3d
https://paperswithcode.com/sota/3d-face-reconstruction-on-florence
https://paperswithcode.com/sota/3d-human-pose-estimation-on-chall-h80k
https://paperswithcode.com/sota/3d-human-pose-estimation-on-human36m
https://paperswithcode.com/sota/3d-medical-imaging-segmentation-on-tcia
https://paperswithcode.com/sota/3d-object-classification-on-modelnet40
https://paperswithcode.com/sota/3d-object-detection-on-kitti-cars-easy
https://paperswithcode.com/sota/3d-object-detection-on-kitti-cars-hard
https://paperswithcode.com/sota/3d-object-detection-on-kitti-cars-moderate
https://paperswithcode.com/sota/3d-object-detection-on-kitti-cyclists
https://paperswithcode.com/sota/3d-object-detection-on-kitti-cyclists-easy
https://paperswithcode.com/sota/3d-object-detection-on-kitti-cyclists-hard
https://paperswithcode.com/sota/3d-object-detection-on-kitti-pedestrians
https://paperswithcode.com/sota/3d-object-detection-on-kitti-pedestria

In [268]:
# list comprehension that converts the resulting 2d array into a 1d array
data = [s for S in data for s in S]

In [276]:
# normalizes the json in the data array and creates a pandas dataframe
papers = json_normalize(data)

# converts the papers.url column to a list
papers = papers['paper.url'].tolist()

In [420]:
papers[0]

'/paper/joint-3d-face-reconstruction-and-dense'

In [376]:
#
leaderboards_data = []

# iterates through each item in the paper list
for i in range(1,3):
    
    #
    url = "https://paperswithcode.com" + papers[i]
    html_doc = requests.get(url).content
    soup = BeautifulSoup(html_doc, 'html.parser')
    
    print(url)
    
#     # extract all the tables in the HTML 
#     tables = soup.find_all('table')

#     #get the class name for each
#     for table in tables:
#         leaderboards_data.append(table['class'])
    
    #
#     leaderboards_data.append(json.loads(soup.find('script', id = 'evaluation-table-data')))
#     leaderboards_data.append((soup.find('script', id = 'paper-evaluation-section')))

    
    
#     searches for all h4 headings
    x = soup.findAll('div', attrs = {'class': 'sota-table'})
    
    for div in x:
        leaderboards_data.append(div.text)

    


    
leaderboards_data
    #     print(url)

https://paperswithcode.com/paper/face-alignment-across-large-poses-a-3d
https://paperswithcode.com/paper/dense-face-alignment


['\n\n\nTask\nDataset\nModel\nMetric name\nMetric value\nGlobal rank\nRemove\n\n\n\n                                                Head Pose Estimation\n                                            \n\n                                                AFLW2000\n                                            \n\n                                                3DDFA\n                                            \n\n                                                MAE\n                                            \n\n                                                 7.393\n                                            \n\n                                                # 2\n                                            \n\n\n\n\n\n\n                                                        -\n                                                    \n\n\n\n\n\n                                                Face Alignment\n                                            \n\n                                      

In [None]:
table = soup.find_all('table')[0] 

# dfz = pd.read_html(str(table))[0]

# dfz

table

In [454]:
#
tables = []

#
paper_titles = []

#
for i in range(0,4):
    
    #
    url = "https://paperswithcode.com" + papers[i]
    html_doc = requests.get(url).content
    soup = BeautifulSoup(html_doc, 'html.parser')
    
    print(url)
    
    #
    
    tables.append(soup.findAll('table')[0])
    
    
    
    search_titles = soup.findAll('div', attrs = {'class': 'paper-title'})
    
    # appends each task dataset to the task_datasets list
    for div in search_titles:
        
        title = div.h1
        
        paper_titles.append((title.text))
        
#         paper_titles.append(div.h1)
    
    
#     print(tables)
    
    print('_____')
    

https://paperswithcode.com/paper/joint-3d-face-reconstruction-and-dense
_____
https://paperswithcode.com/paper/face-alignment-across-large-poses-a-3d
_____
https://paperswithcode.com/paper/dense-face-alignment
_____
https://paperswithcode.com/paper/joint-3d-face-reconstruction-and-dense
_____


In [455]:
paper_titles

['Joint 3D Face Reconstruction and Dense Alignment with Position Map Regression Network',
 'Face Alignment Across Large Poses: A 3D Solution',
 'Dense Face Alignment',
 'Joint 3D Face Reconstruction and Dense Alignment with Position Map Regression Network']

In [461]:
#
dfz = pd.read_html(str(tables))[1]

dfz

Unnamed: 0,Task,Dataset,Model,Metric name,Metric value,Global rank,Remove
0,Head Pose Estimation,AFLW2000,3DDFA,MAE,7.393,# 2,-
1,Face Alignment,AFLW2000-3D,3DDFA + SDM,Mean NME,4.94%,# 5,-
2,3D Face Reconstruction,AFLW2000-3D,3DDFA,Mean NME,5.3695%,# 2,-
3,Head Pose Estimation,BIWI,3DDFA,MAE,19.068,# 4,-
4,3D Face Reconstruction,Florence,3DDFA,Mean NME,6.3833%,# 3,-


In [462]:
paper_titles[1]

'Face Alignment Across Large Poses: A 3D Solution'

In [379]:
# def normalize_space(s):
#     """Return s stripped of leading/trailing whitespace
#     and with internal runs of whitespace replaced by a single SPACE"""
#     # This should be a str method :-(
#     return ' '.join(s.split())

# replacement = [normalize_space(i) for i in leaderboards_data]

# replacement

table = soup.find_all('table')[0] 

dfz = pd.read_html(str(table))[0]

dfz


# countries = dfz["COUNTRY"].tolist()
# users = df["AMOUNT"].tolist()

Unnamed: 0,Task,Dataset,Model,Metric name,Metric value,Global rank,Remove
0,Face Alignment,AFLW2000-3D,DeFA,Mean NME,4.50%,# 4,-
1,3D Face Reconstruction,AFLW2000-3D,DeFA,Mean NME,5.6454%,# 3,-
2,Face Alignment,AFLW-LFPA,DeFA,Mean NME,3.86%,# 2,-


In [368]:

table = soup.find('table', {'class': 'table-striped'})
th = table.findAll('td')

th

[<td>
                                                 Head Pose Estimation
                                             </td>, <td>
                                                 AFLW2000
                                             </td>, <td>
                                                 3DDFA
                                             </td>, <td>
                                                 MAE
                                             </td>, <td>
                                                  7.393
                                             </td>, <td>
                                                 # 2
                                             </td>, <td>
 <form action="" method="post">
 <input name="csrfmiddlewaretoken" type="hidden" value="5RLFYIdtmqc5NAb7wPChmiJ7DrCyS73NZtgunt9tmGAP1iI8kXK4J28OA7XGEItN"/>
 <input name="remove_row_pk" type="hidden" value="2440"/>
 <input name="remove_metric_pk" type="hidden" value="426"/>
 <button class="btn btn-danger" t

In [324]:
# removes the white space from the headings list
leaderboards_data = [line.rstrip('\n') for line in leaderboards_data]

leaderboards_data

['Task\nDataset\nModel\nMetric name\nMetric value\nGlobal rank\nRemove\n\n\n\n                                                Face Alignment\n                                            \n\n                                                AFLW2000-3D\n                                            \n\n                                                PRN\n                                            \n\n                                                Mean NME \n                                            \n\n                                                3.62%\n                                            \n\n                                                # 2\n                                            \n\n\n\n\n\n\n                                                        -\n                                                    \n\n\n\n\n\n                                                3D Face Reconstruction\n                                            \n\n                                    

In [246]:

# # iterates through each task in the task_headings list
# # for i in range(0, (len(task_headings) - 1)):
# for i in range(0, 7):
    
#     # iterates through each dataset in the task_heading
#     for j in range(len(datasets[i])):
        
#         url = "https://paperswithcode.com/sota/"+ task_headings[i] + "-on-" + datasets[i][j]
        
#         html_doc = requests.get(url).content
#         soup = BeautifulSoup(html_doc, 'html.parser')

#         # extracts the json data from the evaluation table on each page 
#         data.append(json.loads(soup.find('script', id='evaluation-table-data').text))




https://paperswithcode.com/sota/3d-face-reconstruction-on-aflw2000-3d
https://paperswithcode.com/sota/3d-face-reconstruction-on-florence
https://paperswithcode.com/sota/3d-human-pose-estimation-on-chall-h80k
https://paperswithcode.com/sota/3d-human-pose-estimation-on-human36m
https://paperswithcode.com/sota/3d-medical-imaging-segmentation-on-tcia


In [243]:
# # subtask_datasets[0]

# d = {}

# for i in range(3):
#     d[subtask_datasets[i]] = data[i]

# d

In [247]:
# xx = json_normalize(d).unstack().apply(pd.Series)

# xx

In [254]:
# a = data[0][0]

# a

In [253]:
# # 
# xx = pd.io.json.json_normalize(a, record_path = 'metrics', record_prefix='data.', meta='metrics')

# xx

In [None]:
# works_data = json_normalize(data = data['programs'], record_path='works', 
#                             meta=['id', 'orchestra','programID', 'season'])
# works_data.head(3)

Unnamed: 0,evaluation_date,method,metrics.Average 3D Error,metrics.Dice Score,metrics.MPJPE,metrics.Mean NME,paper.code,paper.id,paper.published,paper.title,paper.url,rank,row_id,table_id,uses_additional_data
0,2018-03-21,PRN,,,,3.9625%,True,7837,2018-03-21T00:00:00.000000,Joint 3D Face Reconstruction and Dense Alignme...,/paper/joint-3d-face-reconstruction-and-dense,1,2395,316,False
1,2015-11-23,3DDFA,,,,5.3695%,False,37498,2015-11-23T00:00:00.000000,Face Alignment Across Large Poses: A 3D Solution,/paper/face-alignment-across-large-poses-a-3d,2,2397,316,False
2,2017-09-05,DeFA,,,,5.6454%,False,17949,2017-09-05T00:00:00.000000,Dense Face Alignment,/paper/dense-face-alignment,3,2396,316,False
3,2018-03-21,PRN,,,,3.7551%,True,7837,2018-03-21T00:00:00.000000,Joint 3D Face Reconstruction and Dense Alignme...,/paper/joint-3d-face-reconstruction-and-dense,1,2398,317,False
4,2017-03-22,VRN-Guided,,,,5.2667%,False,17815,2017-03-22T00:00:00.000000,Large Pose 3D Face Reconstruction from a Singl...,/paper/large-pose-3d-face-reconstruction-from-a,2,2400,317,False
5,2015-11-23,3DDFA,,,,6.3833%,False,37498,2015-11-23T00:00:00.000000,Face Alignment Across Large Poses: A 3D Solution,/paper/face-alignment-across-large-poses-a-3d,3,2399,317,False
6,2019-02-15,GANFit,0.95,,,,True,106217,2019-02-15T00:00:00.000000,GANFIT: Generative Adversarial Network Fitting...,/paper/ganfit-generative-adversarial-network-f...,4,4513,317,False
7,2018-06-15,Unsupervised-3DMMR,1.5,,,,True,197,2018-06-15T00:00:00.000000,Unsupervised Training for 3D Morphable Model R...,/paper/unsupervised-training-for-3d-morphable-...,5,4514,317,False
8,2017-01-19,itwmm,1.82,,,,False,26933,2017-01-19T00:00:00.000000,"3D Face Morphable Models ""In-the-Wild""",/paper/3d-face-morphable-models-in-the-wild,6,4516,317,False
9,2016-12-15,3DMM-CNN,1.93,,,,True,27712,2016-12-15T00:00:00.000000,Regressing Robust and Discriminative 3D Morpha...,/paper/regressing-robust-and-discriminative-3d,7,4515,317,False


In [176]:
xx = json_normalize(d).unstack().apply(pd.Series)

xx

Unnamed: 0,Unnamed: 1,0,1,2,3,4,5,6
/sota/3d-face-reconstruction-on-aflw2000-3d,0,"{'table_id': 316, 'row_id': 2395, 'rank': 1, '...","{'table_id': 316, 'row_id': 2397, 'rank': 2, '...","{'table_id': 316, 'row_id': 2396, 'rank': 3, '...",,,,
/sota/3d-face-reconstruction-on-florence,0,"{'table_id': 317, 'row_id': 2398, 'rank': 1, '...","{'table_id': 317, 'row_id': 2400, 'rank': 2, '...","{'table_id': 317, 'row_id': 2399, 'rank': 3, '...","{'table_id': 317, 'row_id': 4513, 'rank': 4, '...","{'table_id': 317, 'row_id': 4514, 'rank': 5, '...","{'table_id': 317, 'row_id': 4516, 'rank': 6, '...","{'table_id': 317, 'row_id': 4515, 'rank': 7, '..."
/sota/3d-human-pose-estimation-on-chall-h80k,0,"{'table_id': 213, 'row_id': 2011, 'rank': 1, '...",,,,,,


In [168]:
# 
xx = beddit_data = pd.io.json.json_normalize(d, record_prefix='data.', meta='_id')

xx

Unnamed: 0,/sota/3d-face-reconstruction-on-aflw2000-3d,/sota/3d-face-reconstruction-on-florence,/sota/3d-human-pose-estimation-on-chall-h80k
0,"[{'table_id': 316, 'row_id': 2395, 'rank': 1, ...","[{'table_id': 317, 'row_id': 2398, 'rank': 1, ...","[{'table_id': 213, 'row_id': 2011, 'rank': 1, ..."


In [169]:
x3 = xx['data.properties'].apply(pd.Series)

x3

KeyError: 'data.properties'

In [128]:
# x = json_normalize(data[0]['metrics'])

# x.head()

data.keys

AttributeError: 'list' object has no attribute 'keys'

In [125]:
y = json_normalize(data[1])

y.head()

Unnamed: 0,evaluation_date,method,metrics.Average 3D Error,metrics.Mean NME,paper.code,paper.id,paper.published,paper.title,paper.url,rank,row_id,table_id,uses_additional_data
0,2018-03-21,PRN,,3.7551%,True,7837,2018-03-21T00:00:00.000000,Joint 3D Face Reconstruction and Dense Alignme...,/paper/joint-3d-face-reconstruction-and-dense,1,2398,317,False
1,2017-03-22,VRN-Guided,,5.2667%,False,17815,2017-03-22T00:00:00.000000,Large Pose 3D Face Reconstruction from a Singl...,/paper/large-pose-3d-face-reconstruction-from-a,2,2400,317,False
2,2015-11-23,3DDFA,,6.3833%,False,37498,2015-11-23T00:00:00.000000,Face Alignment Across Large Poses: A 3D Solution,/paper/face-alignment-across-large-poses-a-3d,3,2399,317,False
3,2019-02-15,GANFit,0.95,,True,106217,2019-02-15T00:00:00.000000,GANFIT: Generative Adversarial Network Fitting...,/paper/ganfit-generative-adversarial-network-f...,4,4513,317,False
4,2018-06-15,Unsupervised-3DMMR,1.5,,True,197,2018-06-15T00:00:00.000000,Unsupervised Training for 3D Morphable Model R...,/paper/unsupervised-training-for-3d-morphable-...,5,4514,317,False


In [84]:
df_info = json_normalize(data[0]).unstack().apply(pd.Series)
df_info

Unnamed: 0,Unnamed: 1,0
evaluation_date,0,2018-03-21
evaluation_date,1,2015-11-23
evaluation_date,2,2017-09-05
method,0,PRN
method,1,3DDFA
method,2,DeFA
metrics.Mean NME,0,3.9625%
metrics.Mean NME,1,5.3695%
metrics.Mean NME,2,5.6454%
paper.code,0,True


In [25]:
#
for i in range(2):
    
    print(i)
# nycphil = json_normalize(d['programs'])
    df = json_normalize(data[1])

# df
df

0
1


Unnamed: 0,evaluation_date,method,metrics.Area,metrics.Food,metrics.Joint,metrics.Price,metrics.Request,paper.code,paper.id,paper.published,paper.title,paper.url,rank,row_id,table_id,uses_additional_data
0,2018-10-22,StateNet,,,75.5,,,True,60150.0,2018-10-22T00:00:00.000000,Towards Universal Dialogue State Tracking,/paper/towards-universal-dialogue-state-tracking,1,4709,17,False
1,2018-05-19,Zhong et al.,-,-,74.5,-,97.5,True,580.0,2018-05-19T00:00:00.000000,Global-Locally Self-Attentive Dialogue State T...,/paper/global-locally-self-attentive-dialogue-...,2,50,17,False
2,2016-06-12,Neural belief tracker,90,84,73.4,94,96.5,False,23790.0,2016-06-12T00:00:00.000000,Neural Belief Tracker: Data-Driven Dialogue St...,/paper/neural-belief-tracker-data-driven-dialogue,3,52,17,False
3,2018-04-18,Liu et al.,90,84,72.0,92,-,True,5763.0,2018-04-18T00:00:00.000000,Dialogue Learning with Human Teaching and Feed...,/paper/dialogue-learning-with-human-teaching-and,4,51,17,False
4,,RNN,92,86,69.0,86,95.7,False,,,,,5,53,17,False


In [454]:
# creates empty subtask datasets list
subtask_ = []

# iterates through each of the tasks
for i in range(len(subtask_headings)):
    
    # sets a new url for each of the task headings
    url = 'https://paperswithcode.com/task/' + subtask_headings[i]
    dataset_tag = 'dataset black-links'
    
    # invokes the return_dataset function to return
    # and append each of the datasets to the datasets list 
    subtask_datasets.append(return_dataset(url, dataset_tag))

In [591]:
# print(subtask_datasets)

## Extracting the leaderboard

In [620]:
# initialises the Chrome webdriver
# driver = webdriver.Chrome()

url = "https://paperswithcode.com" + subtask_datasets[0]
html = requests.get(url).content
soup = BeautifulSoup(html, 'html.parser')

# driver.get(url)
print(url)

https://paperswithcode.com/sota/unsupervised-semantic-segmentation-on-potsdam-1


In [608]:
# # initialises the Chrome webdriver
# driver = webdriver.Chrome()

# # iterates through each task in the task_headings list
# for i in range(len(subtask_headings)):
    
#     # iterates through each dataset in the task_heading
#     for j in range(len(subtask_datasets[i])):
        
#         print("https://paperswithcode.com/sota/"+ subtask_headings[i] + "-on-" + subtask_datasets[i][j])

#         # directs the driver to the paperswithcode webpage
#         driver.get("https://paperswithcode.com/sota/"+ subtask_headings[i] + "-on-" + subtask_datasets[i][j])

In [579]:
# initialises the Chrome webdriver
# driver = webdriver.Chrome()
        
# directs the driver to the paperswithcode webpage
# driver.get("https://paperswithcode.com/task/few-shot-image-classification")



In [None]:
###################

In [618]:
#


In [590]:
# initialises the Chrome webdriver
driver = webdriver.Chrome()

# iterates through each task in the task_headings list
for i in range(len(subtask_datasets)):
        
    print("https://paperswithcode.com" + subtask_datasets[i])
        
    # directs the driver to the paperswithcode webpage
    driver.get("https://paperswithcode.com"+ subtask_datasets[i])

https://paperswithcode.com/sota/image-classification-on-inaturalist
https://paperswithcode.com/sota/image-classification-on-svhn
https://paperswithcode.com/sota/image-classification-on-msrc-21-per-class
https://paperswithcode.com/sota/image-classification-on-cinic-10
https://paperswithcode.com/sota/image-classification-on-stl-10
https://paperswithcode.com/sota/object-detection-on-pascal-voc-2007
https://paperswithcode.com/sota/image-classification-on-cifar-10
https://paperswithcode.com/sota/image-classification-on-fashion-mnist
https://paperswithcode.com/sota/image-classification-on-imagenet
https://paperswithcode.com/sota/image-classification-on-mnist
https://paperswithcode.com/sota/image-classification-on-multimnist
https://paperswithcode.com/sota/image-classification-on-msrc-21-per-pixel
https://paperswithcode.com/sota/image-classification-on-cifar-100


In [446]:
# initialises the Chrome webdriver
driver = webdriver.Chrome()

# iterates through each task in the task_headings list
for i in range(0, (len(task_headings) - 1)):
    
    # iterates through each dataset in the task_heading
    for j in range(len(datasets[i])):
        
        print("https://paperswithcode.com/sota/"+ task_headings[i] + "-on-" + datasets[i][j])

        # directs the driver to the paperswithcode webpage
        driver.get("https://paperswithcode.com/sota/"+ task_headings[i] + "-on-" + datasets[i][j])

TypeError: must be str, not list

## Extracting leaderboard data

In [380]:
datasets[0][1]

task_headings[7]

'denoising'

In [370]:
# empty data list
data = []

# iterates through each task in the task_headings list
# for i in range(0, (len(task_headings) - 1)):
for i in range(0, 7):
    
    # iterates through each dataset in the task_heading
    for j in range(len(datasets[i])):
        
        url = "https://paperswithcode.com/sota/"+ task_headings[i] + "-on-" + datasets[i][j]
        
        html_doc = requests.get(url).content
        soup = BeautifulSoup(html_doc, 'html.parser')

        # extracts the json data from the evaluation table on each page 
        data.append(json.loads(soup.find('script', id='evaluation-table-data').text))
        
#         print("https://paperswithcode.com/sota/"+ task_headings[i] + "-on-" + datasets[i][j])

        # directs the driver to the paperswithcode webpage
#         driver.get("https://paperswithcode.com/sota/"+ task_headings[i] + "-on-" + datasets[i][j])


AttributeError: 'NoneType' object has no attribute 'text'

In [351]:
#
url = 'https://paperswithcode.com/sota/semantic-segmentation-on-cityscapes'
heading_tag = 'title'

html_doc = requests.get(url).content
soup = BeautifulSoup(html_doc, 'html.parser')

# extracts the json data from the evaluation table on each page 
data = json.loads(soup.find('script', id='evaluation-table-data').text)

# normalizes the json data

# nycphil = json_normalize(d['programs'])
df = json_normalize(data)

# creates a metric column from the metric.index column title
df['metric'] = df.columns[2]

# strips the first 8 characters
df['metric'] = df['metric'].map(lambda x: str(x)[8:])

df

Unnamed: 0,evaluation_date,method,metrics.Mean IoU,paper.code,paper.id,paper.published,paper.title,paper.url,rank,row_id,table_id,uses_additional_data,metric
0,2018-02-07,DeepLabv3+ (Xception-JFT),82.1%,True,8632,2018-02-07T00:00:00.000000,Encoder-Decoder with Atrous Separable Convolut...,/paper/encoder-decoder-with-atrous-separable,1,1919,187,True,Mean IoU
1,2017-12-07,Mapillary,82.0%,True,13180,2017-12-07T00:00:00.000000,In-Place Activated BatchNorm for Memory-Optimi...,/paper/in-place-activated-batchnorm-for-memory,2,1920,187,True,Mean IoU
2,2018-09-04,OCNet,81.7%,True,56189,2018-09-04T00:00:00.000000,OCNet: Object Context Network for Scene Parsing,/paper/ocnet-object-context-network-for-scene,3,1938,187,False,Mean IoU
3,2018-09-09,Dual Attention Network,81.5%,True,56731,2018-09-09T00:00:00.000000,Dual Attention Network for Scene Segmentation,/paper/dual-attention-network-for-scene-segmen...,4,2154,187,False,Mean IoU
4,2016-12-04,PSPNet,81.2%,True,23531,2016-12-04T00:00:00.000000,Pyramid Scene Parsing Network,/paper/pyramid-scene-parsing-network,5,1921,187,False,Mean IoU
5,2016-11-30,ResNet-38,80.6%,True,28212,2016-11-30T00:00:00.000000,Wider or Deeper: Revisiting the ResNet Model f...,/paper/wider-or-deeper-revisiting-the-resnet-m...,6,1922,187,False,Mean IoU
6,2018-04-25,Smooth Network with Channel Attention Block,80.3%,True,5153,2018-04-25T00:00:00.000000,Learning a Discriminative Feature Network for ...,/paper/learning-a-discriminative-feature-netwo...,7,2875,187,False,Mean IoU
7,2018-08-02,BiSeNet,78.9%,True,54112,2018-08-02T00:00:00.000000,BiSeNet: Bilateral Segmentation Network for Re...,/paper/bisenet-bilateral-segmentation-network-for,8,2871,187,False,Mean IoU
8,2019-03-20,SwiftNetRN-18,75.5%,True,109048,2019-03-20T00:00:00.000000,In Defense of Pre-trained ImageNet Architectur...,/paper/in-defense-of-pre-trained-imagenet,9,4326,187,False,Mean IoU
9,2016-11-24,FRRN,71.8%,True,27973,2016-11-24T00:00:00.000000,Full-Resolution Residual Networks for Semantic...,/paper/full-resolution-residual-networks-for,10,2159,187,False,Mean IoU


In [302]:
import requests
from bs4 import BeautifulSoup

base_url = 'https://paperswithcode.com/sota/semantic-segmentation-on-pascal-voc-2012'

r = requests.get(base_url)

soup = BeautifulSoup(r.text, 'html.parser')

user_name = soup.find(class_='evaluation-table-data')
print(user_name)

None


In [332]:
#


In [104]:
# initialises the Chrome webdriver
driver = webdriver.Chrome()

# iterates through each task in the task_headings list
for i in range(len(task_headings)):

    # directs the driver to the paperswithcode webpage
    driver.get("https://paperswithcode.com/task/"+ task_headings[i])

In [425]:
# initialises the Chrome webdriver
# driver = webdriver.Chrome()

# iterates through each task in the task_headings list
for i in range(len(area_headings)):
    
    for j in range(len(task_headings[i])):
        
        for k in range(len(subtask_headings[j])):
            
            print("https://paperswithcode.com/area/"+ area_headings[i] + "/" + subtask_headings[j][k])

            # directs the driver to the paperswithcode webpage
#             driver.get("https://paperswithcode.com/area/"+ area_headings[i] + "/" + subtask_headings[i][j])

https://paperswithcode.com/area/computer-vision/semantic-segmentation-subtasks
https://paperswithcode.com/area/computer-vision/semantic-segmentation
https://paperswithcode.com/area/computer-vision/real-time-semantic-segmentation
https://paperswithcode.com/area/computer-vision/scene-segmentation
https://paperswithcode.com/area/computer-vision/3d-part-segmentation
https://paperswithcode.com/area/computer-vision/weakly-supervised-semantic-segmentation
https://paperswithcode.com/area/computer-vision/semi-supervised-semantic-segmentation
https://paperswithcode.com/area/computer-vision/panoptic-segmentation
https://paperswithcode.com/area/computer-vision/unsupervised-semantic-segmentation
https://paperswithcode.com/area/computer-vision/image-classification-subtasks
https://paperswithcode.com/area/computer-vision/image-classification
https://paperswithcode.com/area/computer-vision/few-shot-image-classification
https://paperswithcode.com/area/computer-vision/semi-supervised-image-classificatio

https://paperswithcode.com/area/miscellaneous/6d-pose-estimation
https://paperswithcode.com/area/miscellaneous/head-pose-estimation
https://paperswithcode.com/area/miscellaneous/human-pose-forecasting
https://paperswithcode.com/area/miscellaneous/animal-pose-estimation
https://paperswithcode.com/area/miscellaneous/super-resolution-subtasks
https://paperswithcode.com/area/miscellaneous/super-resolution
https://paperswithcode.com/area/miscellaneous/image-super-resolution
https://paperswithcode.com/area/miscellaneous/video-super-resolution
https://paperswithcode.com/area/miscellaneous/3d-object-super-resolution
https://paperswithcode.com/area/miscellaneous/depth-map-super-resolution
https://paperswithcode.com/area/miscellaneous/denoising-subtasks
https://paperswithcode.com/area/miscellaneous/denoising
https://paperswithcode.com/area/miscellaneous/image-denoising
https://paperswithcode.com/area/miscellaneous/autonomous-vehicles-subtasks
https://paperswithcode.com/area/miscellaneous/autonom

In [108]:
#
url = 'https://paperswithcode.com/task/"+ task_headings[1]'

#
dataset_tag = 'table-striped'
# for row in soup.find_all('div',attrs={"class" : "reviewText"}):
#     print row.text
# dataset black-links

# invokes the return_dataset function to return each of the area headings
task_datasets = return_dataset(url, dataset_tag)

print(task_datasets)

[]
