In [1]:
# imports
import requests
import json
import os
import time
import pandas as pd
import numpy as np

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options

In [13]:
# let's speed up the scraping process by scraping multiple urls in one batch

# generic url scraper

def url_scraper_batch(
                urlbase = '',
                urls = None,
                selenium = True,
                windowSize = "1280,720",
                headless = True,
                quitOnEnd = True,
                waitForElement = '',
                waitForId = '',
                waitForClass = '',
                pressLink = None,
                waitBetweenPress = 30,
                waitUntilTimeout = 20,
               ):
    """Load several URLs in one Chrome session and collect their page source.

    Parameters
    ----------
    urlbase : str
        Prefix that is prepended to every entry of ``urls``.
    urls : list of str, optional
        URL suffixes to visit, in order. ``None`` (default) means no URLs.
    selenium : bool
        Only the selenium mode is implemented. A falsy value prints a notice
        and returns ``''``.
    windowSize : str
        Value passed to Chrome's ``--window-size`` argument.
    headless : bool
        Add Chrome's ``--headless`` argument when true.
    quitOnEnd : bool
        Shut the browser session down before returning (also when an
        exception interrupts the batch). When false the browser is left open.
    waitForElement, waitForId, waitForClass : str
        Tag name / element id / class name to wait for (visibility) after
        each page load. An empty string disables the corresponding wait.
    pressLink : list of str, optional
        Link texts to click, in order, after the waits.
    waitBetweenPress : int
        Seconds used as the driver's implicit wait.
    waitUntilTimeout : int
        Seconds each explicit wait may take before it is reported as timed out.

    Returns
    -------
    list of dict or str
        One ``{'url': ..., 'html': ...}`` dict per visited URL, or ``''`` when
        ``selenium`` is falsy. The page source is stored even when a wait
        timed out; the timeout is only reported on stdout.
    """
    # None instead of [] as default: a list default is created once and would
    # be shared between all calls of this function.
    urls = [] if urls is None else urls
    pressLink = [] if pressLink is None else pressLink

    # simple (requests based) version is not available for batches
    if not selenium:
        print('batched url is for selenium only')
        return ''

    CHROMEDRIVER_PATH = './.selenium/chromedriver'

    chrome_options = Options()
    if headless:
        chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=%s" % windowSize)

    # NOTE(review): `executable_path` is the keyword this notebook was written
    # against; confirm it is still accepted by the installed selenium version.
    driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH, options=chrome_options)

    result = []
    try:
        driver.implicitly_wait(waitBetweenPress)

        # (locator strategy, value) pairs; pairs with an empty value are skipped
        page_waits = (
            (By.TAG_NAME, waitForElement),
            (By.ID, waitForId),
            (By.CLASS_NAME, waitForClass),
        )

        for url in urls:
            page_url = urlbase + url
            item = {
                'url': url
            }

            driver.get(page_url)

            # wait for the requested tag / id / class (javascript generated code)
            for strategy, value in page_waits:
                if value == '':
                    continue
                try:
                    WebDriverWait(driver, waitUntilTimeout).until(
                        EC.visibility_of_element_located((strategy, value)))
                    print ("Page is ready!", page_url)
                except TimeoutException:
                    print ("Loading took too much time!", page_url)

            # press links; the first timeout abandons the remaining links
            if pressLink:
                try:
                    for link in pressLink:
                        print(link)
                        element = WebDriverWait(driver, waitUntilTimeout).until(
                            EC.visibility_of_element_located((By.LINK_TEXT, link)))
                        element.click()
                except TimeoutException:
                    print ("Loading took too much time!", page_url)

            item['html'] = driver.page_source
            result.append(item)
    finally:
        # quit() ends the whole session (browser and driver process); close()
        # would only close the current window and leave the driver running.
        if quitOnEnd:
            driver.quit()

    return result

# smoke test: fetch three dataset pages in one visible browser session
urlbase = 'https://www.kaggle.com'
urls = [
    '/austinreese/craigslist-carstrucks-data',
    '/google/tinyquickdraw',
    '/babyoda/women-entrepreneurship-and-labor-force',
]

result = url_scraper_batch(
    urlbase,
    urls,
    waitForClass='content-box',
    headless=False,
    quitOnEnd=True,
)

# report only the size of each page, the html itself is too long to print
for item in result:
    html_length = len(item['html'])
    print(item['url'], html_length)

Page is ready! https://www.kaggle.com/austinreese/craigslist-carstrucks-data
Page is ready! https://www.kaggle.com/google/tinyquickdraw
Page is ready! https://www.kaggle.com/babyoda/women-entrepreneurship-and-labor-force
/austinreese/craigslist-carstrucks-data 177273
/google/tinyquickdraw 162837
/babyoda/women-entrepreneurship-and-labor-force 148958


In [3]:
# generic store data to file function
def store_data(data, file, mode='w', toJson=False):
    """Write ``data`` to ``file`` as UTF-8 text.

    With ``toJson`` set, ``data`` is serialised with ``json.dumps`` first.
    ``mode`` is handed to ``open`` unchanged. Returns whatever the file
    object's ``write`` call returns.
    """
    payload = json.dumps(data) if toJson else data
    with open(file, mode, encoding='utf-8') as handle:
        return handle.write(payload)
    
# generic load data from file function
def load_data(file, fromJson=False):
    """Read ``file`` as UTF-8 text, ignoring undecodable bytes.

    Returns the text, or the parsed object when ``fromJson`` is set.
    When ``file`` is not an existing regular file the string
    ``'file not found'`` is returned instead of raising.
    """
    if not os.path.isfile(file):
        return 'file not found'

    with open(file, 'r', encoding='utf-8', errors="ignore") as handle:
        content = handle.read()

    return json.loads(content) if fromJson else content

# round trip with plain text: prints the write result, then the text read back
text_file = '../data/repositories/mlart/test.txt'
print(store_data('Hello', text_file))
print(load_data(text_file))

# round trip with json: prints the write result, then the decoded object
json_file = '../data/repositories/mlart/test.json'
print(store_data({'msg':'Hello World'}, json_file, toJson=True))
print(load_data(json_file, fromJson=True))

5
Hello
22
{'msg': 'Hello World'}


In [4]:
# helper function to create the folder for a given path
def create_folder(path):
    """Create the directory part of ``path`` if it does not exist yet.

    Only ``os.path.dirname(path)`` is created, so pass a path that ends with
    a separator (or a file path inside the folder) to create the folder
    itself. Missing parent directories are created as well. Prints
    ``path + ' created'`` after creating the directory and returns ``None``.
    Does nothing when ``path`` has no directory part or the directory
    already exists.
    """
    directory = os.path.dirname(path)

    # A bare file name has no directory part; os.makedirs('') would fail.
    if not directory or os.path.exists(directory):
        return

    # exist_ok guards against the directory appearing between the check above
    # and this call, without needing the errno module.
    os.makedirs(directory, exist_ok=True)
    print(path + ' created')

In [40]:
# fetch every page listed in the csv and keep the html for later feature scraping

# competitions
csv = '../data/repositories/kaggle/kaggle_competitions.csv'
folder = '../data/repositories/kaggle/competitions'

# datasets (swap with the two lines above to scrape datasets instead)
#csv = '../data/repositories/kaggle/kaggle_datasets.csv'
#folder = '../data/repositories/kaggle/datasets'

df = pd.read_csv(csv)
df_links = list(df['link'])
batch_size = 25

# cut the link list into consecutive batches of at most batch_size links
chunks = []
for offset in range(0, len(df_links), batch_size):
    chunks.append(df_links[offset:offset + batch_size])
chunks_len = len(chunks)
print('total/chunks', len(df_links), chunks_len)

for i, chunk in enumerate(chunks):
    print('### chunk', i, '/', chunks_len, '###')

    # collect the links of this chunk whose html files are still missing
    chunk_datasets = []
    chunk_notebooks = []
    for j, link in enumerate(chunk):
        print('# chunk', i, '/', chunks_len, '# item ', j, link)

        link_parts = link.split('/')
        author = link_parts[1]
        name = link_parts[2].replace(' ','_').replace(':','')

        # main page: queue it and prepare its folders when not stored yet
        file = os.path.join(folder, author, name+'/dataset.html')
        if not os.path.isfile(file):
            chunk_datasets.append(link)
            create_folder(os.path.join(folder, author+'/'))
            create_folder(os.path.join(folder, author, name+'/'))

        # notebooks listing: queue it when not stored yet
        file = os.path.join(folder, author, name+'/notebooks.html')
        if not os.path.isfile(file):
            chunk_notebooks.append(link+'/notebooks?sortBy=voteCount')

    # fetch and store the missing main pages of this chunk
    if chunk_datasets:
        result = url_scraper_batch(
            urlbase,
            chunk_datasets,
            waitForClass='content-box',
            headless=True,
            quitOnEnd=True,
        )

        for item in result:
            print(item['url'], len(item['html']))
            file = (folder + item['url'] + '/dataset.html').replace(' ','_').replace(':','')
            print(file)
            store_data(item['html'], file)

    # fetch and store the missing notebook listings of this chunk
    if chunk_notebooks:
        result = url_scraper_batch(
            urlbase,
            chunk_notebooks,
            waitForClass='sc-qcrOD',
            headless=True,
            quitOnEnd=True,
        )

        for item in result:
            print(item['url'], len(item['html']))
            # the url ends in '/notebooks?sortBy=voteCount'; dropping the query
            # part after appending '.html' leaves '.../notebooks.html'
            file = folder + item['url'] + '.html'
            file = file.replace(' ','_').replace(':','').replace('?sortBy=voteCount','')
            print(file)
            store_data(item['html'], file)
    



total/chunks 419 17
### chunk 0 / 17 ###
# item 0 / 17 # 0 /c/stanford-covid-vaccine
# item 0 / 17 # 1 /c/rsna-str-pulmonary-embolism-detection
# item 0 / 17 # 2 /c/lish-moa
# item 0 / 17 # 3 /c/conways-reverse-game-of-life-2020
# item 0 / 17 # 4 /c/lyft-motion-prediction-autonomous-vehicles
# item 0 / 17 # 5 /c/landmark-recognition-2020
# item 0 / 17 # 6 /c/osic-pulmonary-fibrosis-progression
# item 0 / 17 # 7 /c/landmark-retrieval-2020
# item 0 / 17 # 8 /c/birdsong-recognition
# item 0 / 17 # 9 /c/halite
# item 0 / 17 # 10 /c/siim-isic-melanoma-classification
# item 0 / 17 # 11 /c/trec-covid-information-retrieval
# item 0 / 17 # 12 /c/open-images-instance-segmentation-rvc-2020
# item 0 / 17 # 13 /c/open-images-object-detection-rvc-2020
# item 0 / 17 # 14 /c/covid19-global-forecasting-week-5
# item 0 / 17 # 15 /c/global-wheat-detection
# item 0 / 17 # 16 /c/alaska2-image-steganalysis
# item 0 / 17 # 17 /c/hashcode-photo-slideshow
# item 0 / 17 # 18 /c/trends-assessment-prediction
# it

Page is ready! https://www.kaggle.com/c/jigsaw-multilingual-toxic-comment-classification/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/imaterialist-fashion-2020-fgvc7/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/covid19-global-forecasting-week-1/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/covid19-local-us-ca-forecasting-week-1/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/plant-pathology-2020-fgvc7/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/iwildcam-2020-fgvc7/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/herbarium-2020-fgvc7/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/m5-forecasting-accuracy/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/m5-forecasting-uncertainty/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/liverpool-ion-switching/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle

Page is ready! https://www.kaggle.com/c/bigquery-geotab-intersection-congestion/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/cat-in-the-dat/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/understanding_cloud_organization/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/ciphertext-challenge-iii/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/severstal-steel-defect-detection/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/kuzushiji-recognition/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/ieee-fraud-detection/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/open-images-2019-instance-segmentation/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/generative-dog-images/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.co

Page is ready! https://www.kaggle.com/c/womens-machine-learning-competition-2019/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/santander-customer-transaction-prediction/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/dont-overfit-ii/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/tmdb-box-office-prediction/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/gendered-pronoun-resolution/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/LANL-Earthquake-Prediction/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/petfinder-adoption-prediction/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/vsb-power-line-fault-detection/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/reducing-commercial-aviation-fatalities/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/20-newsgroups-ciphertext-challenge/notebooks?sortBy=voteCount
Page is ready

Page is ready! https://www.kaggle.com/c/google-ai-open-images-visual-relationship-track/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/google-ai-open-images-object-detection-track/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/demand-forecasting-kernels-only/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/flavours-of-physics-kernels-only/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/whats-cooking-kernels-only/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/movie-review-sentiment-analysis-kernels-only/notebooks?sortBy=voteCount
/c/NFL-Punt-Analytics-Competition/notebooks?sortBy=voteCount 297460
../data/repositories/kaggle/competitions/c/NFL-Punt-Analytics-Competition/notebooks.html
/c/humpback-whale-identification/notebooks?sortBy=voteCount 224699
../data/repositories/kaggle/competitions/c/humpback-whale-identification/notebooks.html
/c/elo-merchant-category-recommendation/notebooks?sortBy

Page is ready! https://www.kaggle.com/c/nomad2018-predict-transparent-conductors/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/santa-gift-matching/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/recruit-restaurant-visitor-forecasting/notebooks?sortBy=voteCount
/c/forest-cover-type-kernels-only/notebooks?sortBy=voteCount 226149
../data/repositories/kaggle/competitions/c/forest-cover-type-kernels-only/notebooks.html
/c/santander-value-prediction-challenge/notebooks?sortBy=voteCount 220443
../data/repositories/kaggle/competitions/c/santander-value-prediction-challenge/notebooks.html
/c/youtube8m-2018/notebooks?sortBy=voteCount 208323
../data/repositories/kaggle/competitions/c/youtube8m-2018/notebooks.html
/c/home-credit-default-risk/notebooks?sortBy=voteCount 224704
../data/repositories/kaggle/competitions/c/home-credit-default-risk/notebooks.html
/c/trackml-particle-identification/notebooks?sortBy=voteCount 271509
../data/repositories/kaggle/compet

Page is ready! https://www.kaggle.com/c/zillow-prize-1/notebooks?sortBy=voteCount
/c/plant-seedlings-classification/notebooks?sortBy=voteCount 165121
../data/repositories/kaggle/competitions/c/plant-seedlings-classification/notebooks.html
/c/mercari-price-suggestion-challenge/notebooks?sortBy=voteCount 236399
../data/repositories/kaggle/competitions/c/mercari-price-suggestion-challenge/notebooks.html
/c/tensorflow-speech-recognition-challenge/notebooks?sortBy=voteCount 241377
../data/repositories/kaggle/competitions/c/tensorflow-speech-recognition-challenge/notebooks.html
/c/spooky-author-identification/notebooks?sortBy=voteCount 229487
../data/repositories/kaggle/competitions/c/spooky-author-identification/notebooks.html
/c/statoil-iceberg-classifier-challenge/notebooks?sortBy=voteCount 228004
../data/repositories/kaggle/competitions/c/statoil-iceberg-classifier-challenge/notebooks.html
/c/favorita-grocery-sales-forecasting/notebooks?sortBy=voteCount 229636
../data/repositories/kaggle

Page is ready! https://www.kaggle.com/c/talkingdata-mobile-user-demographics/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/grupo-bimbo-inventory-demand/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/integer-sequence-learning/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/ultrasound-nerve-segmentation/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/facebook-v-predicting-check-ins/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/avito-duplicate-ads-detection/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/painter-by-numbers/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/draper-satellite-image-chronology/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/expedia-hotel-recommendations/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/kobe-bryant-shot-selection/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.co

Page is ready! https://www.kaggle.com/c/deloitte-western-australia-rental-prices/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/springleaf-marketing-response/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/dato-native/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/flavours-of-physics/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/coupon-purchase-prediction/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/introducing-kaggle-scripts/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/liberty-mutual-group-property-inspection-prediction/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/machinery-tube-pricing/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/grasp-and-lift-eeg-detection/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/sf-crime/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/avito-c

Page is ready! https://www.kaggle.com/c/word2vec-nlp-tutorial/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/poker-rule-induction/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/helping-santas-helpers/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/inria-bci-challenge/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/avazu-ctr-prediction/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/finding-elo/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/tradeshift-text-classification/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/afsis-soil-properties/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/seizure-prediction/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/street-view-getting-started-with-julia/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/liberty-

Loading took too much time! https://www.kaggle.com/c/lshtc/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/flight2-final/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/loan-default-prediction/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/march-machine-learning-mania-2014/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/galaxy-zoo-the-galaxy-challenge/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/genentech-flu-forecasting/notebooks?sortBy=voteCount
/c/wise-2014/notebooks?sortBy=voteCount 150220
../data/repositories/kaggle/competitions/c/wise-2014/notebooks.html
/c/random-acts-of-pizza/notebooks?sortBy=voteCount 175791
../data/repositories/kaggle/competitions/c/random-acts-of-pizza/notebooks.html
/c/bike-sharing-demand/notebooks?sortBy=voteCount 168869
../data/repositories/kaggle/competitions/c/bike-sharing-demand/notebooks.html
/c/seizure-detection/no

Loading took too much time! https://www.kaggle.com/c/the-icml-2013-whale-challenge-right-whale-redux/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/the-icml-2013-bird-challenge/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/facial-keypoints-detection/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/yelp-recsys-2013/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/kdd-cup-2013-author-disambiguation/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/kdd-cup-2013-author-paper-identification-challenge/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/predict-who-is-more-influential-in-a-social-network/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/challenges-in-representation-learning-the-black-box-learning-challenge/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/cha

Loading took too much time! https://www.kaggle.com/c/RxVolumePrediction/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/flight/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/hospital/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/facebook-ii/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/DarkWorlds/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/detecting-insults-in-social-commentary/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/cir-prospect/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/customer-retention/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/GEF2012-wind-forecasting/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/global-energy-forecasting-competition-2012-load-forecasting/notebooks?sortBy=voteCount

Page is ready! https://www.kaggle.com/c/asap-aes/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/benchmark-bond-trade-price-challenge/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/getting-started/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/GestureChallenge/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/WhatDoYouKnow/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/AlgorithmicTradingChallenge/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/PhotoQualityPrediction/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/DontGetKicked/notebooks?sortBy=voteCount
Loading took too much time! https://www.kaggle.com/c/SemiSupervisedFeatureLearning/notebooks?sortBy=voteCount
Page is ready! https://www.kaggle.com/c/GiveMeSomeCredit/notebooks?sortBy=voteCount
Loading took too much time! https:

In [54]:
# scrape links from notebooks.html

# Locations for a single competition used to try out the link scraper below.
# folder_base and folder both end with '/', so the pieces are concatenated
# directly to form the input and output file paths.
folder_base = '../data/repositories/kaggle/competitions/c/'
folder = '3d-object-detection-for-autonomous-vehicles/'
# name of the stored listing page that is read
file = 'notebooks.html'
# name of the json file the extracted links are written to
out = 'notebooks.json'
# live address of the same listing page
url = 'https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/notebooks?sortBy=voteCount'

def scrape_notebook_links(html,
                          list_class="km-list km-list--avatar-list km-list--three-line",
                          link_selector='a.sc-qcrOD'):
    """Extract the notebook links from a stored notebooks listing page.

    Parameters
    ----------
    html : str
        Markup of the listing page.
    list_class : str
        ``class`` attribute value of the ``div`` that wraps the list items.
    link_selector : str
        CSS selector, evaluated inside each list item, whose first match
        carries the link in its ``href`` attribute.

    Returns
    -------
    list
        The ``href`` value of the first matching anchor of each list item, in
        document order. Empty when the wrapping ``div`` is not found.
        Scanning stops at the first list item without a matching anchor; the
        links collected up to that point are returned.
    """
    soup = BeautifulSoup(html, 'html.parser')

    listing = soup.find('div', class_=list_class)
    if listing is None:
        return []

    result = []
    for item in listing.find_all('li', {"role": "listitem"}):
        anchors = item.select(link_selector)
        # Explicit check instead of a bare except around the [0] lookup, so
        # unrelated errors (and KeyboardInterrupt) are no longer swallowed.
        if not anchors:
            print('an error occured')
            break
        result.append(anchors[0].get('href'))

    return result

# read the stored listing of the configured competition ...
competition_dir = folder_base + folder
html = load_data(competition_dir + file)
# (alternative: fetch the page live instead of reading the stored copy)
#result = url_scraper_batch('', [url], waitForClass='sc-qcrOD', headless = False, quitOnEnd = True)
#html = result[0]['html']

# ... extract its notebook links, show them and store them as json
links = scrape_notebook_links(html)
print(links)
store_data(links, competition_dir + out, toJson=True)

an error occured
['/tarunpaparaju/lyft-competition-understanding-the-data', '/gzuidhof/reference-model', '/rishabhiitbhu/eda-understanding-the-dataset-with-3d-plots', '/meaninglesslives/lyft3d-inference-kernel', '/hmendonca/kaggle-pytorch-utility-script', '/meaninglesslives/lyft3d-inference-prediction-visualization', '/xhlulu/lyft-eda-animations-generating-csvs', '/zikazika/what-is-object-detection-yolo', '/jesucristo/starter-devkit-lyft3d', '/stalkermustang/converting-lyft-dataset-to-kitty-format', '/lopuhin/lyft-3d-join-all-lidars-annotations-from-scratch', '/gaborfodor/eda-3d-object-detection-challenge', '/phunghieu/getting-started-with-3d-semantic-segmentation', '/asimandia/lyft3d-inference-kernel', '/stalkermustang/getting-data-for-nn-dataset', '/fartuk1/3d-segmentation-approach', '/rishabhiitbhu/visualizing-predictions', '/fanconic/yolov3-keras-image-object-detection', '/meaninglesslives/lyft3d-test-dataset', '/asimandia/reference-model']


941

In [59]:
# parse notebooks.html to notebooks.json (links)
#
# For every entry under `path` that contains a saved notebooks.html, extract
# the notebook links and store them beside it as notebooks.json. Entries
# without that file, or whose page yields no links, produce no JSON file.

path = '../data/repositories/kaggle/competitions/c/'
file = 'notebooks.html'
# List `path` itself so the directory that is listed and the directory the
# file paths are joined against can never differ.
folders = os.listdir(path)
for folder in folders:
    item = os.path.join(path, folder, file)
    print(item)
    if not os.path.isfile(item):
        continue
    html = load_data(item)
    links = scrape_notebook_links(html)
    print(links)
    if len(links) > 0:
        # Swap only the file extension; str.replace would also rewrite a
        # '.html' occurring earlier in the path (e.g. inside a folder name).
        output = os.path.splitext(item)[0] + '.json'
        print(output)
        store_data(links, output, toJson=True)

../data/repositories/kaggle/competitions/c/15-071x-the-analytics-edge-competition-spring-2015\notebooks.html
[]
../data/repositories/kaggle/competitions/c/15-071x-the-analytics-edge-spring-20152\notebooks.html
[]
../data/repositories/kaggle/competitions/c/20-newsgroups-ciphertext-challenge\notebooks.html
an error occured
['/ashishpatel26/attension-layer-basic-for-nlp', '/rturley/a-first-crack-tools-tips-3-cipher-solutions', '/ashishpatel26/beginner-to-intermediate-nlp-tutorial', '/ashishpatel26/everything-you-want-to-know-about-20-ngctc', '/suicaokhoailang/one-model-for-each-difficulty-0-3691-lb', '/opanichev/lightgbm-and-simple-features', '/leflal/cipher-1-cipher-2-full-solutions', '/lbronchal/without-breaking-ciphers-0-48-lb', '/mithrillion/enigma-was-gimped-by-weather-reports', '/interneuron/difficulty-1-deciphering-wip', '/leflal/cipher-3-solution', '/ashishpatel26/stratified-kfold-hyperparameter-tuning', '/leflal/cipher-1-2-3-exact-matching-targets', '/lbronchal/don-t-waste-your-t

../data/repositories/kaggle/competitions/c/AlgorithmicTradingChallenge\notebooks.html
[]
../data/repositories/kaggle/competitions/c/allstate-claims-severity\notebooks.html
an error occured
['/sharmasanthosh/exploratory-study-on-ml-algorithms', '/mmueller/stacking-starter', '/dmi3kno/all-the-allstate-states-eda', '/iglovikov/xgb-1114', '/tilii7/bias-correction-xgboost', '/mtinti/keras-starter-with-bagging-1111-84364', '/danijelk/keras-starter-with-bagging-lb-1120-596', '/dmi3kno/allstate-eda', '/achalshah/allstate-feature-analysis-python', '/scirpus/markov-chain-monte-carlo', '/nigelcarpenter/farons-xgb-starter-ported-to-r', '/dmi3kno/allstate-fingerprints-eda', '/mariusbo/xgb-lb-1106-33084', '/guyko81/just-an-easy-solution', '/ameshkov/r-xgb-1107-23-unskew-encode-fold', '/misfyre/encoding-feature-comb-modkzs-1108-72665', '/mmueller/yet-another-xgb-starter', '/mmueller/categorical-embedding-with-xgb', '/nigelcarpenter/r-script-scoring-1113-93-on-plb', '/laurae2/sneak-peak-at-the-data-1'

../data/repositories/kaggle/competitions/c/avito-prohibited-content\notebooks.html
[]
../data/repositories/kaggle/competitions/c/awic2012\notebooks.html
[]
../data/repositories/kaggle/competitions/c/axa-driver-telematics-analysis\notebooks.html
[]
../data/repositories/kaggle/competitions/c/battlefin-s-big-data-combine-forecasting-challenge\notebooks.html
[]
../data/repositories/kaggle/competitions/c/belkin-energy-disaggregation-competition\notebooks.html
[]
../data/repositories/kaggle/competitions/c/benchmark-bond-trade-price-challenge\notebooks.html
[]
../data/repositories/kaggle/competitions/c/bengaliai-cv19\notebooks.html
an error occured
['/kaushal2896/bengali-graphemes-starter-eda-multi-output-cnn', '/iafoss/image-preprocessing-128x128', '/corochann/bengali-seresnext-training-with-pytorch', '/corochann/bengali-seresnext-prediction-with-pytorch', '/iafoss/grapheme-fast-ai-starter-lb-0-964', '/cdeotte/how-to-compete-with-gpus-workshop', '/haqishen/gridmask', '/rsmits/keras-efficient

../data/repositories/kaggle/competitions/c/boston-data-festival-hackathon\notebooks.html
[]
../data/repositories/kaggle/competitions/c/career-con-2019\notebooks.html
an error occured
['/jesucristo/1-smart-robots-most-complete-notebook', '/artgor/where-do-the-robots-drive', '/prashantkikani/help-humanity-by-helping-robots', '/ishivinal/hyperparamters-optimization-gs-rs-boa-tpe-hb-ga', '/gpreda/robots-need-help', '/hiralmshah/robot-sensor-eda-fe-and-prediction-improvement', '/theoviel/deep-learning-starter', '/friedchips/the-missing-link', '/ilhamfp31/fast-fourier-transform-denoising', '/prith189/starter-code-for-3rd-place-solution', '/jesucristo/1-smart-robots-complete-compilation', '/artgor/bayesian-optimization-for-robots', '/jsaguiar/surface-recognition-baseline', '/trohwer64/simple-fourier-analysis', '/pluceroo/new-features-lgbm-and-simple-rf', '/whoiskk/validation-strategy-randomforest-0-71', '/hsinwenchang/randomforestclassifier', '/artgor/basic-pytorch-lstm', '/purplejester/pytor

../data/repositories/kaggle/competitions/c/chess\notebooks.html
[]
../data/repositories/kaggle/competitions/c/ChessRatings2\notebooks.html
[]
../data/repositories/kaggle/competitions/c/cifar-10\notebooks.html
an error occured
['/roblexnana/cifar10-with-cnn-for-beginer', '/amithasanshuvo/cifar-images-classification-using-cnn', '/kedarsai/cifar-10-88-accuracy-using-keras', '/faizanurrahmann/cifar-10-object-classification-cnn-keras', '/yashvi/machine-learning-using-turi-create', '/vikasbhadoria/cifar10-high-accuracy-model-build-on-pytorch', '/kalashnimov/keras-callbacks-with-91-acc', '/pg1007/starting-with-cnn', '/ujjwalsharma26/cifar10-classification', '/aarooxx/object-recognition-90-88-accuracy', '/lkatran/blank-cifar-10', '/shaurov/cifar-image-classification-using-cnn-for-beginner', '/abhishekshaw21/cifar-10-using-pytorch', '/franckepeixoto/cifar-10-recognition-in-images-to-the-point', '/greatcodes/pytorch-cnn-resnet18-cifar10', '/olegmatsevych/notebook0d79dc2184', '/raghavjha858/cifar

../data/repositories/kaggle/competitions/c/covid19-global-forecasting-week-3\notebooks.html
an error occured
['/nitishabharathi/the-story-of-covid-19-in-india-eda-and-prediction', '/davidbnn92/weather-data', '/anjum48/seir-hcd-model', '/dferhadi/covid-19-predictions-growth-factor-and-calculus', '/eswarchandt/covid-19-forecasting-xgboost', '/yuanquan/covid-19-prediction-by-country-and-province', '/corochann/covid-19-effect-of-temperature-humidity', '/aerdem4/covid-19-basic-model-not-leaky', '/eswarchandt/timeseries-forecasting-of-covid-19-week-3-arima', '/madz2000/covid-19-week-3-analysis-prediction', '/mobassir/covid-19-in-bangladesh', '/gaborfodor/covid19-global-forecasting-top-submissions', '/janmejoy/covid19-time-series-analysis-plotly-visualization', '/mdmahmudferdous/covid-19-italy-forecasting-fb-prophet', '/mrmorj/covid-19-eda-xgboost', '/jorijnsmit/mathematical-solution-to-sigmoid-parameters', '/abhijithchandradas/sir-model-don-t-understand-calculus-don-t-worry', '/arpandas65/co

../data/repositories/kaggle/competitions/c/data-science-bowl-2019\notebooks.html
an error occured
['/artgor/quick-and-dirty-regression', '/robikscube/2019-data-science-bowl-an-introduction', '/erikbruin/data-science-bowl-2019-eda-and-baseline', '/gpreda/2019-data-science-bowl-eda', '/braquino/convert-to-regression', '/mhviraf/a-new-baseline-for-dsb-2019-catboost-model', '/shahules/xgboost-feature-selection-dsbowl', '/mhviraf/a-baseline-for-dsb-2019', '/caesarlupum/ds-bowl-start-here-a-gentle-introduction', '/artgor/oop-approach-to-fe-and-models', '/abhinand05/catboost-a-deeper-dive', '/fatihbilgin/data-science-bowl-2019-data-visualization', '/vipulgandhi/how-to-choose-right-metric-for-evaluating-ml-model', '/cpmpml/ultra-fast-qwk-calc-method', '/ragnar123/feature-engineering-v-1-0', '/hengzheng/bayesian-optimization-seed-blending', '/subbuvolvosekar/eda-visualization-story-dsb-2019', '/poteman/sampling-train-data-and-use-prediction-as-feature', '/gpreda/data-science-bowl-fast-compact-s

an error occured
['/kmader/inceptionv3-for-retinopathy-gpu-hr', '/meenavyas/diabetic-retinopathy-detection', '/kmader/tf-data-tutorial-with-retina-and-keras', '/tanlikesmath/diabetic-retinopathy-with-resnet50-oversampling', '/ayank77/college-work', '/benhamner/sample-images', '/tanumoynandy/diabeticretinopathyvgg16-finetuning', '/kmader/vgg16-640hr-nloss-retinopathy', '/amitasr/diabetic-retinopathy-detection', '/mitramishra93/diabetic-retinopathy-detection-cnn', '/viniciusaraujo/inceptionv3-for-retinopathy-gpu-hr-d82fb7', '/reeteshsingh/diabetic-retinopathy-using-pretrained-model', '/simonandersen/fast-ai-lesson-1-diabetic-retinopathy', '/gufranmirza/retinopathy', '/hirokazu12/capsnet-dr', '/bhargavbhatt/project-work', '/ralsaad/test-1', '/intelecai/diabetic-retinopathy-detection-bronze-place', '/amitasr/kernel4d37e6ad35', '/tahmina011/diabetic-retinopathy-detection-cnn']
../data/repositories/kaggle/competitions/c/diabetic-retinopathy-detection\notebooks.json
../data/repositories/kaggl

an error occured
['/bkamphaus/exploratory-image-analysis', '/vicensgaitan/image-registration-the-r-way', '/nigelcarpenter/akaze-stitching', '/laurae2/imagej-pre-processing-for-deep-learning', '/anokas/naive-beat-the-benchmark', '/yourwanghao/align-images', '/nigelcarpenter/homography-estimate-stitching-two-imag', '/dogrishin/svm-based-on-zoom-and-rotations-0-7', '/nigelcarpenter/draper-notebook', '/nigelcarpenter/akaze-keypoint-detector', '/asymptote/homography-estimate-stitching-two-imag', '/chabir/stitch-and-predict', '/stprior/mean-corner-detection', '/chefele/plot-some-image-sets', '/khaledfayed/plot-images2', '/wcukierski/plot-images', '/viswatejag/image-stiching-using-deeplearning', '/pramods/exploratory-image-analysis', '/weedislove/andyafter-stitching', '/weedislove/stitch-and-predict']
../data/repositories/kaggle/competitions/c/draper-satellite-image-chronology\notebooks.json
../data/repositories/kaggle/competitions/c/ds4g-environmental-insights-explorer\notebooks.html
an erro

an error occured
['/karanjakhar/facial-keypoint-detection', '/balraj98/data-augmentation-for-facial-keypoint-detection', '/madhawav/basic-fully-connected-nn', '/aparajit0511/facial-keypoint-detection-udacity', '/liudmyla/easy-keras-facial-keypoint-detection', '/sshikamaru/keras-cnn-starter', '/nitron/facial-keypoints-fastai-image-regression', '/obione26/facial-keypoints-detection-keras-albumentations', '/utkarsh4430/facial-keypoints-detection-basic-keras-model', '/phylake1337/2-15-loss-simple-split-trick', '/fmak95/facial-keypoint-detection', '/gakshaygupta/real-time-cnn-architecture', '/chaitanyagarikipati/facial-keypoints-detection-tensorflow-cnn', '/datawanderer/mixing-cnn-regularization-d', '/denisart/facial-keypoint-detection', '/vinodhb95/charlie-version2', '/mirmahathirmohammad/kaggle-facial-keypoint-detection', '/negi009/facial-keypoint-detection', '/mirodil/facial-keypoints-detection', '/mannsingh/facial-keypoints']
../data/repositories/kaggle/competitions/c/facial-keypoints-d

../data/repositories/kaggle/competitions/c/ga-customer-revenue-prediction\notebooks.html
an error occured
['/sudalairajkumar/simple-exploration-baseline-ga-customer-revenue', '/julian3833/1-quick-start-read-csv-and-flatten-json-fields', '/kabure/exploring-the-consumer-patterns-ml-pipeline', '/kailex/r-eda-for-gstore-glm-keras-xgb', '/erikbruin/google-analytics-eda-lightgbm-screenshots', '/captcalculator/a-very-extensive-gstore-exploratory-analysis', '/ogrellier/teach-lightgbm-to-sum-predictions', '/mrlong/r-flatten-json-columns-to-make-single-data-frame', '/artgor/eda-on-basic-data-and-lgb-in-progress', '/pavansanagapati/google-analytics-simple-exploration', '/ogrellier/i-have-seen-the-future', '/shivamb/exploratory-analysis-ga-customer-revenue', '/ogrellier/user-level-lightgbm-lb-1-4480', '/paulorzp/perfect-score-one-line-without-semicolon', '/ogrellier/using-classification-for-predictions', '/smasar/tutorial-preprocessing-processing-evaluation', '/kailex/group-xgb-for-gstore-v2', '/f

an error occured
['/robikscube/2020-march-madness-data-first-look-eda', '/headsortails/jump-shot-to-conclusions-march-madness-eda', '/artgor/march-madness-2020-ncaam-eda-and-baseline', '/ratan123/march-madness-2020-ncaam-simple-lightgbm-on-kfold', '/jaseziv83/moreyball-in-the-college-game-a-full-ncaa-eda', '/vbmokin/mm-ncaam-no-leaks-lgb-xgb-logreg', '/khoongweihao/ncaam2020-xgboost-lightgbm-k-fold-baseline', '/jaseziv83/applying-pythagorean-expectation-to-major-sports', '/lucabasa/are-men-s-and-women-s-tournaments-different', '/hiromoon166/2020-basic-starter-kernel', '/corochann/2020-ncaa-eda-all-files-explained', '/warkingleo2000/eda-with-sparse-matrix', '/anshumoudgil/basketball-2020-vectors-feature-engg-strategy', '/robikscube/ncaa-basketball-court-plot-helper-functions', '/latimerb/2020-model-comparison-no-leak-submission', '/paulorzp/kenpom-scraper-2020', '/holoong9291/eda-for-ncaa-march-madness-en-just-for-men', '/gyanendradas/xgboost-train-0-44', '/catadanna/delete-leaked-from-

['/shubhendra7/higgs-boson-analysis', '/makhloufsabir/higgs-boson-classification-physics', '/ssismasterchief/identifying-higgs-boson-atlas-h2o', '/makhloufsabir/higgs-boson-classification-physics-rnn', '/imeintanis/identifying-higgs-boson-t-sne-umap-rapids', '/jiahuali/jiahua-li-higgs-bosons']
../data/repositories/kaggle/competitions/c/higgs-boson\notebooks.json
../data/repositories/kaggle/competitions/c/histopathologic-cancer-detection\notebooks.html
an error occured
['/qitvision/a-complete-ml-pipeline-fast-ai', '/CVxTz/cnn-starter-nasnet-mobile-0-9709-lb', '/vbookshelf/cnn-how-to-use-160-000-images-without-crashing', '/artgor/simple-eda-and-model-in-pytorch', '/fmarazzi/baseline-keras-cnn-roc-fast-10min-0-925-lb', '/suicaokhoailang/wip-densenet121-baseline-with-fastai', '/artgor/cancer-detection-with-kekas', '/abhinand05/histopathologic-cancer-detection-using-cnns', '/sermakarevich/complete-handcrafted-pipeline-in-pytorch-resnet9', '/gomezp/complete-beginner-s-guide-eda-keras-lb-0-93

[]
../data/repositories/kaggle/competitions/c/icdm-2015-drawbridge-cross-device-connections\notebooks.html
an error occured
['/benhamner/exploring-the-drawbridge-data', '/jayjay75/summary', '/willieliao/random-sample-by-country', '/benhamner/reading-bad-csv-files', '/benhamner/t-sne-visualization-of-devices', '/jayjay75/unsupervised-to-supervised', '/benhamner/fixing-bad-csv-files-with-download', '/benhamner/sample-rows-from-each-sqlite-table', '/datayo/first-commit', '/benhamner/sqlite-database-schema', '/pbkaran/sqlite-data-exploration', '/pbkaran/data-exploration', '/dowakin/sql-structure', '/olivetti/ip-join', '/benhamner/accessing-sqlite-from-python', '/cmorton/most-popular-cookie-in-country', '/sujeeth/icdm-2015', '/senbong/repair-csv-file', '/maddarwin/accessing-sqlite-from-python', '/jihyeseo/unzip-eda-dataset-removed']
../data/repositories/kaggle/competitions/c/icdm-2015-drawbridge-cross-device-connections\notebooks.json
../data/repositories/kaggle/competitions/c/ieee-fraud-de

['/zfturbo/benchmark-2019-speed-of-image-reading', '/ateplyuk/inat2019-starter-keras-efficientnet', '/khursani8/fast-ai-ootb-cutout-efficientnet', '/feichin/inception3-last-years-baseline', '/macaodha/basic-inat2019-data-exploration', '/hsinwenchang/keras-data-augmentation-visualize', '/hsinwenchang/keras-mobilenet-data-augmentation-visualize', '/sujoykg/xception-keras', '/interneuron/previous-benchmark-in-a-kernel-v0-0-0', '/jas10022/cnn-hypernetwork', '/s3chwartz/inaturalist-2019-at-fgvc6', '/alainminda/kernel11ab6576ac', '/lowecoryr/learn-from-other-kernels-fork-from-me', '/byhwdy/inat-utils', '/praxitelisk/inaturalist-2019-eda-dl', '/chrisevans/kernelfea8793d3e', '/cedriclacrambe/inaturalist-xception-512']
../data/repositories/kaggle/competitions/c/inaturalist-2019-fgvc6\notebooks.json
../data/repositories/kaggle/competitions/c/inaturalist-challenge-at-fgvc-2017\notebooks.html
['/jihyeseo/image-jpeg']
../data/repositories/kaggle/competitions/c/inaturalist-challenge-at-fgvc-2017\not

an error occured
['/zfturbo/benchmark-2019-speed-of-image-reading', '/seriousran/image-pre-processing-for-wild-images', '/gpreda/iwildcam-2019-eda-and-prediction', '/xhlulu/densenet-transfer-learning-iwildcam-2019', '/ateplyuk/iwildcam2019-pytorch-starter', '/ateplyuk/iwildcam2019-keras-efficientnet', '/xhlulu/reducing-image-sizes-to-32x32', '/artgor/iwildcam-basic-eda', '/tanlikesmath/fastai-starter-iwildcam-2019', '/xhlulu/cnn-baseline-iwildcam-2019', '/rblcoder/cnn-in-tf-coursera-course-iwildcam-2019-mobilenet', '/kokecacao/a-brief-data-exploration-for-iwildcam', '/a45632/eda-iwildcam-2019-v1-1', '/bonhart/pytorch-eda-and-resnet', '/joocheol/train-on-pretrained-model', '/akumaldo/resnet-from-scratch-keras', '/benjibb/fastai-with-senet154', '/twhitehurst3/keras-transfer-learning-iwildcam-2019', '/rbarman/iwildcam-2019-inat-idaho-resized', '/hrush777/data-processing-iwildcam-2019']
../data/repositories/kaggle/competitions/c/iwildcam-2019-fgvc6\notebooks.json
../data/repositories/kaggl

[]
../data/repositories/kaggle/competitions/c/kdd-cup-2014-predicting-excitement-at-donors-choose\notebooks.html
[]
../data/repositories/kaggle/competitions/c/kddcup2012-track1\notebooks.html
[]
../data/repositories/kaggle/competitions/c/kddcup2012-track2\notebooks.html
['/shivashi11/ad-click-prediction']
../data/repositories/kaggle/competitions/c/kddcup2012-track2\notebooks.json
../data/repositories/kaggle/competitions/c/kkbox-churn-prediction-challenge\notebooks.html
an error occured
['/headsortails/should-i-stay-or-should-i-go-kkbox-eda', '/jeru666/did-you-think-of-these-features', '/rastaman/churn-or-no-churn-exploration-data-analysis', '/joshwilkins2013/churn-baby-churn-user-logs', '/jagangupta/processing-huge-datasets-user-log', '/zxql2015/1-autoencoder-with-keras', '/carrie1/exploring-membership-data-and-customer-churn', '/jeru666/memory-reduction-and-data-insights', '/kevinbonnes/r-churn-prediction-baseline', '/hireme/kaggle-please-do-something-lb-0-0000', '/ripcurl/feedforward

../data/repositories/kaggle/competitions/c/landmark-retrieval-2019\notebooks.html
['/anisayari/download-images-dataset-python3-log-progressbar', '/xiuchengwang/python-dataset-download', '/huangxiaoquan/google-landmarks-v2-exploratory-data-analysis-eda', '/grapestone5321/exploration-of-the-dataset', '/automatichourglass/create-a-subset-of-training-dataset', '/pankajgiri/resnet-feature-extraction-pytorch']
../data/repositories/kaggle/competitions/c/landmark-retrieval-2019\notebooks.json
../data/repositories/kaggle/competitions/c/landmark-retrieval-2020\notebooks.html
an error occured
['/mayukh18/creating-submission-from-your-own-model', '/seriousran/google-landmark-retrieval-2020-eda', '/sandy1112/create-and-train-resnet50-from-scratch', '/camaskew/baseline-submission', '/waelkh/landmark2020-delf-model-submission-code', '/vstepanenko/batch-image-viewer', '/suruili/arcface-gem-train-on-tpu', '/nvnnghia/main-0806', '/mattbast/google-landmark-retrieval-triplet-loss', '/rai555/google-landmar

../data/repositories/kaggle/competitions/c/loan-default-prediction\notebooks.html
['/panamby/loan-default-prediction', '/darisdzakwanhoesien2/loan-default-prediction-imperial-college-london', '/rajvidoshi/loan-default-prediction-with-pca', '/abeersaxena/submission-31-08-2020', '/niraligala/loan-default-prediction-n', '/prathyushaprathyu/loan-default-prediction-prathyusha', '/vinaykumars/loan-default-prediction', '/shadab123456/loan-default-prediction-new']
../data/repositories/kaggle/competitions/c/loan-default-prediction\notebooks.json
../data/repositories/kaggle/competitions/c/lshtc\notebooks.html
[]
../data/repositories/kaggle/competitions/c/lyft-motion-prediction-autonomous-vehicles\notebooks.html
an error occured
['/nxrprime/understanding-the-data-catalyst-kekas-baseline', '/corochann/lyft-comprehensive-guide-to-start-competition', '/corochann/lyft-deep-into-the-l5kit-library', '/huanvo/lyft-complete-train-and-prediction-pipeline', '/pestipeti/pytorch-baseline-inference', '/pestip

[]
../data/repositories/kaggle/competitions/c/melbourne-university-seizure-prediction\notebooks.html
an error occured
['/zfturbo/seizure-boost-0-6-lb', '/deepcnn/feature-extractor-matlab2python-translated', '/treina/feature-extractor-matlab2python-translated', '/jeffhebert/seizure-spectrograms', '/solomonk/single-eeg-fft-entropy', '/anokas/seizure-boosting', '/gauss256/let-s-put-the-auc-score-issue-to-rest', '/asterios/proper-cross-validation', '/pakozm/zerorunlengthdistribution', '/deepcnn/spectrogram-pairs', '/bzamecnik/brain-sounds', '/gauss256/analysis-of-safe-labels', '/pakozm/dropoutcounts', '/openneuron/begin-with-r-generate-features-2', '/avilesmarcel/open-mat-in-python-pandas-dataframe', '/changgyu/another-data-corruption', '/vincento/0-6-lb', '/andy101/exploring-the-channel-energies', '/valadi/2016092090', '/alijs1/experiments-with-spectrograms']
../data/repositories/kaggle/competitions/c/melbourne-university-seizure-prediction\notebooks.json
../data/repositories/kaggle/compe

an error occured
['/headsortails/personalised-medicine-eda-with-tidy-r', '/reiinakano/basic-nlp-bag-of-words-tf-idf-word2vec-lstm', '/dextrousjinx/brief-insight-on-genetic-variations', '/sudalairajkumar/simple-exploration-notebook-personalized-medicine', '/bhuvaneshwaran/redefining-cancer-treatment-linear-svc', '/danofer/genetic-variants-to-protein-features', '/umutto/preliminary-data-analysis-using-word2vec', '/alyosama/doc2vec-with-keras-0-77', '/miracl16/basic-xgboost-tfidf-russian-version', '/kevinbonnes/basic-tf-idf-model-0-647-lb', '/dex314/tfidf-truncatedsvd-and-light-gbm', '/clustifier/basic-two', '/merckel/nci-thesaurus-naive-bayes-vs-rf-gbm-glm-dl', '/swanny/splitting-variations-into-features-0-595-lb', '/megemini/modified-the1owl-redefining-treatment-0-57018', '/ryanzhang/use-stage-1-solution-as-stage-2-submission', '/umutto/using-stage-1-test-results-for-stage-2-training', '/raulitoskys/using-lda-to-extract-topics-from-the-text', '/bsivavenu/cancer-treatment-with-machine-le

an error occured
['/threeplusone/sea-lion-coordinates', '/outrunner/use-keras-to-count-sea-lions', '/philschmidt/sea-lion-correlations-cv2-template-matching', '/radustoicescu/use-keras-to-classify-sea-lions-0-91-accuracy', '/radustoicescu/get-coordinates-using-blob-detection', '/asymptote/count-extract-sea-lions', '/radustoicescu/count-the-sea-lions-in-the-first-image', '/andraszsom/predict-the-number-of-pups', '/michaelzxu/counting-dots-and-not-sea-lions', '/kingburrito666/using-base-tensorlfow-to-count-sea-lions-1-22', '/authman/fix-all-the-errors', '/ranbato/finding-the-dots', '/jannis96/short-segmentation-attempt', '/vfdev5/updated-data-exploration', '/davidmercury/visualize-the-sunbath-network-of-sea-lion', '/toshik/relations-between-counts-and-image-size', '/madplanner/data-augmentation-image-cropping-rotation-and-2d', '/jasonquick/use-keras-to-classify-sea-lions-0-91-accuracy', '/oysteijo/just-displaying-the-images', '/nikitabu/brrrr']
../data/repositories/kaggle/competitions/c/

an error occured
['/tqchen/understanding-xgboost-model-on-otto-data', '/hsperr/finding-ensamble-weights', '/cbourguignat/why-calibration-works', '/threecourse/gbdt-implementation-kaggle-days-tokyo', '/tks0123456789/class-wise-feature-importance', '/benhamner/random-forest-benchmark-r-1', '/thakurrajanand/deep-learning-h2o-0-44', '/benhamner/top-10-leaderboard-performance-over-time', '/benhamner/t-sne-visualization-1', '/sushanttripathy/wrapper-for-models-ensemble', '/abhishek/beating-the-benchmark-v2-0', '/ankitdatascience/random-and-bayes-search-hyp-optimization-gpu', '/threecourse/gbdt-implementation-cython-kaggle-days-tokyo', '/mok0na/tc1-projet-otto-xgboost', '/omarelgabry/otto-product-classification-predictions', '/rudikruger/rf-gbm', '/sachinsharma1123/otto-group-classification-acc-82', '/pavansanagapati/stochastic-gradient-boosting-with-xgboost', '/mark42/confusion-matrix-with-probabilities', '/tqchen/testx']
../data/repositories/kaggle/competitions/c/otto-group-product-classifi

../data/repositories/kaggle/competitions/c/planet-understanding-the-amazon-from-space\notebooks.html
an error occured
['/anokas/data-exploration-analysis', '/ekami66/0-92837-on-private-lb-solution-with-keras', '/philschmidt/multilabel-classification-rainforest-eda', '/robinkraft/getting-started-with-the-data-now-with-docs', '/mratsim/starting-kit-for-pytorch-deep-learning', '/anokas/simple-keras-starter', '/opanichev/xgb-starter', '/anokas/fixed-f2-score-in-python', '/hortonhearsafoo/fast-ai-v3-lesson-3-planet', '/petrosgk/keras-vgg19-0-93028-private-lb', '/arsenyinfo/f-beta-score-for-keras', '/kelexu/keras-lb-0-913', '/kbalkoski/initial-eda-image-processing', '/lpachuong/keras-cv-optim', '/sashakorekov/end-to-end-resnet50-with-tta-lb-0-93', '/paulorzp/find-best-f2-score-threshold', '/jcarrillo/machine-learning-for-geospatial-data-workshop-2a', '/hortonhearsafoo/fast-ai-lesson-2', '/fppkaggle/making-tifs-look-normal-using-spectral-fork', '/dengzc/classify-rainforest']
../data/repositor

../data/repositories/kaggle/competitions/c/prudential-life-insurance-assessment\notebooks.html
an error occured
['/casalicchio/use-the-mlr-package-scores-0-649', '/zeroblue/xgboost-with-optimized-offsets', '/pruadmin/starter-script', '/wittmaan/exploring-the-data', '/mariopasquato/linear-model', '/inversion/digitize', '/tdevries/neural-network-example', '/chechir/features-predictive-power', '/pchitta/caret-cv', '/benhamner/xgboost-example-1', '/scirpus/genetic-programming-lb-0-64', '/inspector/keras-hyperopt-example-sketch', '/omarelgabry/prudential-insurance-risk-predictions', '/kkondo/ridge-regression-score-0-55443', '/threecourse/plotting-histograms', '/ashwiniprakash/life-insurance-assesment', '/bobcz3/journey-through-prudential', '/jonathanslomka/python-xgboost-starter', '/abhilashawasthi/xgboost-example-0-61171', '/gauravkkaushik/prudential-random-forest-and-xgb-classifier']
../data/repositories/kaggle/competitions/c/prudential-life-insurance-assessment\notebooks.json
../data/rep

an error occured
['/jesucristo/quick-visualization-eda', '/zaharch/keras-model-boosted-with-plates-leak', '/hmendonca/kaggle-pytorch-utility-script', '/leighplt/densenet121-pytorch', '/tanlikesmath/rcic-fastai-starter', '/xhlulu/recursion-2019-load-resize-and-save-images', '/yhn112/resnet18-baseline-pytorch-ignite', '/hmendonca/fold1h4r3-arcenetb4-2-256px-rcic-lb-0-9759', '/pheaboo/a-journey-through-the-experiment-design', '/giuliasavorgnan/plates-leak-clear-visualization', '/xhlulu/recursion-2-headed-efficientnet-2-stage-training', '/roydatascience/cellular-stacking-1-5', '/chandyalex/recursion-cellular-keras-densenet', '/antgoldbloom/training-using-google-automl', '/bonhart/eda-efficientnet-creating-video-pytorch', '/apap950419/visualizing-batch-effects-with-t-sne', '/apap950419/visualizing-the-effect-of-sirna-treatment', '/gidutz/starter-kernel-recursion-pharmaceuticals', '/christopherberner/hungarian-algorithm-to-optimize-sirna-prediction', '/darraghdog/simple-experiment-view']
../

['/golubev/baseline', '/golubev/mip-optimization-preference-cost-santa2019revenge', '/vipito/fork-of-santa-ip', '/shrutimechlearn/santa-returns-workshop-explorers-wave-1-vs-wave-2', '/aceconhielo/data-analysis-and-patterns-recognition', '/jazivxt/santa-opt-v2', '/seshurajup/eda-for-santa-2019-revenge-of-the-accountants', '/drcapa/santa-tour-revenge-bazaar', '/grapestone5321/santa-2019-sample-submission', '/lparker7/assignment-2', '/jonachehilton/cp2410-assignment-2-jonache-hilton', '/hs999518/santa-clause']
../data/repositories/kaggle/competitions/c/santa-2019-revenge-of-the-accountants\notebooks.json
../data/repositories/kaggle/competitions/c/santa-gift-matching\notebooks.html
an error occured
['/zfturbo/happiness-vs-gift-popularity-v2-0-89', '/zfturbo/infinite-probabilistic-improver-0-931', '/zfturbo/max-flow-with-min-cost-v2-0-9267', '/gaborfodor/improve-with-the-hungarian-method-0-9375', '/golubev/simple-example-min-cost-flow', '/selfishgene/nothing-fancy-just-some-heuristics-0-937

../data/repositories/kaggle/competitions/c/sberbank-russian-housing-market\notebooks.html
an error occured
['/captcalculator/a-very-extensive-sberbank-exploratory-analysis', '/sudalairajkumar/simple-exploration-notebook-sberbank', '/jtremoureux/map-visualizations-with-external-shapefile', '/robertoruiz/dealing-with-multicollinearity', '/arathee2/creating-some-useful-additional-features', '/nigelcarpenter/property-location-attempt-3', '/bguberfain/naive-xgb-lb-0-317', '/sudalairajkumar/feature-engineering-validation-strategy', '/konradb/adversarial-validation-and-other-scary-terms', '/viveksrinivasan/eda-focus-on-missing-values-and-simple-xgb', '/nigelcarpenter/lat-and-longitude-for-all-locations', '/philippsp/exploratory-analysis-sberbank', '/damianpanek/data-exploration-in-r', '/jayatou/xgbregressor-with-gridsearchcv', '/reynaldo/naive-xgb', '/ffisegydd/sklearn-multicollinearity-class', '/schoolpal/lgbm-lb-0-3093-0-3094', '/nigelcarpenter/cleaning-the-data-using-latitude-and-longitude

../data/repositories/kaggle/competitions/c/socialNetwork\notebooks.html
[]
../data/repositories/kaggle/competitions/c/sp-society-camera-model-identification\notebooks.html
an error occured
['/zfturbo/benchmark-2019-speed-of-image-reading', '/zeemeen/i-have-a-clue-what-i-am-doing-noise-patterns', '/inversion/i-have-no-clue-what-i-m-doing-benchmark', '/CVxTz/keras-simple-cnn-starter', '/toshif/ieee-image-manipulation-kernel', '/kmader/transfer-learning-with-inceptionv3', '/merryhunter/very-simple-pca-baseline', '/mkagenius/lets-just-see-some-images', '/golubev/simple-example-mcf-balance-improve-0-976-to-0-984', '/rupeshwadibhasme/transfer-learning-with-vgg16', '/igormunizims/pre-processing-for-data-augmentation', '/mkagenius/are-test-filenames-leaking-anything-likely-no', '/aakashnain/camera-check', '/kmader/cnn-with-gap-for-camera-detection', '/mkagenius/iphone-4s-has-only-drexel-university-campus-photos', '/mkagenius/loading-images-in-r', '/kmader/fourier-analysis-for-spatial-resolutio

an error occured
['/beyondbeneath/geolocation-visualisations', '/dvasyukova/a-linear-model-on-apps-and-labels', '/dvasyukova/brand-and-model-based-benchmarks', '/tonyliu/the-battle-of-smart-phones', '/zfturbo/xgboost-simple-starter', '/cartographic/low-ram-bag-of-apps', '/yibochen/xgboost-in-r-2-27217', '/alexxanderlarko/bag-of-apps-keras-11-08-16', '/xiaoml/bag-of-app-id-python-2-27392', '/dvasyukova/user-portraits', '/mrisdal/voronoi-diagrams-locations-by-brand', '/chechir/keras-on-labels-and-brands', '/iuga77/translate-brand-names-snippet', '/ramirogomez/talkingdata-event-locations-map', '/anokas/beat-the-benchmark-2-42107', '/maddarwin/one-day-in-china-geolocation-animation', '/alexxanderlarko/nnet-keras-10', '/tilii7/t-sne-visualization', '/arjoonn/the-one-table', '/russwilliams/investigating-time-and-day-and-gender']
../data/repositories/kaggle/competitions/c/talkingdata-mobile-user-demographics\notebooks.json
../data/repositories/kaggle/competitions/c/telstra-recruiting-network\

../data/repositories/kaggle/competitions/c/the-seeclickfix-311-challenge\notebooks.html
[]
../data/repositories/kaggle/competitions/c/the-winton-stock-market-challenge\notebooks.html
['/rlagrois/macropca-with-unknown-features', '/zonghao/predicting-stock-returns-by-xgboost', '/rlagrois/testing-macropca-predictive-power-winton']
../data/repositories/kaggle/competitions/c/the-winton-stock-market-challenge\notebooks.json
../data/repositories/kaggle/competitions/c/tmdb-box-office-prediction\notebooks.html
an error occured
['/artgor/eda-feature-engineering-and-model-interpretation', '/kamalchhirang/eda-feature-engineering-lgb-xgb-cat', '/tavoosi/predicting-box-office-revenue-with-random-forest', '/ashishpatel26/now-you-see-me', '/somang1418/eda-lgb-xgb-modelings-with-a-cute-panda-meme', '/shahules/eda-feature-engineering-and-keras-model', '/enric1296/complete-guide-eda-feat-model', '/datark1/tmdb-detailed-eda-and-time-series', '/sjj118/movie-visualization-recommendation-prediction', '/samus

../data/repositories/kaggle/competitions/c/twitter-personality-prediction\notebooks.html
[]
../data/repositories/kaggle/competitions/c/twitter-psychopathy-prediction\notebooks.html
[]
../data/repositories/kaggle/competitions/c/two-sigma-connect-rental-listing-inquiries\notebooks.html
an error occured
['/sudalairajkumar/simple-exploration-notebook-2-connect', '/kashnitsky/topic-6-feature-engineering-and-feature-selection', '/sudalairajkumar/xgb-starter-in-python', '/aikinogard/random-forest-starter-with-numerical-features', '/poonaml/two-sigma-renthop-eda', '/guoday/cv-statistics-better-parameters-and-explaination', '/brandenkmurray/it-is-lit', '/neviadomski/data-exploration-two-sigma-renthop', '/enrique1500/rental-listing-ny-map', '/danjordan/how-to-correctly-load-data-into-r', '/robertoruiz/feature-engineering-1-sentiment-analysis', '/den3b81/improve-perfomances-using-manager-features', '/den3b81/do-managers-matter-some-insights-on-manager-id', '/jxnlco/deduplicating-features', '/ygtc

an error occured
['/headsortails/wiki-traffic-forecast-exploration-wtf-eda', '/muonneutrino/wikipedia-traffic-data-exploration', '/zoupet/predictive-analysis-with-different-approaches', '/cpmpml/smape-weirdness', '/ymlai87416/web-traffic-time-series-forecast-with-4-model', '/arjunsurendran/using-lstm-on-training-data', '/opanichev/simple-model', '/paulorzp/one-line-solution-2nd-stage-final', '/screech/ensemble-of-arima-and-lstm-model-for-wiki-pages', '/safavieh/median-estimation-by-fibonacci-et-al-lb-44-9', '/tunguz/forecast-example-w-prophet-median', '/clustifier/weekend-weekdays', '/dextrousjinx/brief-insight-on-web-traffic-time-series', '/nigelcarpenter/page-median', '/rshally/web-traffic-cross-valid-round-and-wk-lb-44-5', '/merckel/preliminary-investigation-holtwinters-arima', '/chechir/weekend-flag-median-with-wiggle', '/gvyshnya/prophet-class-wrapper', '/gvyshnya/parallel-operations-over-a-pandas-df', '/attollos/time-series-forecast-example-with-prophet']
../data/repositories/kag

[]
../data/repositories/kaggle/competitions/c/yandex-personalized-web-search-challenge\notebooks.html
[]
../data/repositories/kaggle/competitions/c/yelp-recruiting\notebooks.html
[]
../data/repositories/kaggle/competitions/c/yelp-recsys-2013\notebooks.html
[]
../data/repositories/kaggle/competitions/c/yelp-restaurant-photo-classification\notebooks.html
an error occured
['/wendykan/expensive-restaurants-look-like-this', '/valueq/util-for-data-exploration', '/enerrio/data-exploration-yelp-classification', '/wongjingping/naive-benchmark-0-61', '/innerproduct/trying-to-process-images', '/millerintllc/imagine-an-image1', '/anokas/patch-features-rfr', '/dmytrolystopad/imagine-an-image1', '/innerproduct/more-naivete', '/triskelion/shallow-learning', '/yrevar/yelp-training-data-helper', '/innerproduct/imagine-an-image', '/rafaellopes/load-data-with-get-dummies', '/sgovindu/yelp-data-analysis', '/luckygemini/notebook5d5fc80c9f', '/titericz/imagine-an-image3', '/benhamner/sample-photos', '/apara

In [None]:
# download notebooks
#
# For every competition folder below `path` that has a notebooks.json, work out
# which of the listed notebooks have not been saved yet, scrape the missing
# ones in a single selenium batch and store each page as
#   <path>/<folder>/notebooks/<author>/<name>/notebook.html
# Links are expected to look like '/<author>/<notebook-name>'.

path = '../data/repositories/kaggle/competitions/c/'
json_file = 'notebooks.json'
# List the very directory the loop below joins against. The original listed
# `folder_base`, a name this cell never defines (hidden kernel state).
# sorted() keeps the '### <i>' progress indices stable between runs, because
# os.listdir() returns entries in arbitrary order.
folders = sorted(os.listdir(path))
subfolder = 'notebooks/'
urlbase = 'https://www.kaggle.com'

print(len(folders))

for i, folder in enumerate(folders):
    item = os.path.join(path, folder, json_file)
    print('###', i, item)
    if not os.path.isfile(item):
        # no notebook list was collected for this competition
        continue

    links = load_data(item, fromJson=True)

    chunk = []     # links whose notebook.html is still missing
    targets = {}   # link -> file the scraped page has to be written to
    for link in links:
        temp = link.split('/')
        # '/author/name' splits into ['', 'author', 'name']; anything shorter
        # (or with an empty part) cannot be mapped to a folder, so skip it
        # instead of aborting the whole run with an IndexError.
        if len(temp) < 3 or not temp[1] or not temp[2]:
            print('### skipping malformed link:', link)
            continue
        author = temp[1]
        name = temp[2].replace(' ','_').replace(':','')

        # create folders
        f1 = os.path.join(path, folder, subfolder, author+'/')
        f2 = os.path.join(path, folder, subfolder, author, name+'/')
        create_folder(f1)
        create_folder(f2)

        # check notebooks
        file = os.path.join(f2, 'notebook.html')
        if not os.path.isfile(file):
            chunk.append(link)
            targets[link] = file

    print('### chunk:', i, 'len:', len(chunk), 'links:', chunk)

    # get content for chunk (datasets)
    if len(chunk) > 0:
        result = url_scraper_batch(urlbase, chunk, waitForId='comments', headless = True, quitOnEnd = True)

        # `entry` (not `item`) so the json path held by `item` is not shadowed
        for entry in result:
            # Write to exactly the file whose absence was checked above, so the
            # "already downloaded" test and the output location cannot drift.
            file = targets.get(entry['url'])
            if file is None:
                # url not among the requested links: fall back to deriving the
                # location from the url itself
                file = path + folder + '/' + subfolder + entry['url'] + '/notebook.html'
                file = file.replace(' ','_').replace(':','').replace('//','/')
            print('### output file', file)
            store_data(entry['html'], file)

419
### 0 ../data/repositories/kaggle/competitions/c/15-071x-the-analytics-edge-competition-spring-2015\notebooks.json
### 1 ../data/repositories/kaggle/competitions/c/15-071x-the-analytics-edge-spring-20152\notebooks.json
### 2 ../data/repositories/kaggle/competitions/c/20-newsgroups-ciphertext-challenge\notebooks.json
### chunk 2 len 0 links []
### 3 ../data/repositories/kaggle/competitions/c/3d-object-detection-for-autonomous-vehicles\notebooks.json
### chunk 3 len 0 links []
### 4 ../data/repositories/kaggle/competitions/c/abstraction-and-reasoning-challenge\notebooks.json
### chunk 4 len 0 links []
### 5 ../data/repositories/kaggle/competitions/c/accelerometer-biometric-competition\notebooks.json
### 6 ../data/repositories/kaggle/competitions/c/acm-sf-chapter-hackathon-big\notebooks.json
### 7 ../data/repositories/kaggle/competitions/c/acm-sf-chapter-hackathon-small\notebooks.json
### 8 ../data/repositories/kaggle/competitions/c/acquire-valued-shoppers-challenge\notebooks.json
###

Page is ready! https://www.kaggle.com/abhinand05/covid-19-digging-a-bit-deeper
Page is ready! https://www.kaggle.com/davidbnn92/weather-data
Page is ready! https://www.kaggle.com/deepakdeepu8978/covid-19-analysis-eda-forecasting
Page is ready! https://www.kaggle.com/pradeepmuniasamy/covid19-inside-story-of-each-countries
Page is ready! https://www.kaggle.com/sambitmukherjee/covid-19-forecasting-with-regression-trees
Page is ready! https://www.kaggle.com/dferhadi/logistic-curve-fitting-global-covid-19-confirmed
Page is ready! https://www.kaggle.com/jasonbenner/lets-try-xgboost-simple-w-added-features
Page is ready! https://www.kaggle.com/corochann/covid-19-effect-of-temperature-humidity
Page is ready! https://www.kaggle.com/group16/sigmoid-per-country-no-leakage
Page is ready! https://www.kaggle.com/super13579/covid19-global-forcast-simple-eda-pr-model
Page is ready! https://www.kaggle.com/shakshisharma/kernelb454ab925d
Page is ready! https://www.kaggle.com/fanconic/covid-19-additional-

Page is ready! https://www.kaggle.com/davidbnn92/weather-data
Page is ready! https://www.kaggle.com/dferhadi/logistic-curve-fitting-global-covid-19-confirmed
Page is ready! https://www.kaggle.com/binhlc/sars-cov-2-exponential-model-week-2
Page is ready! https://www.kaggle.com/eswarchandt/timeseries-forecasting-of-covid-19-week-2-arima
Page is ready! https://www.kaggle.com/corochann/covid-19-effect-of-temperature-humidity
Page is ready! https://www.kaggle.com/aerdem4/covid19-w2-final-v2
Page is ready! https://www.kaggle.com/gaborfodor/covid-19-a-few-charts-and-a-simple-baseline
Page is ready! https://www.kaggle.com/khotijahs1/covid19-forecasting-randomforest
Page is ready! https://www.kaggle.com/dferhadi/global-forecasting-covid-19-random-forest
Page is ready! https://www.kaggle.com/cpmpml/fatalities-prediction-via-linear-regression
Page is ready! https://www.kaggle.com/rohanrao/covid-19-w2-lgb-mad
Page is ready! https://www.kaggle.com/ranjithks/few-lines-of-code-without-data-leak
Page 

Page is ready! https://www.kaggle.com/nitishabharathi/the-story-of-covid-19-in-india-eda-and-prediction
Page is ready! https://www.kaggle.com/davidbnn92/weather-data
Page is ready! https://www.kaggle.com/anjum48/seir-hcd-model
Page is ready! https://www.kaggle.com/dferhadi/covid-19-predictions-growth-factor-and-calculus
Page is ready! https://www.kaggle.com/eswarchandt/covid-19-forecasting-xgboost
Page is ready! https://www.kaggle.com/yuanquan/covid-19-prediction-by-country-and-province
Page is ready! https://www.kaggle.com/corochann/covid-19-effect-of-temperature-humidity
Page is ready! https://www.kaggle.com/aerdem4/covid-19-basic-model-not-leaky
Page is ready! https://www.kaggle.com/eswarchandt/timeseries-forecasting-of-covid-19-week-3-arima
Page is ready! https://www.kaggle.com/madz2000/covid-19-week-3-analysis-prediction
Page is ready! https://www.kaggle.com/mobassir/covid-19-in-bangladesh
Page is ready! https://www.kaggle.com/gaborfodor/covid19-global-forecasting-top-submissions


Page is ready! https://www.kaggle.com/saga21/covid-global-forecast-sir-model-ml-regressions
Page is ready! https://www.kaggle.com/nitishabharathi/the-story-of-covid-19-in-india-eda-and-prediction
Page is ready! https://www.kaggle.com/corochann/covid-19-current-situation-on-december
Page is ready! https://www.kaggle.com/frlemarchand/covid-19-forecasting-with-an-rnn
Page is ready! https://www.kaggle.com/anshuls235/covid19-explained-through-visualizations
Page is ready! https://www.kaggle.com/davidbnn92/weather-data
Page is ready! https://www.kaggle.com/soham1024/covid-19-india-visualization-forecasting
Page is ready! https://www.kaggle.com/dferhadi/covid-19-predictions-growth-factor-and-calculus
Page is ready! https://www.kaggle.com/chekoduadarsh/epidemic-model-covid-19-india-visualizations
Page is ready! https://www.kaggle.com/super13579/covid-19-global-forecast-seir-visualize
Page is ready! https://www.kaggle.com/eswarchandt/timeseries-forecasting-of-covid-19-arima
Page is ready! https

../data/repositories/kaggle/competitions/c/covid19-global-forecasting-week-5\notebooks/gaborfodor/ created
../data/repositories/kaggle/competitions/c/covid19-global-forecasting-week-5\notebooks/gaborfodor\w5-top-submissions/ created
../data/repositories/kaggle/competitions/c/covid19-global-forecasting-week-5\notebooks/abhiparashar/ created
../data/repositories/kaggle/competitions/c/covid19-global-forecasting-week-5\notebooks/abhiparashar\eda-covid-19-notebook-1/ created
../data/repositories/kaggle/competitions/c/covid19-global-forecasting-week-5\notebooks/philippsinger/ created
../data/repositories/kaggle/competitions/c/covid19-global-forecasting-week-5\notebooks/philippsinger\covid-w5-worldometer-scraper/ created
../data/repositories/kaggle/competitions/c/covid19-global-forecasting-week-5\notebooks/mathurinache/ created
../data/repositories/kaggle/competitions/c/covid19-global-forecasting-week-5\notebooks/mathurinache\starter-code/ created
../data/repositories/kaggle/competitions/c/co

Page is ready! https://www.kaggle.com/dferhadi/covid-19-predictions-growth-factor-and-calculus
Page is ready! https://www.kaggle.com/mdmahmudferdous/covid-19-us-ca-forecasting-top-4-notebook-6th
Page is ready! https://www.kaggle.com/dferhadi/global-forecasting-covid-19-random-forest
Page is ready! https://www.kaggle.com/panosc/california-curves-vs-other-world-regions
Page is ready! https://www.kaggle.com/tunguz/simple-covid-19-ca-eda
Page is ready! https://www.kaggle.com/dferhadi/covid-19-time-series-starter-code
Page is ready! https://www.kaggle.com/abhijithchandradas/caprediction-linearregression-multiple-growth-rate
Page is ready! https://www.kaggle.com/kmatsuyama/covid-19-ca-by-simple-seir
Page is ready! https://www.kaggle.com/lisphilar/sir-f-model-in-california-usa
Page is ready! https://www.kaggle.com/ceshine/plotly-eda-example
Page is ready! https://www.kaggle.com/akihisayamakawa/covid-19-ca-seir-with-parameter-optimization
Page is ready! https://www.kaggle.com/nickteim/covid19-