In [1]:
# imports
import requests
import json
import os
import sys
import platform
import time
import math
import datetime
import pandas as pd
import numpy as np

from bs4 import BeautifulSoup

In [2]:
# generic store data to file function
def store_data(data, file, mode='w', toJson=False):
    if toJson:
        data = json.dumps(data)
    with open(file, mode, encoding='utf-8') as fp:
        result = fp.write(data)
        return result
    
# generic load data from file function
def load_data(file, fromJson=False):
    if os.path.isfile(file):
        with open(file, 'r', encoding='utf-8', errors="ignore") as fp:
            data = fp.read()
            if fromJson:
                data = json.loads(data)
            return data
    else:
        return 'file not found'

# test text
#print(store_data('Hello', '../data/repositories/mlart/test.txt'))
#print(load_data('../data/repositories/mlart/test.txt'))

# test json
#print(store_data({'msg':'Hello World'}, '../data/repositories/mlart/test.json', toJson=True))
#print(load_data('../data/repositories/mlart/test.json', fromJson=True))

In [3]:
# helper function to create folder create_folder
def create_folder(path):
    if not os.path.exists(os.path.dirname(path)):
        try:
            os.makedirs(os.path.dirname(path))
            print(path + ' created')
        except OSError as exc: # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

In [4]:
# scan text for predefined terms

text = 'We use LSTM for anomaly and object detection. As Convolutional Neural Networks are great for ML.'

pd_ml_terms = pd.read_csv('../data/patterns/ml_terms.csv')
ml_terms = pd_ml_terms['Term'].tolist()
ml_slugs = pd_ml_terms['Slug'].tolist()
ml_slugs = [x for x in ml_slugs if str(x) != 'nan']
ml_tags = pd_ml_terms['Tag'].tolist()
ml_tags = [x for x in ml_tags if str(x) != 'nan']

#print(ml_tags)

ml_libs = pd.read_csv('../data/patterns/ml_libraries.csv')
ml_libs = ml_libs['Python Package'].tolist()

def match_text(haystack, needles, toLower = False, unique = True):
    
    if toLower == True:
        haystack = haystack.lower()
        needles = [x.lower() for x in needles]
    
    if unique == True:
        matches = {x for x in needles if x in haystack}
        matches = list(matches)
    else:
        matches = [x for x in needles if x in haystack]
    
    return matches

def match_tags(haystack):
    df = pd.read_csv('../data/patterns/ml_terms.csv')
    tags = []
    
    df.set_index('Term', inplace = True)
    for item in haystack:
        try:
            tag = df.loc[item].get('Tag')
            if not 'nan' in str(tag):
                tags.append(tag)
        except:
            pass
        
    df.set_index('Slug', inplace = True)
    for item in haystack:
        try:
            tag = df.loc[item].get('Tag')
            if not 'nan' in str(tag):
                tags.append(str(tag))
        except:
            pass
        
    #if 'ANN' in tags or 'CNN' in tags or 'RNN' in tags:
    #    tags.remove('NN')
    
    return list(tags)

#ml_slugs, ml_terms, ml_libs, match_text(haystack, needles, toLower = False, unique = True)
needles = {
    'ml_slugs': ml_slugs,
    'ml_terms': ml_terms,
    'ml_libs': ml_libs,
}
needles_need_str_lower = {
    'ml_slugs': False,
    'ml_terms': True,
    'ml_libs': False,
}

matches = []

matches.extend(match_text(text, ml_terms, True))
matches.extend(match_text(text, ml_slugs, False))
print('matches', matches)

tags = match_tags(matches)
print('tags', tags)

matches ['neural network', 'detect', 'object detection', 'convolutional neural network', 'lstm', 'anomaly', 'ML']
tags ['NN', 'Object Detection', 'CNN', 'LSTM', 'ML']


In [5]:
# get file modifictaion date
# https://stackoverflow.com/questions/237079/how-to-get-file-creation-modification-date-times-in-python

def creation_date(path_to_file, datetime = True):
    """
    Try to get the date that a file was created, falling back to when it was
    last modified if that isn't possible.
    See http://stackoverflow.com/a/39501288/1709587 for explanation.
    """
    if platform.system() == 'Windows':
        timestamp = os.path.getctime(path_to_file)
    else:
        stat = os.stat(path_to_file)
        try:
            timestamp = stat.st_birthtime
        except AttributeError:
            # We're probably on Linux. No easy way to get creation dates here,
            # so we'll settle for when its content was last modified.
            timestamp = stat.st_mtime
        
    if datetime == True:
        timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp))

    return timestamp
    
folder_base = '../data/repositories/kaggle/competitions/c/'
folder = '3d-object-detection-for-autonomous-vehicles/notebooks/asimandia/lyft3d-inference-kernel/'
notebook = 'notebook_02.html'
kernel = 'kernel.html'
print(creation_date(folder_base+folder+notebook))

2020-12-12 16:08:03


In [6]:
# clear text formatting
def clear_text(text):
    return text.replace('\n',' ').replace('\r','').replace('¶','').strip()

In [7]:
# scrape competition

folder_base = '../data/repositories/kaggle/competitions/c/'
folder = '3d-object-detection-for-autonomous-vehicles/'
dataset = 'dataset.html'

def scrape_competition(html):
    soup = BeautifulSoup(html, 'html.parser')
    meta = {}
    
    meta['title'] = soup.find('h1', class_="competition-header__title").text.strip()
    meta['subtitle'] = soup.find('h2', class_="competition-header__subtitle").text.strip()
    meta['type'] = soup.find('p', class_="competition-header__classification-text").text.strip()
    meta['organisation'] = soup.find('span', class_="competition-header__organization-name")
    if meta['organisation'] == None:
        meta['organisation'] = ''
    else:
        meta['organisation'] = meta['organisation'].text.strip()
    temp = soup.find_all('li', class_="horizontal-list-item horizontal-list-item--bullet horizontal-list-item--default")
    for item in temp:
        #print(item.text)
        if 'team' in item.text:
            meta['teams'] = item.text.replace('teams','').replace('team','').strip()
        if 'ago' in item.text:
            #item = BeautifulSoup(item, 'html.parser')
            item = item.select('li>span>span')[0]
            meta['date_closed'] = item.get('title')
    
    meta['description'] = clear_text(soup.find('div', class_="competition-overview__content").text)
    meta['tags'] = [x.text.strip() for x in soup.find_all('span', class_="CategoryButton_CategoryName-sc-c10946 jFsDhg")]
    
    
    

    return meta

html = load_data(folder_base+folder+dataset)
if 'file not found' in html:
    print(html)
meta = scrape_competition(html)
print(meta)
#store_data(links, folder_base+folder+out, toJson=True)

{'title': 'Lyft 3D Object Detection for Autonomous Vehicles', 'subtitle': 'Can you advance the state of the art in 3D object detection?', 'type': 'Featured prediction Competition', 'organisation': 'Lyft', 'teams': '547', 'date_closed': 'Wed Nov 13 2019 00:59:00 GMT+0100 (Mitteleuropäische Normalzeit)', 'description': 'Self-driving technology presents a rare opportunity to improve the quality of life in many of our communities. Avoidable collisions, single-occupant commuters, and vehicle emissions are choking cities, while infrastructure strains under rapid urban growth. Autonomous vehicles are expected to redefine transportation and unlock a myriad of societal, environmental, and economic benefits. You can apply your data analysis skills in this competition to advance the state of self-driving technology. Lyft, whose mission is to improve people’s lives with the world’s best transportation, is investing in the future of self-driving vehicles. Level 5, their self-driving division, is wo

In [8]:
# iterate competions

url = url = 'https://www.kaggle.com/c/'
folder_base = '../data/repositories/kaggle/competitions/c/'
folder = '3d-object-detection-for-autonomous-vehicles/'
dataset = 'dataset.html'
file_out = 'meta.json'

quit = 0 # quit after n files processed / 0 ... no limit

folders = os.listdir(folder_base)
print('folder:', len(folders))
i = 0

runtime_start = time.time()

for folder in folders:
    subfolders = os.listdir(os.path.join(folder_base,folder))
    print('subfolder:', i, len(folders))
    #print(folder)
    #print('###', i, os.path.join(folder_base,folder,subfolder))
    path = os.path.join(folder_base,folder,dataset)
    i += 1

    if os.path.exists(path):
        print(path)
        html = load_data(path)
        if 'file not found' in html:
            print(html)
        meta = scrape_competition(html)
        meta['link'] = url+folder
        #print(meta)
        store_data(meta, os.path.join(folder_base,folder,file_out), toJson=True)
            
    if quit!=0 and i>quit:
        break

runtime_end = time.time()
print('runtime:', round(runtime_end - runtime_start, 3), 'seconds for', i, 'items')

folder: 419
subfolder: 0 419
../data/repositories/kaggle/competitions/c/15-071x-the-analytics-edge-competition-spring-2015\dataset.html
subfolder: 1 419
../data/repositories/kaggle/competitions/c/15-071x-the-analytics-edge-spring-20152\dataset.html
subfolder: 2 419
../data/repositories/kaggle/competitions/c/20-newsgroups-ciphertext-challenge\dataset.html
subfolder: 3 419
../data/repositories/kaggle/competitions/c/3d-object-detection-for-autonomous-vehicles\dataset.html
subfolder: 4 419
../data/repositories/kaggle/competitions/c/abstraction-and-reasoning-challenge\dataset.html
subfolder: 5 419
../data/repositories/kaggle/competitions/c/accelerometer-biometric-competition\dataset.html
subfolder: 6 419
../data/repositories/kaggle/competitions/c/acm-sf-chapter-hackathon-big\dataset.html
subfolder: 7 419
../data/repositories/kaggle/competitions/c/acm-sf-chapter-hackathon-small\dataset.html
subfolder: 8 419
../data/repositories/kaggle/competitions/c/acquire-valued-shoppers-challenge\dataset.

subfolder: 80 419
../data/repositories/kaggle/competitions/c/data-science-bowl-2018\dataset.html
subfolder: 81 419
../data/repositories/kaggle/competitions/c/data-science-bowl-2019\dataset.html
subfolder: 82 419
../data/repositories/kaggle/competitions/c/data-science-for-good-careervillage\dataset.html
subfolder: 83 419
../data/repositories/kaggle/competitions/c/data-science-for-good-city-of-los-angeles\dataset.html
subfolder: 84 419
../data/repositories/kaggle/competitions/c/data-science-london-scikit-learn\dataset.html
subfolder: 85 419
../data/repositories/kaggle/competitions/c/datasciencebowl\dataset.html
subfolder: 86 419
../data/repositories/kaggle/competitions/c/dato-native\dataset.html
subfolder: 87 419
../data/repositories/kaggle/competitions/c/decoding-the-human-brain\dataset.html
subfolder: 88 419
../data/repositories/kaggle/competitions/c/deepfake-detection-challenge\dataset.html
subfolder: 89 419
../data/repositories/kaggle/competitions/c/deloitte-churn-prediction\dataset.

subfolder: 161 419
../data/repositories/kaggle/competitions/c/histopathologic-cancer-detection\dataset.html
subfolder: 162 419
../data/repositories/kaggle/competitions/c/hivprogression\dataset.html
subfolder: 163 419
../data/repositories/kaggle/competitions/c/home-credit-default-risk\dataset.html
subfolder: 164 419
../data/repositories/kaggle/competitions/c/home-depot-product-search-relevance\dataset.html
subfolder: 165 419
../data/repositories/kaggle/competitions/c/homesite-quote-conversion\dataset.html
subfolder: 166 419
../data/repositories/kaggle/competitions/c/hospital\dataset.html
subfolder: 167 419
../data/repositories/kaggle/competitions/c/how-much-did-it-rain\dataset.html
subfolder: 168 419
../data/repositories/kaggle/competitions/c/how-much-did-it-rain-ii\dataset.html
subfolder: 169 419
../data/repositories/kaggle/competitions/c/human-protein-atlas-image-classification\dataset.html
subfolder: 170 419
../data/repositories/kaggle/competitions/c/humpback-whale-identification\dat

subfolder: 241 419
../data/repositories/kaggle/competitions/c/mastercard-data-cleansing-competition-finals\dataset.html
subfolder: 242 419
../data/repositories/kaggle/competitions/c/mdm\dataset.html
subfolder: 243 419
../data/repositories/kaggle/competitions/c/melbourne-university-seizure-prediction\dataset.html
subfolder: 244 419
../data/repositories/kaggle/competitions/c/mens-machine-learning-competition-2018\dataset.html
subfolder: 245 419
../data/repositories/kaggle/competitions/c/mens-machine-learning-competition-2019\dataset.html
subfolder: 246 419
../data/repositories/kaggle/competitions/c/mercari-price-suggestion-challenge\dataset.html
subfolder: 247 419
../data/repositories/kaggle/competitions/c/mercedes-benz-greener-manufacturing\dataset.html
subfolder: 248 419
../data/repositories/kaggle/competitions/c/MerckActivity\dataset.html
subfolder: 249 419
../data/repositories/kaggle/competitions/c/microsoft-malware-prediction\dataset.html
subfolder: 250 419
../data/repositories/kagg

subfolder: 319 419
../data/repositories/kaggle/competitions/c/risky-business\dataset.html
subfolder: 320 419
../data/repositories/kaggle/competitions/c/rossmann-store-sales\dataset.html
subfolder: 321 419
../data/repositories/kaggle/competitions/c/rsna-intracranial-hemorrhage-detection\dataset.html
subfolder: 322 419
../data/repositories/kaggle/competitions/c/rsna-pneumonia-detection-challenge\dataset.html
subfolder: 323 419
../data/repositories/kaggle/competitions/c/rsna-str-pulmonary-embolism-detection\dataset.html
subfolder: 324 419
../data/repositories/kaggle/competitions/c/RTA\dataset.html
subfolder: 325 419
../data/repositories/kaggle/competitions/c/RxVolumePrediction\dataset.html
subfolder: 326 419
../data/repositories/kaggle/competitions/c/santa-2019-revenge-of-the-accountants\dataset.html
subfolder: 327 419
../data/repositories/kaggle/competitions/c/santa-gift-matching\dataset.html
subfolder: 328 419
../data/repositories/kaggle/competitions/c/santa-workshop-tour-2019\dataset.h

subfolder: 398 419
../data/repositories/kaggle/competitions/c/whale-categorization-playground\dataset.html
subfolder: 399 419
../data/repositories/kaggle/competitions/c/whale-detection-challenge\dataset.html
subfolder: 400 419
../data/repositories/kaggle/competitions/c/WhatDoYouKnow\dataset.html
subfolder: 401 419
../data/repositories/kaggle/competitions/c/whats-cooking\dataset.html
subfolder: 402 419
../data/repositories/kaggle/competitions/c/whats-cooking-kernels-only\dataset.html
subfolder: 403 419
../data/repositories/kaggle/competitions/c/WIC2011\dataset.html
subfolder: 404 419
../data/repositories/kaggle/competitions/c/wikichallenge\dataset.html
subfolder: 405 419
../data/repositories/kaggle/competitions/c/wise-2014\dataset.html
subfolder: 406 419
../data/repositories/kaggle/competitions/c/womens-machine-learning-competition-2018\dataset.html
subfolder: 407 419
../data/repositories/kaggle/competitions/c/womens-machine-learning-competition-2019\dataset.html
subfolder: 408 419
../d

In [9]:
# collect all meta.json into a single csv

folder_base = '../data/repositories/kaggle/competitions/c/'
file_meta = 'meta.json'
fp_csv = '../data/database/kaggle_competitions.csv'

quit = 0 # quit after n files processed / 0 ... no limit

folders = os.listdir(folder_base)
print('folder:', len(folders))
i = 0

runtime_start = time.time()
df = pd.DataFrame()

runtime_start = time.time()

for folder in folders:
    subfolders = os.listdir(os.path.join(folder_base,folder))
    print('subfolder:', i, len(folders))
    #print(folder)
    #print('###', i, os.path.join(folder_base,folder,subfolder))
    path = os.path.join(folder_base,folder,file_meta)
    i += 1

    if os.path.exists(path):
        print(path)
        data = load_data(path, fromJson=True)
        if 'file not found' in data:
            print(html)
            
        data['description'] = clear_text(data['description'])

        # date (ignoring GMT+x)
        date_time_str = data['date_closed'].split('GMT')
        date_time_str = date_time_str[0].strip()
        date_time_obj = datetime.datetime.strptime(date_time_str, '%a %b %d %Y %H:%M:%S')
        data['date_closed'] = date_time_obj
        
        df = df.append(data, ignore_index=True)
            
    if quit!=0 and i>quit:
        break
        
runtime_end = time.time()
print('runtime:', round(runtime_end - runtime_start, 3), 'seconds for', i, 'items')
print(df.shape)
print(df.head())
        
# drop columns
#df.drop(columns=['author', 'submission'], inplace=True)

df.to_csv(fp_csv, sep=';', index=False)

folder: 419
subfolder: 0 419
../data/repositories/kaggle/competitions/c/15-071x-the-analytics-edge-competition-spring-2015\meta.json
subfolder: 1 419
../data/repositories/kaggle/competitions/c/15-071x-the-analytics-edge-spring-20152\meta.json
subfolder: 2 419
../data/repositories/kaggle/competitions/c/20-newsgroups-ciphertext-challenge\meta.json
subfolder: 3 419
../data/repositories/kaggle/competitions/c/3d-object-detection-for-autonomous-vehicles\meta.json
subfolder: 4 419
../data/repositories/kaggle/competitions/c/abstraction-and-reasoning-challenge\meta.json
subfolder: 5 419
../data/repositories/kaggle/competitions/c/accelerometer-biometric-competition\meta.json
subfolder: 6 419
../data/repositories/kaggle/competitions/c/acm-sf-chapter-hackathon-big\meta.json
subfolder: 7 419
../data/repositories/kaggle/competitions/c/acm-sf-chapter-hackathon-small\meta.json
subfolder: 8 419
../data/repositories/kaggle/competitions/c/acquire-valued-shoppers-challenge\meta.json
subfolder: 9 419
../da

subfolder: 85 419
../data/repositories/kaggle/competitions/c/datasciencebowl\meta.json
subfolder: 86 419
../data/repositories/kaggle/competitions/c/dato-native\meta.json
subfolder: 87 419
../data/repositories/kaggle/competitions/c/decoding-the-human-brain\meta.json
subfolder: 88 419
../data/repositories/kaggle/competitions/c/deepfake-detection-challenge\meta.json
subfolder: 89 419
../data/repositories/kaggle/competitions/c/deloitte-churn-prediction\meta.json
subfolder: 90 419
../data/repositories/kaggle/competitions/c/deloitte-western-australia-rental-prices\meta.json
subfolder: 91 419
../data/repositories/kaggle/competitions/c/demand-forecasting-kernels-only\meta.json
subfolder: 92 419
../data/repositories/kaggle/competitions/c/denoising-dirty-documents\meta.json
subfolder: 93 419
../data/repositories/kaggle/competitions/c/detecting-insults-in-social-commentary\meta.json
subfolder: 94 419
../data/repositories/kaggle/competitions/c/diabetic-retinopathy-detection\meta.json
subfolder: 95

../data/repositories/kaggle/competitions/c/icdar2013-gender-prediction-from-handwriting\meta.json
subfolder: 172 419
../data/repositories/kaggle/competitions/c/icdar2013-stroke-recovery-from-offline-data\meta.json
subfolder: 173 419
../data/repositories/kaggle/competitions/c/icdm-2015-drawbridge-cross-device-connections\meta.json
subfolder: 174 419
../data/repositories/kaggle/competitions/c/ieee-fraud-detection\meta.json
subfolder: 175 419
../data/repositories/kaggle/competitions/c/imagenet-object-localization-challenge\meta.json
subfolder: 176 419
../data/repositories/kaggle/competitions/c/imaterialist-challenge-fashion-2018\meta.json
subfolder: 177 419
../data/repositories/kaggle/competitions/c/imaterialist-challenge-FGVC2017\meta.json
subfolder: 178 419
../data/repositories/kaggle/competitions/c/imaterialist-challenge-furniture-2018\meta.json
subfolder: 179 419
../data/repositories/kaggle/competitions/c/imaterialist-fashion-2019-FGVC6\meta.json
subfolder: 180 419
../data/repositorie

subfolder: 254 419
../data/repositories/kaggle/competitions/c/msk-redefining-cancer-treatment\meta.json
subfolder: 255 419
../data/repositories/kaggle/competitions/c/multi-modal-gesture-recognition\meta.json
subfolder: 256 419
../data/repositories/kaggle/competitions/c/multilabel-bird-species-classification-nips2013\meta.json
subfolder: 257 419
../data/repositories/kaggle/competitions/c/MusicHackathon\meta.json
subfolder: 258 419
../data/repositories/kaggle/competitions/c/new-york-city-taxi-fare-prediction\meta.json
subfolder: 259 419
../data/repositories/kaggle/competitions/c/nfl-big-data-bowl-2020\meta.json
subfolder: 260 419
../data/repositories/kaggle/competitions/c/nfl-playing-surface-analytics\meta.json
subfolder: 261 419
../data/repositories/kaggle/competitions/c/NFL-Punt-Analytics-Competition\meta.json
subfolder: 262 419
../data/repositories/kaggle/competitions/c/nips-2017-defense-against-adversarial-attack\meta.json
subfolder: 263 419
../data/repositories/kaggle/competitions/c

../data/repositories/kaggle/competitions/c/sberbank-russian-housing-market\meta.json
subfolder: 336 419
../data/repositories/kaggle/competitions/c/second-annual-data-science-bowl\meta.json
subfolder: 337 419
../data/repositories/kaggle/competitions/c/see-click-predict-fix\meta.json
subfolder: 338 419
../data/repositories/kaggle/competitions/c/seizure-detection\meta.json
subfolder: 339 419
../data/repositories/kaggle/competitions/c/seizure-prediction\meta.json
subfolder: 340 419
../data/repositories/kaggle/competitions/c/SemiSupervisedFeatureLearning\meta.json
subfolder: 341 419
../data/repositories/kaggle/competitions/c/sentiment-analysis-on-movie-reviews\meta.json
subfolder: 342 419
../data/repositories/kaggle/competitions/c/severstal-steel-defect-detection\meta.json
subfolder: 343 419
../data/repositories/kaggle/competitions/c/sf-crime\meta.json
subfolder: 344 419
../data/repositories/kaggle/competitions/c/shelter-animal-outcomes\meta.json
subfolder: 345 419
../data/repositories/kagg

subfolder: 418 419
../data/repositories/kaggle/competitions/c/zillow-prize-1\meta.json


NameError: name 'j' is not defined

In [10]:
# scrape notebook content

folder_base = '../data/repositories/kaggle/competitions/c/'
folder = '3d-object-detection-for-autonomous-vehicles/notebooks/asimandia/lyft3d-inference-kernel/'
notebook = 'notebook_02.html'

def scrape_notebook_content(html):
    soup = BeautifulSoup(html, 'html.parser')
    meta = {}
    
    meta['author'] = soup.find('a', class_="sc-paWCZ jvKDQp").get('href').replace('/','')
    meta['title'] = soup.find('a', class_="KernelViewerContext_KernelTitle-sc-rdaqnd").text   
    meta['type'] = soup.find('span', class_="KernelViewerContext_KernelTypeInfo-sc-1l6fza6 kqxzvL").text.replace('using data from','').strip()
    meta['sources'] = soup.find('a', class_="KernelViewerContext_DataSourceUrl-sc-1dm3ij9 lpoMHV").text.strip()
    meta['tags'] = soup.find('span', class_="KernelViewerContext_CategoriesWrapper-sc-8yrjj NgcTE").text.split('·')
    meta['tags'] = list(filter(None, meta['tags']))
    meta['votes'] = soup.find('span', class_="vote-button__vote-count").text
    meta['submission'] = soup.select('div.kernel-code-pane__submission-info-content')
    if len(meta['submission']) > 0:
        meta['submission'] = meta['submission'][0].text
    #meta['votes'] = soup.find('span', class_="vote-button__vote-count").text
    meta['license'] = soup.select('div.kernel-code-pane__subtitle>a')
    if len(meta['license']) > 0:
        meta['license'] = meta['license'][0].text
    
    score = soup.select('div.kernel-code-pane__submission-score-value')
    if len(score) > 0:
        meta['score_private'] = score[0].text
        meta['score_public'] = score[1].text
    
    
    temp = soup.find('span', class_="KernelViewerContext_KernelSubtitle-sc-rltxca esPWpV").select('span') #.text.split('·')
    #print (temp)
    for item in temp:
        #print(item)
        if 'views' in item.text:
            meta['views'] = int(''.join(filter(str.isdigit, item.text)))
        if 'GMT' in item.get('title', 'nan'):
            meta['date'] = item.get('title')
    
    return meta

html = load_data(folder_base+folder+notebook)
if 'file not found' in html:
    print(html)
meta = scrape_notebook_content(html)
print(meta)
#store_data(links, folder_base+folder+out, toJson=True)

{'author': 'asimandia', 'title': 'lyft3d inference kernel', 'type': 'Python notebook', 'sources': 'multiple data sources', 'tags': ['gpu'], 'votes': '38', 'submission': 'Best Submission SuccessfulSubmitted by [ods.ai] blonde & asimandia in Kitti lands a year ago', 'license': 'Apache 2.0', 'score_private': '0.040', 'score_public': '0.040', 'views': 2595, 'date': 'Tue Oct 29 2019 16:27:40 GMT+0100 (Mitteleuropäische Normalzeit)'}


In [11]:
# scrape kernel content

folder_base = '../data/repositories/kaggle/competitions/c/'
folder1 = '3d-object-detection-for-autonomous-vehicles/notebooks/asimandia/lyft3d-inference-kernel/'
folder2 = 'airbnb-recruiting-new-user-bookings/notebooks/datadave/ndcg-score-r/'
notebook = 'notebook_02.html'
kernel = 'kernel.html'

def scrape_kernel_content(html, needles, snippet = False):
    soup = BeautifulSoup(html, 'html.parser')
    
    if snippet == True:
        code = soup.find('div', {"id": "code"}).text
    else:
        code = soup.find('body').text
    #print(code)
    
    meta = {}
    
    for key, value in needles.items():
        meta[key] = match_text(code, value, needles_need_str_lower[key])
        
    meta['ml_tags'] = match_tags(meta['ml_terms'] + meta['ml_slugs'])
        
    meta['description'] = soup.select('div.text_cell_render')
    #print(meta['description'])
    if len(meta['description']) > 0:
        meta['description'] = clear_text(meta['description'][0].text)
    else:
        meta['description'] = ''
    
    return meta

# test for non-kernel-file ('code' embedded in notebook.html)
html = load_data(folder_base+folder2+notebook)
if 'file not found' in html:
    print(html)
meta = scrape_kernel_content(html, needles)
print(meta)

# test for kernel-file ('code' in kernel.html)
html = load_data(folder_base+folder1+kernel)
if 'file not found' in html:
    print(html)
meta = scrape_kernel_content(html, needles)
print(meta)


#store_data(links, folder_base+folder+out, toJson=True)

{'ml_slugs': [], 'ml_terms': ['filter', 'train', 'classif', 'detect', 'rank', 'predict', 'deep learning', 'training data', 'fit'], 'ml_libs': [], 'ml_tags': ['Classification', 'DL'], 'description': ''}
{'ml_slugs': ['AI'], 'ml_terms': ['filter', 'train', 'epoch', 'detect', 'neural network', 'u-net', 'loss', 'convolutional neural network', 'relu', 'label', 'layer', 'predict', 'image segmentation', 'recommend', 'test data', 'fit', 'activation function', 'model'], 'ml_libs': ['pytorch'], 'ml_tags': ['NN', 'U-Net', 'CNN', 'ReLu', 'Image Segmentation', 'AI'], 'description': "Please check out Guido's excellent kernel here. In this kernel i show how to perform inference on test set using the trained model. I just added RaDAM optimzer and got some better score. You can find the BEV of the test set here. Updates: Corrected yaw calculation Used category height information"}


In [13]:
# iterate all folders and compose results into meta.json

url = 'https://www.kaggle.com/'
folder_base = '../data/repositories/kaggle/competitions/'
file_notebook = 'notebook_02.html'
file_kernel = 'kernel.html'
file_out = 'meta.json'

skip = False # if true skip meta collection / set to false to recreate {file_out} from scratch
quit = 10000 # quit after n files processed
breakOnError = False

folders = os.listdir(folder_base)
print('folder:', len(folders))
i = 0
j = 0

for folder in folders:
    subfolders = os.listdir(os.path.join(folder_base,folder))
    print('subfolder:', len(folders))
    for subfolder in subfolders:
        print('###', i, os.path.join(folder_base,folder,subfolder))
        path = os.path.join(folder_base,folder,subfolder, 'notebooks/')
        i += 1
        
        if os.path.exists(path):
            projects = os.listdir(path)

            for author in projects:
                #print('author:', author)
                items = os.listdir(os.path.join(folder_base,folder,subfolder, 'notebooks/', author))

                for notebook in items:
                    j+=1
                    print(' - ', j, 'author:', author, '/ notebook:', notebook)
                    path_notebook = os.path.join(folder_base,folder,subfolder, 'notebooks/', author, notebook, file_notebook)
                    path_kernel = os.path.join(folder_base,folder,subfolder, 'notebooks/', author, notebook, file_kernel)
                    path_out = os.path.join(folder_base,folder,subfolder, 'notebooks/', author, notebook, file_out)
                    
                    meta = {}
                    
                    if skip and os.path.isfile(path_out):
                        print('skipped')
                            
                    else:
                        try:
                            # get meta-data
                            if os.path.isfile(path_notebook):
                                #print('notebook found')
                                meta['scraped_at'] = creation_date(path_notebook)
                                meta['link'] = f'{url}{author}/{notebook}'
                                meta['reference'] = f'{url}{folder}/{subfolder}'
                                html = load_data(path_notebook, fromJson=False)
                                if 'file not found' in html:
                                    print('notebook not found')
                                    break
                                meta.update(scrape_notebook_content(html))

                            # get meta-data
                            if os.path.isfile(path_kernel):
                                #print('kernel found')
                                html = load_data(path_kernel, fromJson=False)
                                if 'file not found' in html:
                                    print('kernel not found')
                                    break
                                meta.update(scrape_kernel_content(html, needles))
                            else:
                                #print('kernel not found')
                                html = load_data(path_notebook, fromJson=False)
                                if 'file not found' in html:
                                    print('notebook not found')
                                    break
                                meta.update(scrape_kernel_content(html, needles))

                            #print(meta)
                            store_data(meta, path_out, toJson=True)
                            
                        except Exception as e:
                            print("Oops!", e.__class__, "occurred.")
                            print(e)
                            if breakOnError:
                                sys.exit()
                
                if j>quit:
                    break
            if j>quit:
                break
    if j>quit:
        break

folder: 1
subfolder: 1
### 0 ../data/repositories/kaggle/competitions/c\15-071x-the-analytics-edge-competition-spring-2015
### 1 ../data/repositories/kaggle/competitions/c\15-071x-the-analytics-edge-spring-20152
### 2 ../data/repositories/kaggle/competitions/c\20-newsgroups-ciphertext-challenge
 -  1 author: a45632 / notebook: classification-tfidf-svm-2-0
 -  2 author: amansohane / notebook: level-3-with-partial-deciphering-0-94-level-3
 -  3 author: ananthu017 / notebook: classification-tfidf-logistic
 -  4 author: ashishpatel26 / notebook: attension-layer-basic-for-nlp
 -  5 author: ashishpatel26 / notebook: beginner-to-intermediate-nlp-tutorial
 -  6 author: ashishpatel26 / notebook: everything-you-want-to-know-about-20-ngctc
 -  7 author: ashishpatel26 / notebook: stratified-kfold-hyperparameter-tuning
 -  8 author: delayedkarma / notebook: some-basic-explorations-lgb-baseline
 -  9 author: interneuron / notebook: difficulty-1-deciphering-wip
 -  10 author: jazivxt / notebook: enig

 -  105 author: hmendonca / notebook: u-net-model-with-submission
 -  106 author: iafoss / notebook: fine-tuning-resnet34-on-ship-detection
 -  107 author: iafoss / notebook: unet34-dice-0-87
 -  108 author: iafoss / notebook: unet34-submission-tta-0-699-new-public-lb
 -  109 author: inversion / notebook: run-length-decoding-quick-start
 -  110 author: julian3833 / notebook: 2-understanding-and-plotting-rle-bounding-boxes
 -  111 author: julian3833 / notebook: 4-exploring-public-models
 -  112 author: kmader / notebook: baseline-u-net-model-part-1
 -  113 author: kmader / notebook: from-trained-u-net-to-submission-part-2
 -  114 author: kmader / notebook: transfer-learning-for-boat-or-no-boat
 -  115 author: kotarojp / notebook: first-step-for-submission-u-net-tta
 -  116 author: leighplt / notebook: pytorch-tutorial-dataset-data-preparetion-stage
 -  117 author: meaninglesslives / notebook: airbus-ship-detection-data-visualization
 -  118 author: npatta01 / notebook: naive-model
 -  1

 -  210 author: gunesevitan / notebook: ashrae-ucf-spider-and-eda-full-test-labels
 -  211 author: hmendonca / notebook: starter-eda-and-feature-selection-ashrae3
 -  212 author: isaienkov / notebook: keras-nn-with-embeddings-for-cat-features-1-15
 -  213 author: isaienkov / notebook: lightgbm-fe-1-19
 -  214 author: jaseziv83 / notebook: a-deep-dive-eda-into-all-variables
 -  215 author: kailex / notebook: ac-dc
 -  216 author: kimtaegwan / notebook: what-s-your-cv-method
 -  217 author: nroman / notebook: eda-for-ashrae
 -  218 author: nz0722 / notebook: aligned-timestamp-lgbm-by-meter-type
 -  219 author: patrick0302 / notebook: locate-cities-according-weather-temperature
 -  220 author: purist1024 / notebook: ashrae-simple-data-cleanup-lb-1-08-no-leaks
 -  221 author: rohanrao / notebook: ashrae-half-and-half
 -  222 author: ryches / notebook: simple-lgbm-solution
 -  223 author: sudalairajkumar / notebook: simple-exploration-notebook-ashrae
 -  224 author: yamsam / notebook: ashra

 -  319 author: pradeepmuniasamy / notebook: comparative-study-of-models-geotab-inertsection
 -  320 author: pradeepmuniasamy / notebook: extensive-eda-and-modelling-geotab-inertsection
 -  321 author: prazhant / notebook: predicting-wait-times-at-intersections
 -  322 author: pulkitmehtawork1985 / notebook: beating-benchmark
 -  323 author: ragnar123 / notebook: feature-engineering-and-forward-feature-selection
 -  324 author: sanikamal / notebook: bqml-predict-wait-times
 -  325 author: sirtorry / notebook: bigquery-ml-template-intersection-congestion
 -  326 author: snugyun01 / notebook: bigqueryml-starter-code
 -  327 author: tunguz / notebook: adversarial-geotab
 -  328 author: vikassingh1996 / notebook: thoughtful-eda-feature-engineering-and-lightgbm
 -  329 author: whatust / notebook: geotab-congestion
### 35 ../data/repositories/kaggle/competitions/c\bike-sharing-demand
 -  330 author: apapiu / notebook: predicting-bike-sharing-with-xgboost
 -  331 author: benhamner / notebook:

 -  428 author: joconnor / notebook: python-xgboost-starter-0-209-public-mcc
 -  429 author: jpmiller / notebook: flowpath-viz
 -  430 author: laurae2 / notebook: what-s-in-the-kaggle-docker
 -  431 author: mcosch / notebook: leaky-36-in-r
 -  432 author: mmueller / notebook: road-2-0-4
 -  433 author: rithal / notebook: magic-feature-visualization
 -  434 author: rohanrao / notebook: r-implementation-of-mcc-optimization
 -  435 author: scirpus / notebook: loo-template-for-low-memory
### 42 ../data/repositories/kaggle/competitions/c\boston-data-festival-hackathon
### 43 ../data/repositories/kaggle/competitions/c\career-con-2019
 -  436 author: anjum48 / notebook: leakage-within-the-train-dataset
 -  437 author: artgor / notebook: basic-pytorch-lstm
 -  438 author: artgor / notebook: bayesian-optimization-for-robots
 -  439 author: artgor / notebook: where-do-the-robots-drive
 -  440 author: friedchips / notebook: the-missing-link
 -  441 author: gpreda / notebook: robots-need-help
 -  

 -  540 author: hurlburt / notebook: age-and-screeners
 -  541 author: hurlburt / notebook: diagnosiscodesetcv2
 -  542 author: hurlburt / notebook: getting-installed-python-modules
 -  543 author: hurlburt / notebook: plot-test3
 -  544 author: hurlburt / notebook: plottest4
 -  545 author: hurlburt / notebook: plottest5
 -  546 author: paulperry / notebook: more-sqlite-testing
 -  547 author: paulperry / notebook: testing-sqlite
 -  548 author: paweljankiewicz / notebook: pawel-maks-insights
### 50 ../data/repositories/kaggle/competitions/c\challenges-in-representation-learning-facial-expression-recognition-challenge
 -  549 author: agrjarastogi / notebook: ic-sentimentanalysis
 -  550 author: agrjarastogi / notebook: ic-sentimentanalysis-e9dd27
 -  551 author: alexbsantos / notebook: ic-reconhecimento-emocoes-deep
 -  552 author: crucifierbladex / notebook: fer-challenge
 -  553 author: dolmangksun / notebook: facial-expression-ai
 -  554 author: drcapa / notebook: facial-expression

 -  648 author: hasnainajmal281 / notebook: iterative-cnn-approach
 -  649 author: jamesmcguigan / notebook: game-of-life-hashmap-solver
 -  650 author: jamesmcguigan / notebook: game-of-life-z3-constraint-satisfaction
 -  651 author: jpmiller / notebook: demo-cython-generator-and-keras-cnn
 -  652 author: li325040229 / notebook: simple-lgb-model-model-using-only-0-6-test-data
 -  653 author: li325040229 / notebook: the-game-of-life-display-of-cell-changes
 -  654 author: li325040229 / notebook: the-game-of-life-reverse-with-random-forest
 -  655 author: markuskarmann / notebook: 3rd-place-solution-part
 -  656 author: maxjeblick / notebook: crgl2020-iterative-cnn-approach-with-postproces
 -  657 author: parmarsuraj99 / notebook: a-neural-cnn-game-of-life-with-keras
 -  658 author: rohitiscute / notebook: cnn-conway-s-reverse-game-of-life-2020
 -  659 author: seraphwedd18 / notebook: application-of-gan-for-predicting-initial-state
 -  660 author: ulrich07 / notebook: quick-neighborhood

 -  756 author: jorijnsmit / notebook: mathematical-solution-to-sigmoid-parameters
 -  757 author: madz2000 / notebook: covid-19-week-3-analysis-prediction
 -  758 author: mdmahmudferdous / notebook: covid-19-italy-forecasting-fb-prophet
 -  759 author: mobassir / notebook: covid-19-in-bangladesh
 -  760 author: mrmorj / notebook: covid-19-eda-xgboost
 -  761 author: nitishabharathi / notebook: the-story-of-covid-19-in-india-eda-and-prediction
 -  762 author: ritarana123 / notebook: kernel6bb9d38623
 -  763 author: yuanquan / notebook: covid-19-prediction-by-country-and-province
### 69 ../data/repositories/kaggle/competitions/c\covid19-global-forecasting-week-4
 -  764 author: aestheteaman01 / notebook: covtan-covid-19-timeseries-analysis-notebook
 -  765 author: anshuls235 / notebook: covid19-explained-through-visualizations
 -  766 author: chekoduadarsh / notebook: epidemic-model-covid-19-india-visualizations
 -  767 author: corochann / notebook: covid-19-current-situation-on-decembe

 -  861 author: kmader / notebook: opencv-hog-submission
 -  862 author: kmader / notebook: pretrained-pspnet-on-driving-scenes
 -  863 author: kmader / notebook: vehicle-unet-fcl-segmentation
 -  864 author: lantingguo / notebook: get-a-few-images-and-labels-for-local-prototype
 -  865 author: lcantat / notebook: build-database-from-text-files
 -  866 author: mattobrien415 / notebook: improving-masks-creating-borders-between-objects
### 78 ../data/repositories/kaggle/competitions/c\DarkWorlds
### 79 ../data/repositories/kaggle/competitions/c\data-science-bowl-2017
 -  867 author: akh64bit / notebook: full-preprocessing-tutorial
 -  868 author: amorsili / notebook: fast-exploratory-data-analysis-in-r
 -  869 author: ankasor / notebook: improved-lung-segmentation-using-watershed
 -  870 author: anokas / notebook: exploratory-data-analysis-4
 -  871 author: apapiu / notebook: exploratory-analysis-visualization
 -  872 author: armamut / notebook: getting-the-lungs-right
 -  873 author: ar

 -  968 author: ahmedmurad1990 / notebook: data-science-london-scikit-learn
 -  969 author: alexlichtenberg / notebook: scikit-learn-getting-started
 -  970 author: aliwagdy / notebook: data-science-london-with-sklearn
 -  971 author: aman9d / notebook: data-science-london-scikit
 -  972 author: benscaria / notebook: data-science-london-sklearn
 -  973 author: chahat1 / notebook: data-science-london-classification
 -  974 author: etoile33 / notebook: kaggle01
 -  975 author: gsethi2409 / notebook: compare-performance-metrics-dt-knn-svc-rf-mlp
 -  976 author: intu290 / notebook: classifer-london-scikit
 -  977 author: julienmihai / notebook: data-science-london-classification
 -  978 author: pranaymns / notebook: datascience-london-sklearn-rfc-svm
 -  979 author: rishikoush / notebook: predicting-best-among-knn-dt-lr-svc-rf-mlp
 -  980 author: rizkioktafianto / notebook: london-sklearn-using-xgboost-fine-tuned
 -  981 author: ruchibahl18 / notebook: neural-network-version
 -  982 author

 -  1072 author: gaborfodor / notebook: dog-breed-pretrained-keras-models-lb-0-3
 -  1073 author: jesucristo / notebook: private-lb-simulation-i
 -  1074 author: johngull / notebook: breed-distribution
 -  1075 author: kaggleslayer / notebook: simple-convolutional-n-network-with-tensorflow
 -  1076 author: kmldas / notebook: beginner-s-guide-image-augmentation-transforms
 -  1077 author: methindor / notebook: dogbreeddatavisualisation
 -  1078 author: mrdbourke / notebook: tensorflow-2-x-tensorflow-hub-end-to-end-example
 -  1079 author: nafisur / notebook: dog-breed-identification-keras-cnn-basic
 -  1080 author: nirajpoudel / notebook: dogbreedidentification
 -  1081 author: phylake1337 / notebook: 0-18-loss-simple-feature-extractors
 -  1082 author: pvlima / notebook: use-pretrained-pytorch-models
 -  1083 author: robhardwick / notebook: xception-inceptionv3-ensemble-methods
 -  1084 author: salmaneunus / notebook: computer-vision-fundamentals
 -  1085 author: snide713 / notebook: f

 -  1179 author: gpreda / notebook: overfitting-the-private-leaderboard
 -  1180 author: iavinas / notebook: simple-short-solution-don-t-overfit-0-848
 -  1181 author: ishivinal / notebook: feature-importance-techniques
 -  1182 author: jahaziel / notebook: simple-model-glmnet
 -  1183 author: melondonkey / notebook: bayesian-spike-and-slab-in-pymc3
 -  1184 author: miroslavsabo / notebook: auc-0-844-in-11-loc
 -  1185 author: plasticgrammer / notebook: don-t-overfit-i-try
 -  1186 author: rafjaa / notebook: dealing-with-very-small-datasets
 -  1187 author: vincentlugat / notebook: logistic-regression-rfe
 -  1188 author: zachmayer / notebook: first-place-solution
### 101 ../data/repositories/kaggle/competitions/c\DontGetKicked
 -  1189 author: ayusheeagarwal / notebook: don-t-get-kicked
 -  1190 author: cherednichenkoa / notebook: do-not-get-kicked-keras
 -  1191 author: dhruvgupta2801 / notebook: don-t-get-kicked
 -  1192 author: funxexcel / notebook: don-t-get-kicked-pipeline-feat-e

### 112 ../data/repositories/kaggle/competitions/c\expedia-hotel-recommendations
 -  1286 author: ajay1216 / notebook: practical-guide-on-data-preprocessing-in-python
 -  1287 author: ccccat / notebook: r-version-of-most-popular-local-hotel
 -  1288 author: chipmonkey / notebook: channel-is-different-test-v-train
 -  1289 author: company / notebook: ehr-1
 -  1290 author: domesc / notebook: explore-data
 -  1291 author: dvasyukova / notebook: predict-hotel-type-with-pandas
 -  1292 author: dvasyukova / notebook: the-locations-puzzle
 -  1293 author: gaborfodor / notebook: last-week-xkcd
 -  1294 author: gaborfodor / notebook: latent-destination-features
 -  1295 author: gaborfodor / notebook: time-dimension
 -  1296 author: jiaofenx / notebook: expedia-hotel-recommendations
 -  1297 author: jiweiliu / notebook: most-popular-local-hotels
 -  1298 author: josealberto / notebook: destination-clusters
 -  1299 author: omarelgabry / notebook: explore-expedia-search-data
 -  1300 author: sig

 -  1394 author: benhamner / notebook: flatness-boosting-example
 -  1395 author: benhamner / notebook: rf-xgboost-example
 -  1396 author: domcastro / notebook: rf-xgboost-keras
 -  1397 author: domcastro / notebook: rf-xgboost-keras-flatline
 -  1398 author: fchollet / notebook: keras-starter-code-deep-pyramidal-mlp
 -  1399 author: gramolin / notebook: histograms
 -  1400 author: harshaneel / notebook: check-you-agreement-correlation-and-roc
 -  1401 author: holzner / notebook: candidate-mass
 -  1402 author: josefslavicek / notebook: simplified-version-of-my-solution
 -  1403 author: justfor / notebook: gridsearchcv-with-feature-in-xgboost
 -  1404 author: karma86 / notebook: rf-xgboost-keras-flatline-v-2-0
 -  1405 author: phunter / notebook: gridsearchcv-with-feature-in-xgboost
 -  1406 author: rakhlin / notebook: abcde
 -  1407 author: triskelion / notebook: testing-python-3
 -  1408 author: vicensgaitan / notebook: clipping-spreading
 -  1409 author: vicensgaitan / notebook: t-

 -  1502 author: ilyamich / notebook: mfcc-implementation-and-tutorial
 -  1503 author: kmader / notebook: spectrogram-classifier-mobilenet
 -  1504 author: matthewa313 / notebook: removing-uninformative-parts-of-audio-files
 -  1505 author: mpotma / notebook: learndatascience-presentation-lgbm-lb-0-836
 -  1506 author: nafisur / notebook: beginner-s-guide-to-audio-data-90b7f7
 -  1507 author: opanichev / notebook: lightgbm-baseline
 -  1508 author: saitanya / notebook: audio-recognition
 -  1509 author: sunqpark / notebook: data-loader-for-pytorch-with-mfcc
 -  1510 author: tanulsingh077 / notebook: audio-albumentations-transform-your-audio
 -  1511 author: thebrownviking20 / notebook: xgb-using-lda-and-mfcc-opanichev-s-features
### 132 ../data/repositories/kaggle/competitions/c\freesound-audio-tagging-2019
 -  1512 author: ashishpatel26 / notebook: feature-extraction-from-audio
 -  1513 author: carlolepelaars / notebook: bidirectional-lstm-for-audio-labeling-with-keras
 -  1514 autho

 -  1604 author: gauravjoshi1986 / notebook: ghostbuster-data
 -  1605 author: hhllcks / notebook: comparison-between-classifiers
 -  1606 author: hhllcks / notebook: neural-net-with-gridsearch
 -  1607 author: lilyelizabethjohn / notebook: standardization-using-standardscaler
 -  1608 author: mikhailg0 / notebook: monsters-classification-solution
 -  1609 author: netopedro / notebook: nn-approach-to-ghouls-goblins-and-ghosts
 -  1610 author: oysteijo / notebook: ghosts-n-goblins-n-neural-networks-lb-0-74858
 -  1611 author: quadmx08 / notebook: monsters-first-submission
 -  1612 author: samratp / notebook: machine-learning-with-ghouls-goblins-and-ghosts
 -  1613 author: shahules / notebook: ghouls-goblins-and-ghosts
 -  1614 author: sudalairajkumar / notebook: simple-exploration-notebook-1
 -  1615 author: xingobar / notebook: ghost-data-visualization
 -  1616 author: yoyocm / notebook: let-s-explore-and-classify-monsters
 -  1617 author: yoyocm / notebook: why-goblins-classification-

 -  1706 author: darwinwin / notebook: ncaaw20-eda-and-nn-lgb-catb-starter-7c65f8
 -  1707 author: darwinwin / notebook: ncaaw2020-lightgbm-k-fold-on-fire-viz
 -  1708 author: hamditarek / notebook: ncaaw20-eda-and-nn-lgb-catb-starter-7c65f8
 -  1709 author: hiromoon166 / notebook: 2020-women-s-starter-kernel
 -  1710 author: immvab / notebook: nn-starter-tensorflow
 -  1711 author: jaseziv83 / notebook: applying-pythagorean-expectation-to-major-sports
 -  1712 author: khoongweihao / notebook: ncaaw2020-lightgbm-k-fold-on-fire-viz
 -  1713 author: lucabasa / notebook: are-men-s-and-women-s-tournaments-different
 -  1714 author: mika30 / notebook: madness-at-home-and-on-the-court-part-2
 -  1715 author: parulpandey / notebook: decoding-march-madness
 -  1716 author: robikscube / notebook: 2020-march-madness-data-first-look-eda
 -  1717 author: robikscube / notebook: ncaa-basketball-court-plot-helper-functions
 -  1718 author: takaishikawa / notebook: no-ml-modeling-ncaaw2020
 -  1719 au

 -  1818 author: drobchak1988 / notebook: herbarium-2020-fgvc7-create-tfrecords-tensorflow
 -  1819 author: gb00000 / notebook: herb-nn
 -  1820 author: jagannathrk / notebook: herbarium-2020
 -  1821 author: jullang / notebook: herbarium-via-resnet50-and-3-step-classification
 -  1822 author: khotijahs1 / notebook: identify-plant-species-from-herbarium-specimens
 -  1823 author: michaelschastlivcev / notebook: herbarium-2020-pytorch
 -  1824 author: riabovanderew / notebook: h-2fc-ce-d
 -  1825 author: rivilcan / notebook: herbarium-efficientnetb3
 -  1826 author: rsingh99 / notebook: getting-started-with-herbarium-2020
 -  1827 author: seraphwedd18 / notebook: herbarium-consolidating-the-details
 -  1828 author: sergey55 / notebook: herbarium-2020-notebook
 -  1829 author: shaunthesheep / notebook: fgvc7-herbarium-2020-data-viz
 -  1830 author: tathagatbanerjee / notebook: herbarium
 -  1831 author: thejravichandran / notebook: herbarium-2020-competition
 -  1832 author: tkm123456 / 

 -  1928 author: avijeetsingh1608 / notebook: clean-display-of-individual-records1
 -  1929 author: avijeetsingh1608 / notebook: sorry-don-t-run-running-out-of-ram
 -  1930 author: benhamner / notebook: default-r-text
 -  1931 author: devinanzelmo / notebook: component-cdf-s-and-sample-predictions
 -  1932 author: devinanzelmo / notebook: fiddling-with-xgb
 -  1933 author: devinanzelmo / notebook: kde-and-scatter-plot
 -  1934 author: devinanzelmo / notebook: log-histogram-of-label-values-version1
 -  1935 author: elenacuoco / notebook: splitmeandata-py
 -  1936 author: jamesallen1 / notebook: test-r
 -  1937 author: jetheurer / notebook: liz-python
 -  1938 author: jetheurer / notebook: liz-test
 -  1939 author: jihyeseo / notebook: rain-eda-need-to-unzip
 -  1940 author: mcf171 / notebook: notebookb0a53957ff
 -  1941 author: mlandry / notebook: clean-display-of-individual-records
 -  1942 author: mlandry / notebook: simple-benchmark-improvements
 -  1943 author: suchith0312 / noteboo

 -  2042 author: shahules / notebook: tackling-class-imbalance
 -  2043 author: tunguz / notebook: adversarial-ieee
 -  2044 author: vincentlugat / notebook: ieee-lgb-bayesian-opt
 -  2045 author: xhlulu / notebook: ieee-fraud-xgboost-with-gpu-fit-in-40s
### 175 ../data/repositories/kaggle/competitions/c\imagenet-object-localization-challenge
 -  2046 author: neerajnair / notebook: kernel76721cc377
### 176 ../data/repositories/kaggle/competitions/c\imaterialist-challenge-fashion-2018
 -  2047 author: aguyhasnoname / notebook: clean-img-rmv-face-bg-detect-chge-skin
 -  2048 author: am1to2 / notebook: data-exploration-and-analysis
 -  2049 author: anqitu / notebook: data-leakage-findings-from-validation
 -  2050 author: anqitu / notebook: for-starter-json-to-multilabel-in-24-seconds
 -  2051 author: badalgupta / notebook: simple-data-exploration
 -  2052 author: blastchar / notebook: imaterialist-challenge-r-or-not-r
 -  2053 author: dfanghu / notebook: gui-for-further-annotating-and-vie

 -  2144 author: ttahara / notebook: imet2019-chainer-starter-seresnet152-focalloss
 -  2145 author: xiuchengwang / notebook: keras-xception-fine-turning-facol-loss
 -  2146 author: zfturbo / notebook: benchmark-2019-speed-of-image-reading
### 182 ../data/repositories/kaggle/competitions/c\imet-2020-fgvc7
 -  2147 author: alexdesiqueira / notebook: imet-collection-2020-fgvc7-dataset
 -  2148 author: alimbekovkz / notebook: yandex-praktikum-pytorch-resnet50-inference
 -  2149 author: alimbekovkz / notebook: yandex-praktikum-pytorch-train-baseline-lb-0-699
 -  2150 author: ashkhagan / notebook: imet2020
 -  2151 author: ateplyuk / notebook: keras-imet2020-infer
 -  2152 author: ateplyuk / notebook: keras-imet2020-tpu-train
 -  2153 author: dimakyn / notebook: multi-label-keras
 -  2154 author: finlaymacrae / notebook: fastai-resnet34-transfer-learning
 -  2155 author: grapestone5321 / notebook: imet-collection-2020-sample-submission
 -  2156 author: jesucristo / notebook: imet2020-visual

 -  2252 author: enrique1500 / notebook: predict-oeis-with-markov-chains
 -  2253 author: enrique1500 / notebook: predict-oeis-with-ngrams-2
 -  2254 author: garethjns / notebook: classifying-tagging-sequences
 -  2255 author: javiervlab / notebook: integer-sequences-and-machine-learning
 -  2256 author: juliojaavier / notebook: sequence-learning
 -  2257 author: lukeaanderso / notebook: lstm-integer-sequence-testing
 -  2258 author: megaherz / notebook: naive-polinomical-prediction
 -  2259 author: mlanier / notebook: predict-oeis-with-markov-chains
 -  2260 author: ncchen / notebook: match-test-set-to-training-set
 -  2261 author: ncchen / notebook: recurrence-relation
 -  2262 author: shivaramkrs / notebook: classification-of-series
 -  2263 author: suruili / notebook: simple-ngram-with-python-and-no-lib
 -  2264 author: uioreanu / notebook: notebook-7e93afd402a4301e1421
### 192 ../data/repositories/kaggle/competitions/c\intel-mobileodt-cervical-cancer-screening
 -  2265 author: aam

### 197 ../data/repositories/kaggle/competitions/c\jigsaw-multilingual-toxic-comment-classification
 -  2365 author: abhishek / notebook: i-like-clean-tpu-training-kernels-i-can-not-lie
 -  2366 author: abhishek / notebook: inference-of-bert-tpu-model-ml-w-validation
 -  2367 author: faressayah / notebook: natural-language-processing-nlp-for-beginners
 -  2368 author: hamditarek / notebook: ensemble
 -  2369 author: jpmiller / notebook: augmenting-the-data
 -  2370 author: miklgr500 / notebook: jigsaw-tpu-bert-with-huggingface-and-keras
 -  2371 author: mobassir / notebook: understanding-cross-lingual-models
 -  2372 author: pavansanagapati / notebook: 14-simple-tips-to-save-ram-memory-for-1-gb-dataset
 -  2373 author: rftexas / notebook: ml-cheatsheet-a-mind-map-for-nlp
 -  2374 author: rftexas / notebook: nlp-cheatsheet-master-nlp
 -  2375 author: riblidezso / notebook: train-from-mlm-finetuned-xlm-roberta-large
 -  2376 author: shahules / notebook: tackle-with-label-smoothing-proved

### 206 ../data/repositories/kaggle/competitions/c\kdd-cup-2013-author-disambiguation
### 207 ../data/repositories/kaggle/competitions/c\kdd-cup-2013-author-paper-identification-challenge
### 208 ../data/repositories/kaggle/competitions/c\kdd-cup-2014-predicting-excitement-at-donors-choose
### 209 ../data/repositories/kaggle/competitions/c\kddcup2012-track1
### 210 ../data/repositories/kaggle/competitions/c\kddcup2012-track2
 -  2466 author: shivashi11 / notebook: ad-click-prediction
### 211 ../data/repositories/kaggle/competitions/c\kkbox-churn-prediction-challenge
 -  2467 author: aroragaurav / notebook: 1-merging-the-data-sets-and-memory-reduction
 -  2468 author: carrie1 / notebook: exploring-membership-data-and-customer-churn
 -  2469 author: guiyom / notebook: user-logs-csv-reduce-memory-with-new-tips
 -  2470 author: headsortails / notebook: should-i-stay-or-should-i-go-kkbox-eda
 -  2471 author: hireme / notebook: kaggle-please-do-something-lb-0-0000
 -  2472 author: jagangupta

notebook not found
 -  2568 author: andypenrose / notebook: pytorch-training-inference-efficientnet-b4
 -  2569 author: anshuls235 / notebook: google-landmark-recognition-eda
 -  2570 author: azaemon / notebook: eda-data-augmentation-for-beginners
 -  2571 author: camaskew / notebook: host-baseline-example
 -  2572 author: chandanverma / notebook: baseline-landmark-recognition-0-4832
 -  2573 author: chirag9073 / notebook: landmark-recognition-exploratory-data-analysis
 -  2574 author: chumajin / notebook: eda-for-biginner-updated-to-english-ver
 -  2575 author: jagdmir / notebook: google-landmark-prediction-2020
 -  2576 author: mohammedessam97 / notebook: organizer-s-code-submission
 -  2577 author: namanj27 / notebook: eda-google-landmark-recognition-2020
 -  2578 author: paulorzp / notebook: baseline-landmark-recognition-lb-0-48
 -  2579 author: ragnar123 / notebook: efficientnetb3-data-pipeline-and-model
 -  2580 author: rhtsingh / notebook: pytorch-landmark-or-non-landmark-identi

 -  2671 author: alexanderlazarev / notebook: simple-keras-1d-cnn-features-split
 -  2672 author: anilnarassiguin / notebook: ml-classic-pipeline-python-xgboost
 -  2673 author: asparago / notebook: 3-basic-classifiers-and-features-correlation
 -  2674 author: bmetka / notebook: logistic-regression
 -  2675 author: felixsoul / notebook: basic-neural-network-using-tensorflow
 -  2676 author: group16 / notebook: shapelets
 -  2677 author: hooseygoose / notebook: directory-structure-and-moving-files
 -  2678 author: jeffd23 / notebook: 10-classifier-showdown-in-scikit-learn
 -  2679 author: jiashenliu / notebook: updatedtry-5-different-classifiers-and-questions
 -  2680 author: lorinc / notebook: feature-extraction-from-images
 -  2681 author: lorinc / notebook: feature-extraction-v4
 -  2682 author: najeebkhan / notebook: neural-network-through-keras
 -  2683 author: selfishgene / notebook: visualizing-k-means-with-leaf-dataset
 -  2684 author: selfishgene / notebook: visualizing-pca-wit

 -  2776 author: ryches / notebook: lyft-constant-velocity-extrapolation-baseline
 -  2777 author: tuckerarrants / notebook: lyft-ensembling-raster-sizes
### 232 ../data/repositories/kaggle/competitions/c\m5-forecasting-accuracy
 -  2778 author: anshuls235 / notebook: time-series-forecasting-eda-fe-modelling
 -  2779 author: girmdshinsei / notebook: for-japanese-beginner-with-wrmsse-in-lgbm
 -  2780 author: harupy / notebook: m5-baseline
 -  2781 author: headsortails / notebook: back-to-predict-the-future-interactive-m5-eda
 -  2782 author: kneroma / notebook: m5-first-public-notebook-under-0-50
 -  2783 author: kneroma / notebook: m5-forecast-v2-python
 -  2784 author: kyakovlev / notebook: m5-custom-features
 -  2785 author: kyakovlev / notebook: m5-lags-features
 -  2786 author: kyakovlev / notebook: m5-simple-fe
 -  2787 author: kyakovlev / notebook: m5-three-shades-of-dark-darker-magic
 -  2788 author: kyakovlev / notebook: m5-witch-time
 -  2789 author: mayer79 / notebook: m5-for

 -  2885 author: evanca / notebook: uncovering-cinderellaness
 -  2886 author: headsortails / notebook: jump-shot-to-conclusions-march-madness-eda
 -  2887 author: jaseziv83 / notebook: applying-pythagorean-expectation-to-major-sports
 -  2888 author: jaseziv83 / notebook: moreyball-in-the-college-game-a-full-ncaa-eda
 -  2889 author: lucabasa / notebook: quantify-the-madness-a-study-of-competitiveness
 -  2890 author: mika30 / notebook: madness-at-home-and-on-the-court-part-2
 -  2891 author: nxrprime / notebook: right-left-shoot-march-madness-eda-and-analysis
 -  2892 author: parulpandey / notebook: decoding-march-madness
 -  2893 author: robikscube / notebook: 2020-march-madness-data-first-look-eda
 -  2894 author: robikscube / notebook: ncaa-basketball-court-plot-helper-functions
 -  2895 author: robikscube / notebook: redefining-ncaa-basketball-positions-using-data
 -  2896 author: shettysaanvi / notebook: jump-shot-to-conclusions-sanvi-march-madness-eda
 -  2897 author: tayyabali

 -  2990 author: robertoruiz / notebook: a-magic-feature
 -  2991 author: robertoruiz / notebook: my-frustrated-approach
 -  2992 author: sheriytm / notebook: feature-based-starter-tpot-lb0-559
 -  2993 author: sudalairajkumar / notebook: simple-exploration-notebook-mercedes
 -  2994 author: tilii7 / notebook: you-want-outliers-we-got-them-outliers
 -  2995 author: tnarik / notebook: likelihood-encoding-of-categorical-features
 -  2996 author: umbertogriffo / notebook: deep-learning
 -  2997 author: wti200 / notebook: xgboost-with-one-hot-encoding-r
### 248 ../data/repositories/kaggle/competitions/c\MerckActivity
### 249 ../data/repositories/kaggle/competitions/c\microsoft-malware-prediction
 -  2998 author: airbourne / notebook: data-dictionary
 -  2999 author: artgor / notebook: is-this-malware-eda-fe-and-lgb-updated
 -  3000 author: bogorodvo / notebook: lightgbm-baseline-model-using-sparse-matrix
 -  3001 author: cdeotte / notebook: neural-network-malware-0-67
 -  3002 author: cdeo

 -  3089 author: hukuda222 / notebook: nfl-simple-model-using-lightgbm
 -  3090 author: jaseziv83 / notebook: comprehensive-cleaning-and-eda-of-all-variables
 -  3091 author: jesucristo / notebook: animated-visualization
 -  3092 author: kingychiu / notebook: keras-nn-starter-crps-early-stopping
 -  3093 author: mrkmakr / notebook: lgbm-multiple-classifier
 -  3094 author: mrkmakr / notebook: neural-network-with-mae-objective-0-01381
 -  3095 author: pednt9 / notebook: vip-hint-coded
 -  3096 author: robikscube / notebook: big-data-bowl-comprehensive-eda-with-pandas
 -  3097 author: robikscube / notebook: nfl-big-data-bowl-plotting-player-position
 -  3098 author: ryches / notebook: model-free-benchmark
 -  3099 author: sryo188558 / notebook: cox-proportional-hazard-model
 -  3100 author: statsbymichaellopez / notebook: nfl-tracking-wrangling-voronoi-and-sonars
 -  3101 author: sudalairajkumar / notebook: simple-exploration-notebook-nfl
### 260 ../data/repositories/kaggle/competitions/

 -  3194 author: haimfeld87 / notebook: simple-catboost
 -  3195 author: headsortails / notebook: resistance-is-futile-transparent-conductors-eda
 -  3196 author: hireme / notebook: two-outputs-regressor-with-lightgbm
 -  3197 author: holar9 / notebook: hands-on-cubist-brnn
 -  3198 author: janpreets / notebook: using-the-atomic-coordinates-for-prediction
 -  3199 author: johnfarrell / notebook: nomad2018-simple-lgbm-starter
 -  3200 author: kemuel / notebook: python-exploration-with-domain-knowledge
 -  3201 author: leo1988 / notebook: exploratory-data-analysis-using-plotly
 -  3202 author: maxkapsecker / notebook: pca-pattern-discovery
 -  3203 author: mbkinaci / notebook: eda-xgboost-ridge-knn-extratrees-regression
 -  3204 author: pecooper / notebook: xgboost-benchmark-simple-usage-of-geometry-files
 -  3205 author: srserves85 / notebook: boosting-stacking-and-bayes-searching
 -  3206 author: sudhirnl7 / notebook: simple-ann
 -  3207 author: sudhirnl7 / notebook: simple-electron-vo

 -  3300 author: carlossouza / notebook: probabilistic-machine-learning-a-diff-approach
 -  3301 author: ChristianDenich / notebook: quantile-reg-lr-schedulers-checkpoints
 -  3302 author: gunesevitan / notebook: osic-pulmonary-fibrosis-progression-eda
 -  3303 author: khoongweihao / notebook: efficientnets-quantile-regression-inference
 -  3304 author: maunish / notebook: osic-super-cool-eda-and-pytorch-baseline
 -  3305 author: miklgr500 / notebook: linear-decay-based-on-resnet-cnn
 -  3306 author: nxrprime / notebook: fibrosis-eda-fast-ai
 -  3307 author: piantic / notebook: osic-pulmonary-fibrosis-progression-basic-eda
 -  3308 author: rohanrao / notebook: osic-understanding-laplace-log-likelihood
 -  3309 author: titericz / notebook: tabular-simple-eda-linear-model
 -  3310 author: twinkle0705 / notebook: your-starter-notebook-for-osic
 -  3311 author: ulrich07 / notebook: osic-multiple-quantile-regression-starter
 -  3312 author: vbmokin / notebook: higher-lb-score-by-tuning-mlos

 -  3410 author: skooch / notebook: petfinder-simple-lgbm-baseline
 -  3411 author: vshakhray / notebook: dls-petfinder
 -  3412 author: wrosinski / notebook: baselinemodeling
 -  3413 author: wuyhbb / notebook: final-small
### 284 ../data/repositories/kaggle/competitions/c\pf2012
### 285 ../data/repositories/kaggle/competitions/c\pf2012-at
### 286 ../data/repositories/kaggle/competitions/c\pf2012-diabetes
### 287 ../data/repositories/kaggle/competitions/c\PhotoQualityPrediction
### 288 ../data/repositories/kaggle/competitions/c\pkdd-15-predict-taxi-service-trajectory-i
 -  3414 author: arnoudcommandeur / notebook: visualisation-of-trips-by-google-maps
 -  3415 author: arunkt / notebook: visualization-of-taxi-trip-end-points
 -  3416 author: ashamli / notebook: all-trips
 -  3417 author: benhamner / notebook: last-location-benchmark
 -  3418 author: benhamner / notebook: test-trips-map
 -  3419 author: gopaldutt / notebook: test-trips-map
 -  3420 author: hochthom / notebook: visualiza

 -  3521 author: kmader / notebook: pretrained-vgg16-w-attention-for-seedlings
 -  3522 author: matrixb / notebook: cnn-svm-xgboost
 -  3523 author: meenavyas / notebook: plant-seedlings-classification
 -  3524 author: miklgr500 / notebook: keras-simple-model-0-97103-best-public-score
 -  3525 author: nikkonst / notebook: plant-seedlings-with-cnn-and-image-processing
 -  3526 author: omkarsabnis / notebook: seedling-classification-using-cnn-v13-0-95
 -  3527 author: oysteijo / notebook: just-some-simple-train-data-investigation
 -  3528 author: praanj / notebook: basic-keras-cnn-with-startified-kfold-evaluation
 -  3529 author: praanj / notebook: transfer-learning-vgg-19-resnet-50-with-kfold
 -  3530 author: raoulma / notebook: plants-xception-90-06-test-accuracy
 -  3531 author: solomonk / notebook: pytorch-simplenet-augmentation-cnn-lb-0-945
 -  3532 author: tylercosner / notebook: pytorch-starter-pre-trained-resnet50-torchvision
 -  3533 author: xingyuyang / notebook: cnn-with-keras

 -  3626 author: iafoss / notebook: panda-16x128x128-tiles
 -  3627 author: iafoss / notebook: panda-concat-tile-pooling-starter-0-79-lb
 -  3628 author: iafoss / notebook: panda-concat-tile-pooling-starter-inference
 -  3629 author: iamleonie / notebook: panda-eda-visualizations-suspicious-data
 -  3630 author: reighns / notebook: understanding-the-quadratic-weighted-kappa
 -  3631 author: rftexas / notebook: better-image-tiles-removing-white-spaces
 -  3632 author: rohitsingh9990 / notebook: panda-eda-better-visualization-simple-baseline
 -  3633 author: rohitsingh9990 / notebook: panda-inference-ensemble-trying-various-models
 -  3634 author: tanulsingh077 / notebook: prostate-cancer-in-depth-understanding-eda-model
 -  3635 author: tarunpaparaju / notebook: panda-challenge-resnet-multitask-8-fold-on-tpu
 -  3636 author: wouterbulten / notebook: getting-started-with-the-panda-dataset
 -  3637 author: xhlulu / notebook: panda-resize-and-save-train-data
 -  3638 author: yasufuminakama

 -  3735 author: rajmehra03 / notebook: a-detailed-explanation-of-keras-embedding-layer
 -  3736 author: rethfro / notebook: 1d-cnn-single-model-score-0-14-0-16-or-0-23
 -  3737 author: sudalairajkumar / notebook: keras-starter-script-with-word-embeddings
 -  3738 author: sudalairajkumar / notebook: simple-leaky-exploration-notebook-quora
 -  3739 author: tour1st / notebook: magic-feature-v2-0-045-gain
### 310 ../data/repositories/kaggle/competitions/c\R
### 311 ../data/repositories/kaggle/competitions/c\Raising-Money-to-Fund-an-Organizational-Mission
### 312 ../data/repositories/kaggle/competitions/c\random-acts-of-pizza
 -  3740 author: alvations / notebook: basic-nlp-with-nltk
 -  3741 author: benhamner / notebook: exploratory-plots-with-julia-and-gadfly
 -  3742 author: benhamner / notebook: rmarkdown-default-text
 -  3743 author: benhamner / notebook: simple-julia-benchmark
 -  3744 author: benhamner / notebook: wordclouds-1
 -  3745 author: chqngh / notebook: random-pizza
 -  374

 -  3841 author: ahayek84 / notebook: restaurant-revenue-predict
 -  3842 author: allenkong / notebook: restaurant-revenue-prediction
 -  3843 author: ani310 / notebook: restaurant-revenue
 -  3844 author: arsenal / notebook: geomap-for-average-revenue
 -  3845 author: ayushikaushik / notebook: regression-analysis
 -  3846 author: benhamner / notebook: boruta-random-forest-benchmark
 -  3847 author: benhamner / notebook: t-sne-restaurant-visualization
 -  3848 author: benhamner / notebook: top-10-leaderboard-performance-over-time-1
 -  3849 author: celikagit / notebook: revenue-prediction-using-random-forest-regressor
 -  3850 author: jatta3399 / notebook: revenuerrestr
 -  3851 author: jquesadar / notebook: restaurant-revenue-1st-place-solution
 -  3852 author: matt4byu / notebook: restaurant-revenue-prediction-analysis
 -  3853 author: meridk / notebook: ms-dos
 -  3854 author: qiujiqiong / notebook: correlationship-matrix
 -  3855 author: spoorthiuk / notebook: restaurant-revenue-pr

 -  3948 author: lparker7 / notebook: assignment-2
 -  3949 author: seshurajup / notebook: eda-for-santa-2019-revenge-of-the-accountants
 -  3950 author: shrutimechlearn / notebook: santa-returns-workshop-explorers-wave-1-vs-wave-2
 -  3951 author: vipito / notebook: fork-of-santa-ip
### 327 ../data/repositories/kaggle/competitions/c\santa-gift-matching
 -  3952 author: astraldawn / notebook: favour-choice-0-83909
 -  3953 author: batzner / notebook: deep-learning-benchmark-0-0439
 -  3954 author: dongxu027 / notebook: path-to-improve-score-hungarian-slowness
 -  3955 author: gaborfodor / notebook: improve-with-the-hungarian-method-0-9375
 -  3956 author: glenslade / notebook: baseline-python-ortools-algo-0-933795
 -  3957 author: golubev / notebook: 1-iteration-py-after-night-c-0-9337
 -  3958 author: golubev / notebook: simple-example-min-cost-flow
 -  3959 author: golubev / notebook: simple-example-mip-ortools-cbc
 -  3960 author: inversion / notebook: inversion-s-been-nice-benchmar

 -  4058 author: mannyelk / notebook: an-honest-approach
 -  4059 author: nanomathias / notebook: distribution-of-test-vs-training-data
 -  4060 author: nanomathias / notebook: feature-engineering-benchmarks
 -  4061 author: nulldata / notebook: jiazhen-to-armamut-via-gurchetan1000-0-56
 -  4062 author: ogrellier / notebook: feature-scoring-vs-zeros
 -  4063 author: ogrellier / notebook: santander-46-features
 -  4064 author: samratp / notebook: lightgbm-xgboost-catboost
 -  4065 author: sggpls / notebook: santander-pipeline-kernel-xgb-fe-lb1-38
 -  4066 author: shivamb / notebook: dataset-decomposition-techniques
 -  4067 author: sudalairajkumar / notebook: simple-exploration-baseline-santander-value
 -  4068 author: tezdhar / notebook: breaking-lb-fresh-start
 -  4069 author: titericz / notebook: giba-countvectorizer-d-lb-1-43
 -  4070 author: titericz / notebook: the-property-by-giba
 -  4071 author: tunguz / notebook: yaeda-yet-another-eda
### 333 ../data/repositories/kaggle/compet

notebook not found
 -  4209 author: meaninglesslives / notebook: nested-unet-with-efficientnet-encoder
notebook not found
 -  4210 author: mnpinto / notebook: pneumothorax-fastai-u-net
notebook not found
 -  4211 author: raddar / notebook: sample-submission-leak
notebook not found
 -  4212 author: retyidoro / notebook: eda-of-pneumothorax-dataset
notebook not found
 -  4213 author: rishabhiitbhu / notebook: unet-with-resnet34-encoder-pytorch
notebook not found
 -  4214 author: seesee / notebook: full-dataset
notebook not found
 -  4215 author: seriousran / notebook: image-pre-processing-for-chest-x-ray
notebook not found
### 346 ../data/repositories/kaggle/competitions/c\siim-isic-melanoma-classification
 -  4216 author: abhishek / notebook: accelerator-power-hour-pytorch-tpu
notebook not found
 -  4217 author: agentauers / notebook: incredible-tpus-finetune-effnetb0-b6-at-once
notebook not found
 -  4218 author: allunia / notebook: don-t-turn-into-a-smoothie-after-the-shake-up
noteboo

 -  4313 author: brassmonkey381 / notebook: viewing-leak-and-machine-images
notebook not found
 -  4314 author: cbryant / notebook: keras-cnn-statoil-iceberg-lb-0-1995-now-0-1516
notebook not found
 -  4315 author: cttsai / notebook: ensembling-gbms-lb-203
notebook not found
 -  4316 author: devm2024 / notebook: keras-model-for-beginners-0-210-on-lb-eda-r-d
notebook not found
 -  4317 author: dimitrif / notebook: domain-knowledge
notebook not found
 -  4318 author: dongxu027 / notebook: explore-stacking-lb-0-1463
notebook not found
 -  4319 author: jgroff / notebook: despeckling-synthetic-aperture-radar-sar-images
notebook not found
 -  4320 author: jirivrany / notebook: my-best-single-model-simple-cnn-lb-0-1541
notebook not found
 -  4321 author: kmader / notebook: exploring-the-icebergs-with-skimage-and-keras
notebook not found
 -  4322 author: knowledgegrappler / notebook: a-keras-prototype-0-21174-on-pl
notebook not found
 -  4323 author: muonneutrino / notebook: exploration-transf

 -  4416 author: maheshdadhich / notebook: a-laconic-approach-eda-lb-0-9860
notebook not found
 -  4417 author: neerjad / notebook: class-wise-regex-functions-l-b-0-995
notebook not found
 -  4418 author: opanichev / notebook: simple-en-baseline-lb-0-9937
notebook not found
 -  4419 author: savannahvi / notebook: 3-simple-steps-lb-9878-with-new-data
notebook not found
 -  4420 author: srisanthoshhari / notebook: xgboost-and-class-wise-regex-lb-0-9915
notebook not found
 -  4421 author: zfturbo / notebook: simple-en-baseline-lb-0-9867
notebook not found
### 363 ../data/repositories/kaggle/competitions/c\text-normalization-challenge-russian-language
 -  4422 author: arccosmos / notebook: ru-baseline-lb-0-9799-from-en-thread
notebook not found
 -  4423 author: danavg / notebook: explore-russian-text-normilization-challenge
notebook not found
 -  4424 author: dequadras / notebook: simple-aproach-0-9627-create-a-dictionary
notebook not found
 -  4425 author: innakt / notebook: russianclassi

 -  4503 author: pranav84 / notebook: a-beginner-s-guide-to-cern-s-trackml-challenge
 -  4504 author: shivamb / notebook: trajectory-animation-eda
 -  4505 author: sionek / notebook: bayesian-optimization
 -  4506 author: sionek / notebook: mod-dbscan-x-100-parallel
 -  4507 author: wesamelshamy / notebook: trackml-problem-explanation-and-data-exploration
 -  4508 author: yuval6967 / notebook: 7th-place-clustering-extending-ml-merging-0-75
### 376 ../data/repositories/kaggle/competitions/c\tradeshift-text-classification
 -  4509 author: tarunaryyan / notebook: imputation-for-missing-values-in-features
### 377 ../data/repositories/kaggle/competitions/c\transfer-learning-on-stack-exchange-tags
 -  4510 author: akshatpathak / notebook: text-data-clustering
 -  4511 author: anokas / notebook: frequent-words-model-v2
 -  4512 author: charlescostello / notebook: transfer-learning-on-stack-exchange-tags
 -  4513 author: eliotbarr / notebook: word-clouds
 -  4514 author: katarz / notebook: tag

 -  4608 author: enrique1500 / notebook: rental-listing-ny-map
 -  4609 author: guoday / notebook: cv-statistics-better-parameters-and-explaination
 -  4610 author: jxnlco / notebook: deduplicating-features
 -  4611 author: kashnitsky / notebook: topic-6-feature-engineering-and-feature-selection
 -  4612 author: neviadomski / notebook: data-exploration-two-sigma-renthop
 -  4613 author: poonaml / notebook: two-sigma-renthop-eda
 -  4614 author: rakhlin / notebook: another-python-version-of-it-is-lit-by-branden
 -  4615 author: robertoruiz / notebook: feature-engineering-1-sentiment-analysis
 -  4616 author: somnisight / notebook: microsoft-lightgbm-starter
 -  4617 author: stanislavushakov / notebook: python-version-of-it-is-lit-by-branden
 -  4618 author: sudalairajkumar / notebook: simple-exploration-notebook-2-connect
 -  4619 author: sudalairajkumar / notebook: xgb-starter-in-python
 -  4620 author: ygtcrt / notebook: how-to-deal-with-features-in-renthop-data
### 386 ../data/reposi

 -  4716 author: mark4h / notebook: vsb-1st-place-solution
 -  4717 author: miklgr500 / notebook: flatiron
 -  4718 author: ratthachat / notebook: demythifying-matthew-correlation-coefficients-mcc
 -  4719 author: roydatascience / notebook: eda-iso-pca-lle-stratified-lstm-attention
 -  4720 author: sohier / notebook: reading-the-data-with-python
 -  4721 author: suicaokhoailang / notebook: 5-fold-lstm-with-threshold-tuning-0-618-lb
 -  4722 author: suicaokhoailang / notebook: transformer-baseline-0-672-lb
 -  4723 author: tarunpaparaju / notebook: vsb-competition-attention-bilstm-with-features
 -  4724 author: theoviel / notebook: fast-fourier-transform-denoising
 -  4725 author: xhlulu / notebook: exploring-signal-processing-with-scipy
 -  4726 author: zoujie / notebook: analyze-power-line-signals-like-a-physicist
### 394 ../data/repositories/kaggle/competitions/c\walmart-recruiting-sales-in-stormy-weather
### 395 ../data/repositories/kaggle/competitions/c\walmart-recruiting-store-sal

 -  4821 author: rajmehra03 / notebook: a-detailed-explanation-of-keras-embedding-layer
 -  4822 author: rejasupotaro / notebook: let-s-cook-model
 -  4823 author: rejasupotaro / notebook: representations-for-ingredients
 -  4824 author: rejasupotaro / notebook: what-are-ingredients
 -  4825 author: sathyz / notebook: simple-nn-approach-kfold
 -  4826 author: shivamb / notebook: tf-idf-with-ovr-svm-what-s-cooking
 -  4827 author: tanulsingh077 / notebook: what-s-cooking
 -  4828 author: tejaeduc / notebook: whats-cooking-neural-nets-log-reg-svc
 -  4829 author: umeshnarayanappa / notebook: recipes-tf-idf-and-bigrams
 -  4830 author: wendykan / notebook: what-s-cooking-eda
### 403 ../data/repositories/kaggle/competitions/c\WIC2011
### 404 ../data/repositories/kaggle/competitions/c\wikichallenge
### 405 ../data/repositories/kaggle/competitions/c\wise-2014
### 406 ../data/repositories/kaggle/competitions/c\womens-machine-learning-competition-2018
 -  4831 author: amlanpraharaj / notebook:

 -  4928 author: xbldev / notebook: test-notebook-for-google-youtube-classification
 -  4929 author: xpeuler / notebook: frame2nparray
 -  4930 author: xpeuler / notebook: split-video-for-data-augmentation
### 416 ../data/repositories/kaggle/competitions/c\youtube8m-2018
 -  4931 author: abimannan / notebook: youtube-2m
 -  4932 author: amansrivastava / notebook: exploration-bi-lstm-model
 -  4933 author: amitkumarjaiswal / notebook: google-youtube-1st-try
 -  4934 author: anirudh257 / notebook: exploration-bi-lstm-model-by-aman-srivasthava
 -  4935 author: artyomp / notebook: a-fixed-download-script
 -  4936 author: ashishpatel26 / notebook: youtube-8m-dataset-using-extratreesclassifier
 -  4937 author: ayoubchebbi / notebook: youtube-8m-analytics
 -  4938 author: duboviy / notebook: starter-kernel-yt8m-2018-sample-data
 -  4939 author: jameschien / notebook: newbie-easy-way-for-loading-video-frame-tfrecord
 -  4940 author: juliaelliott / notebook: starter-kernel-yt8m-2018-sample-data

In [14]:
# scoring function to get a score between 0...1 for integer-values, 0.5 should be at ~100
def score(n, precision=3):
    if isinstance(n, int) or isinstance(i, float):
        return round(1-1/math.pow(1+n, 0.15), precision)
    else:
        try:
            n = int(n)
        except:
            return 0
        
    return round(1-1/math.pow(1+n, 0.15), precision)

for n in [0,1,10,25,50,100,1000,10000]:
    print(score(n))
    
print(score('3'))
print(score('a'))

0.0
0.099
0.302
0.387
0.446
0.5
0.645
0.749
0.188
0


In [15]:
# throw all parsed meta-data together in a single csv
# select only true ML cases

folder_base = '../data/repositories/kaggle/competitions/'
file_json = 'meta.json'
fp_csv = '../data/database/kaggle_competitions_01_original.csv'
fp_research = '../data/database/kaggle_competitions_02_research.csv'

quit = 0 # quit after n files processed / 0 ... no limit

folders = os.listdir(folder_base)
print('folder:', len(folders))
i = 0
j = 0

runtime_start = time.time()
df = pd.DataFrame()
df2 = pd.DataFrame()

for folder in folders:
    subfolders = os.listdir(os.path.join(folder_base,folder))
    print('subfolder:', len(folders))
    for subfolder in subfolders:
        #print('###', i, os.path.join(folder_base,folder,subfolder))
        path = os.path.join(folder_base,folder,subfolder, 'notebooks/')
        i += 1
        
        if os.path.exists(path):
            projects = os.listdir(path)

            for author in projects:
                #print('author:', author)
                items = os.listdir(os.path.join(folder_base,folder,subfolder, 'notebooks/', author))

                for notebook in items:
                    j+=1
                    #print(' - ', j, 'author:', author, '/ notebook:', notebook)
                    fp_json = os.path.join(folder_base,folder,subfolder, 'notebooks/', author, notebook, file_json)
                    
                    if os.path.isfile(fp_json):
                        data = load_data(fp_json, fromJson=True)
                        #print(data)
                        data['score_votes'] = score(data['votes'])
                        data['score_views'] = score(data['views'])
                        ml_score = 0
                        if len(data['ml_slugs']) > 0 or len(data['ml_terms']) > 0:
                            ml_score += 0.2
                        if 'ml_tags' in data and len(data['ml_tags']) > 0:
                            ml_score += 0.3
                        if len(data['ml_libs']) > 0:
                            ml_score += 0.5
                        data['ml_detected'] = ml_score
                        
                        data['description'] = data['description'].replace('\n', ' ').replace('\r', '').replace('¶', '').strip()
                        
                        # date (ignoring GMT+x)
                        # Wed Dec 19 2018 14:42:40 GMT+0100 (Mitteleuropäische Normalzeit)
                        # https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
                        date_time_str = data['date'].split('GMT')
                        date_time_str = date_time_str[0].strip()
                        date_time_obj = datetime.datetime.strptime(date_time_str, '%a %b %d %Y %H:%M:%S')

                        #print('Date:', date_time_obj.date())
                        #print('Time:', date_time_obj.time())
                        #print('Date-time:', date_time_obj)
                        data['date'] = date_time_obj
                        
                        # store only items with:
                        # - ml_score >= 0.5
                        # - are indicated as "best submission" # and 'Best Submission' in data['submission'] 
                        # - description > 1 word
                        words = data['description'].split(' ')
                        if ml_score >= 0.5 and len(words) > 5:
                            df = df.append(data, ignore_index=True)
                            print(data['link'], 'is ML use case')
                        
                        else:
                            #if 'R' in data['type']:
                            df2 = df2.append(data, ignore_index=True)
                            print(data['link'], 'is not ML use case')
                            
                    #if j % 100 == 0:
                    #    print('folder', i, '/ notebook', j)
                        
                if quit!=0 and j>quit:
                    break
            if quit!=0 and j>quit:
                break
    if quit!=0 and j>quit:
        break
        
# drop duplicates
df = df.drop_duplicates(['link'])

runtime_end = time.time()
print('runtime:', round(runtime_end - runtime_start, 3), 'seconds for', j, 'items')
print(df.shape)
print(df.head())
        
# drop columns
df.drop(columns=['author', 'submission'], inplace=True)
df2.drop(columns=['author', 'submission'], inplace=True)

df.to_csv(fp_csv, sep=';', index=False)
df2.to_csv(fp_research, sep=';', index=False)

folder: 1
subfolder: 1
https://www.kaggle.com/a45632/classification-tfidf-svm-2-0 is ML use case
https://www.kaggle.com/amansohane/level-3-with-partial-deciphering-0-94-level-3 is ML use case
https://www.kaggle.com/ananthu017/classification-tfidf-logistic is not ML use case
https://www.kaggle.com/ashishpatel26/attension-layer-basic-for-nlp is ML use case
https://www.kaggle.com/ashishpatel26/beginner-to-intermediate-nlp-tutorial is ML use case
https://www.kaggle.com/ashishpatel26/everything-you-want-to-know-about-20-ngctc is not ML use case
https://www.kaggle.com/ashishpatel26/stratified-kfold-hyperparameter-tuning is ML use case
https://www.kaggle.com/delayedkarma/some-basic-explorations-lgb-baseline is not ML use case
https://www.kaggle.com/interneuron/difficulty-1-deciphering-wip is ML use case
https://www.kaggle.com/jazivxt/enigma-layers-template is not ML use case
https://www.kaggle.com/kaggleuser58/1-char-decryption-in-level-1-2-and-3-gives-98-85 is ML use case
https://www.kaggle.

https://www.kaggle.com/hmendonca/airbus-mask-rcnn-and-coco-transfer-learning is ML use case
https://www.kaggle.com/hmendonca/classification-and-segmentation-fp is ML use case
https://www.kaggle.com/hmendonca/u-net-model-with-submission is ML use case
https://www.kaggle.com/iafoss/fine-tuning-resnet34-on-ship-detection is ML use case
https://www.kaggle.com/iafoss/unet34-dice-0-87 is not ML use case
https://www.kaggle.com/iafoss/unet34-submission-tta-0-699-new-public-lb is not ML use case
https://www.kaggle.com/inversion/run-length-decoding-quick-start is not ML use case
https://www.kaggle.com/julian3833/2-understanding-and-plotting-rle-bounding-boxes is ML use case
https://www.kaggle.com/julian3833/4-exploring-public-models is ML use case
https://www.kaggle.com/kmader/baseline-u-net-model-part-1 is ML use case
https://www.kaggle.com/kmader/from-trained-u-net-to-submission-part-2 is ML use case
https://www.kaggle.com/kmader/transfer-learning-for-boat-or-no-boat is ML use case
https://www

https://www.kaggle.com/rathimadhav/notebook79ae0de477 is not ML use case
https://www.kaggle.com/aitude/ashrae-kfold-lightgbm-without-leak-1-08 is ML use case
https://www.kaggle.com/caesarlupum/ashrae-start-here-a-gentle-introduction is ML use case
https://www.kaggle.com/corochann/ashrae-training-lgbm-by-meter-type is ML use case
https://www.kaggle.com/corochann/optuna-tutorial-for-hyperparameter-optimization is ML use case
https://www.kaggle.com/gunesevitan/ashrae-ucf-spider-and-eda-full-test-labels is ML use case
https://www.kaggle.com/hmendonca/starter-eda-and-feature-selection-ashrae3 is ML use case
https://www.kaggle.com/isaienkov/keras-nn-with-embeddings-for-cat-features-1-15 is not ML use case
https://www.kaggle.com/isaienkov/lightgbm-fe-1-19 is not ML use case
https://www.kaggle.com/jaseziv83/a-deep-dive-eda-into-all-variables is not ML use case
https://www.kaggle.com/kailex/ac-dc is not ML use case
https://www.kaggle.com/kimtaegwan/what-s-your-cv-method is ML use case
https://w

https://www.kaggle.com/danofer/baseline-feature-engineering-geotab-69-5-lb is ML use case
https://www.kaggle.com/dcaichara/feature-engineering-and-lightgbm is not ML use case
https://www.kaggle.com/fatihbilgin/data-visualization-and-eda-for-geotab-bigquery is not ML use case
https://www.kaggle.com/gaborfodor/0-feature-extraction is ML use case
https://www.kaggle.com/gaborfodor/5-combine-models is not ML use case
https://www.kaggle.com/janlauge/intersection-congestion-eda is ML use case
https://www.kaggle.com/jpmiller/eda-to-break-through-rmse-68 is not ML use case
https://www.kaggle.com/kabure/insightful-eda-modeling-lgbm-hyperopt is ML use case
https://www.kaggle.com/pradeepmuniasamy/comparative-study-of-models-geotab-inertsection is not ML use case
https://www.kaggle.com/pradeepmuniasamy/extensive-eda-and-modelling-geotab-inertsection is not ML use case
https://www.kaggle.com/prazhant/predicting-wait-times-at-intersections is ML use case
https://www.kaggle.com/pulkitmehtawork1985/bea

https://www.kaggle.com/saisatish09/a-beginner-s-guide-to-feature-selection-methods is ML use case
https://www.kaggle.com/scirpus/benouilli-naive-bayes is not ML use case
https://www.kaggle.com/timesler/xgboost-15-02-2016 is not ML use case
https://www.kaggle.com/trottefox/blending-trees is not ML use case
https://www.kaggle.com/trottefox/nearest-neighbour-linear-features is not ML use case
https://www.kaggle.com/cartographic/bish-bash-xgboost is not ML use case
https://www.kaggle.com/cartographic/de-dupe-categoricals is not ML use case
https://www.kaggle.com/cpmpml/optimizing-probabilities-for-best-mcc is ML use case
https://www.kaggle.com/danielfg/xgboost-reg-linear-lb-0-485 is not ML use case
https://www.kaggle.com/dollardollar/eda-of-important-features is not ML use case
https://www.kaggle.com/gaborfodor/69-failure-rate is ML use case
https://www.kaggle.com/gaborfodor/notebookd19d11e4f2 is ML use case
https://www.kaggle.com/gingerman/shopfloor-visualization is not ML use case
https:

https://www.kaggle.com/arathee2/read-bson-into-pandas-and-start-exploring is ML use case
https://www.kaggle.com/bguberfain/just-showing-a-few-images is not ML use case
https://www.kaggle.com/bguberfain/naive-keras-cdiscount is ML use case
https://www.kaggle.com/bguberfain/naive-statistics is not ML use case
https://www.kaggle.com/bguberfain/not-so-naive-way-to-convert-bson-to-files is ML use case
https://www.kaggle.com/blazeka/multi-gpu-tensorflow-convnet-0-65 is ML use case
https://www.kaggle.com/blazeka/validate-download-with-sha256-hash is not ML use case
https://www.kaggle.com/cerebrium/multi-class-logistic-regression-using-tensor-flow is ML use case
https://www.kaggle.com/ezietsman/inception-v3-finetune is not ML use case
https://www.kaggle.com/humananalog/keras-generator-for-reading-directly-from-bson is ML use case
https://www.kaggle.com/inversion/processing-bson-files is ML use case
https://www.kaggle.com/lamdang/fast-shuffle-bson-generator-for-keras is ML use case
https://www.

https://www.kaggle.com/elvenmonk/ciphertext-challenge-iii-fast-level-3 is not ML use case
https://www.kaggle.com/elvenmonk/difficulty-1-reverse-engineering-no-ml is ML use case
https://www.kaggle.com/gullfaxi/a-few-exploration-on-difficulty-4 is not ML use case
https://www.kaggle.com/jiaofenx/ciphertext-challenge-iii-simple-eda-and-cracking is ML use case
https://www.kaggle.com/julianb/time-efficient-pairing is not ML use case
https://www.kaggle.com/kaggleuser58/cipher-challenge-iii-level-1 is ML use case
https://www.kaggle.com/nbzee1/some-hints-for-levels-3-and-4-now-with-solutions is ML use case
https://www.kaggle.com/pednt9/something-to-begin-with-a-first-hint is ML use case
https://www.kaggle.com/sanikamal/ciphertext-challenge-iii is not ML use case
https://www.kaggle.com/seriousran/only-length-0-00000 is not ML use case
https://www.kaggle.com/smlopezza/ciphertext-challenge-iii-v3 is not ML use case
https://www.kaggle.com/sunandosamaddar/scratch-8urykgfg is not ML use case
https://

https://www.kaggle.com/binhlc/sars-cov-2-exponential-model-week-2 is not ML use case
https://www.kaggle.com/corochann/covid-19-effect-of-temperature-humidity is ML use case
https://www.kaggle.com/cpmpml/fatalities-prediction-via-linear-regression is ML use case
https://www.kaggle.com/davidbnn92/weather-data is not ML use case
https://www.kaggle.com/dferhadi/global-forecasting-covid-19-random-forest is ML use case
https://www.kaggle.com/dferhadi/logistic-curve-fitting-global-covid-19-confirmed is ML use case
https://www.kaggle.com/dott1718/cv19w2-2-sub is not ML use case
https://www.kaggle.com/eswarchandt/timeseries-forecasting-of-covid-19-week-2-arima is not ML use case
https://www.kaggle.com/gaborfodor/covid-19-a-few-charts-and-a-simple-baseline is ML use case
https://www.kaggle.com/gaborfodor/covid19-global-forecasting-top-submissions is not ML use case
https://www.kaggle.com/khotijahs1/covid19-forecasting-randomforest is ML use case
https://www.kaggle.com/mdmahmudferdous/covid-19-gl

https://www.kaggle.com/tunguz/simple-covid-19-ca-eda is not ML use case
https://www.kaggle.com/nishantrock/dlrm-beginner-steps is not ML use case
https://www.kaggle.com/abhishek/beating-the-benchmark is not ML use case
https://www.kaggle.com/benhamner/exploring-the-crowdflower-data is not ML use case
https://www.kaggle.com/benhamner/python-benchmark is not ML use case
https://www.kaggle.com/benhamner/wordclouds is not ML use case
https://www.kaggle.com/chenglongchen/customized-softkappa-loss-in-xgboost is not ML use case
https://www.kaggle.com/domcastro/utility-write-svd-components-to-file is not ML use case
https://www.kaggle.com/duttaroy/porter-stemmer is not ML use case
https://www.kaggle.com/elenacuoco/cf-nn-py is not ML use case
https://www.kaggle.com/gmilosev/r-version-of-benchmark-script is not ML use case
https://www.kaggle.com/gshguru/clubbing-2-benchmarks is not ML use case
https://www.kaggle.com/hiendang/auto-correct-query is not ML use case
https://www.kaggle.com/jigneshvya

https://www.kaggle.com/crawford/starter-kernel is not ML use case
https://www.kaggle.com/danielbecker/careervillage-org-recommendation-engine is not ML use case
https://www.kaggle.com/erikbruin/careervillage-org-data-exploration is ML use case
https://www.kaggle.com/ferdzso/knowledge-graph-analysis-with-node2vec is ML use case
https://www.kaggle.com/ididur/nn-based-recommender-engine is not ML use case
https://www.kaggle.com/infocusp/deepdive-into-careervillage is not ML use case
https://www.kaggle.com/ioohooi/eda-with-some-insights-data-er-diagram is ML use case
https://www.kaggle.com/ironben/rdbs-to-graphdb-neo4j-network-approach is ML use case
https://www.kaggle.com/mistermichael/careervillage-exploration is not ML use case
https://www.kaggle.com/nasirislamsujan/eda-data-science-for-good is not ML use case
https://www.kaggle.com/niyamatalmass/lightfm-hybrid-recommendation-system is ML use case
https://www.kaggle.com/rblcoder/recommend-based-on-nearest-neighbors is ML use case
https:

https://www.kaggle.com/colinpriest/denoising-with-r-part-1 is not ML use case
https://www.kaggle.com/colinpriest/denoising-with-r-part-2 is not ML use case
https://www.kaggle.com/colinpriest/denoising-with-r-part-5 is not ML use case
https://www.kaggle.com/colinpriest/denoising-with-r-part-6 is not ML use case
https://www.kaggle.com/dchudz/clean-by-thresholding is not ML use case
https://www.kaggle.com/michalbrezk/denoise-images-using-autoencoders-tf-keras is ML use case
https://www.kaggle.com/ngutten/high-pass-filter is not ML use case
https://www.kaggle.com/oliversherouse/denoising-with-ransom-forests is not ML use case
https://www.kaggle.com/palaksood97/image-denoising is not ML use case
https://www.kaggle.com/phylake1337/clear-it is ML use case
https://www.kaggle.com/rdokov/background-removal is not ML use case
https://www.kaggle.com/rdokov/nn-starter-kit is not ML use case
https://www.kaggle.com/sushanth1995/image-augmentation-and-neural-encoder-decoder is ML use case
https://www.

https://www.kaggle.com/coronate/donorschoose-exploratory-analysis is not ML use case
https://www.kaggle.com/fizzbuzz/beginner-s-guide-to-capsule-networks is ML use case
https://www.kaggle.com/headsortails/an-educated-guess-update-feature-engineering is not ML use case
https://www.kaggle.com/hoonkeng/how-to-get-81-gru-att-lgbm-tf-idf-eda is ML use case
https://www.kaggle.com/ibrahimaptlo10/probability-project-approved is not ML use case
https://www.kaggle.com/jagangupta/understanding-approval-donorschoose-eda-fe-eli5 is ML use case
https://www.kaggle.com/jmbull/xtra-credit-xgb-lgb-tfidf-feature-stacking is ML use case
https://www.kaggle.com/matthewa313/ensembling-with-logistic-regression-lb-82-4 is not ML use case
https://www.kaggle.com/nulldata/intro-to-pandas-profiling-simple-fast-eda is ML use case
https://www.kaggle.com/opanichev/lightgbm-and-tf-idf-starter is not ML use case
https://www.kaggle.com/qinhui1999/deep-learning-is-all-you-need-lb-0-80x is not ML use case
https://www.kagg

https://www.kaggle.com/paultimothymooney/explore-image-metadata-s5p-gfs-gldas is ML use case
https://www.kaggle.com/paultimothymooney/how-to-get-started-with-the-earth-engine-data is not ML use case
https://www.kaggle.com/paultimothymooney/overview-of-the-eie-analytics-challenge is not ML use case
https://www.kaggle.com/ragnar123/exploratory-data-analysis-and-factor-model-idea is ML use case
https://www.kaggle.com/raviyadav2398/ds4g-emission-factor is ML use case
https://www.kaggle.com/tiurii/ds4g-modelling-of-emissions-of-power-plants is ML use case
https://www.kaggle.com/vlarmet/an-r-notebook-for-no2-emission-factor is not ML use case
https://www.kaggle.com/vpatricio/ds4g-where-does-the-no2-come-from is ML use case
https://www.kaggle.com/sanikamal/air-quality-prediction-eda is not ML use case
https://www.kaggle.com/aamaia/rgb-using-m-bands-example is not ML use case
https://www.kaggle.com/aamaia/small-vehicles is not ML use case
https://www.kaggle.com/aamaia/trees-are-red-buildings-a

https://www.kaggle.com/sakvaua/animated-check-ins is not ML use case
https://www.kaggle.com/svpons/grid-knn is not ML use case
https://www.kaggle.com/svpons/grid-plus-classifier is not ML use case
https://www.kaggle.com/valeriur/python-starter-0-55 is not ML use case
https://www.kaggle.com/zeroblue/mad-scripts-battle-z is not ML use case
https://www.kaggle.com/zfturbo/mad-scripts-battle is not ML use case
https://www.kaggle.com/ajaysh/stackoverflow-tag-prediction is not ML use case
https://www.kaggle.com/curioso/link-prediction-facebook is not ML use case
https://www.kaggle.com/genialgokul1099/social-network-graph-link-prediction is ML use case
https://www.kaggle.com/aparajit0511/facial-keypoint-detection-udacity is not ML use case
https://www.kaggle.com/balraj98/data-augmentation-for-facial-keypoint-detection is ML use case
https://www.kaggle.com/chaitanyagarikipati/facial-keypoints-detection-tensorflow-cnn is not ML use case
https://www.kaggle.com/datawanderer/mixing-cnn-regularizati

https://www.kaggle.com/szacho/augmix-data-augmentation-on-tpu is ML use case
https://www.kaggle.com/wrrosa/tpu-enet-b7-densenet is ML use case
https://www.kaggle.com/xhlulu/flowers-tpu-concise-efficientnet-b7 is ML use case
https://www.kaggle.com/xiejialun/gridmask-data-augmentation-with-tensorflow is ML use case
https://www.kaggle.com/yihdarshieh/batch-implementation-of-more-data-augmentations is ML use case
https://www.kaggle.com/yihdarshieh/detailed-guide-to-custom-training-with-tpus is ML use case
https://www.kaggle.com/yihdarshieh/make-chris-deotte-s-data-augmentation-faster is ML use case
https://www.kaggle.com/ambarish/forest-cover-type-eda-and-modelling is not ML use case
https://www.kaggle.com/arjundas/forest-run is not ML use case
https://www.kaggle.com/artgor/forest-exploration-and-trees is ML use case
https://www.kaggle.com/ashishpatel26/bayesian-random-forest-lightgbm is ML use case
https://www.kaggle.com/bigkirill/forest-cover-using-catboost-multiclass-classifier is not M

https://www.kaggle.com/ogrellier/i-have-seen-the-future is ML use case
https://www.kaggle.com/ogrellier/teach-lightgbm-to-sum-predictions is ML use case
https://www.kaggle.com/ogrellier/user-level-lightgbm-lb-1-4480 is not ML use case
https://www.kaggle.com/ogrellier/using-classification-for-predictions is ML use case
https://www.kaggle.com/paulorzp/perfect-score-one-line-without-semicolon is not ML use case
https://www.kaggle.com/pavansanagapati/google-analytics-simple-exploration is ML use case
https://www.kaggle.com/plasticgrammer/customer-revenue-prediction-v2-playground is not ML use case
https://www.kaggle.com/shivamb/exploratory-analysis-ga-customer-revenue is ML use case
https://www.kaggle.com/smasar/tutorial-preprocessing-processing-evaluation is ML use case
https://www.kaggle.com/sudalairajkumar/simple-exploration-baseline-ga-customer-revenue is ML use case
https://www.kaggle.com/atogni85/galaxy-convnet is ML use case
https://www.kaggle.com/helmehelmuto/keras-cnn is not ML us

https://www.kaggle.com/shonenkov/bayesian-optimization-wbf-efficientdet is ML use case
https://www.kaggle.com/shonenkov/inference-efficientdet is ML use case
https://www.kaggle.com/shonenkov/oof-evaluation-mixup-efficientdet is ML use case
https://www.kaggle.com/shonenkov/training-efficientdet is ML use case
https://www.kaggle.com/shonenkov/wbf-approach-for-ensemble is ML use case
https://www.kaggle.com/shonenkov/wbf-over-tta-single-model-efficientdet is ML use case
https://www.kaggle.com/tanulsingh077/end-to-end-object-detection-with-transformers-detr is ML use case
https://www.kaggle.com/ufownl/global-wheat-detection-pseudo-labaling is ML use case
https://www.kaggle.com/aldrin644/analysis-between-new-and-old-open-image-dataset is ML use case
https://www.kaggle.com/aldrin644/bounding-box-prediction-using-faster-rcnn-resnet is ML use case
https://www.kaggle.com/ashishpatel26/inception-resnet-comb-approach-on-google-image-ai is not ML use case
https://www.kaggle.com/ashishpatel26/xcepti

https://www.kaggle.com/anlthms/convnet-0-89 is not ML use case
https://www.kaggle.com/bitsofbits/naive-nnet is not ML use case
https://www.kaggle.com/datacanary/what-do-these-things-look-like is not ML use case
https://www.kaggle.com/datenkieker/beat-the-benchmark-0-67 is not ML use case
https://www.kaggle.com/deepcnn/rf-lda-lr is not ML use case
https://www.kaggle.com/elenacuoco/simple-grasp-with-sklearn is not ML use case
https://www.kaggle.com/karma86/neural-nets is not ML use case
https://www.kaggle.com/karthikmurali11/logistic-regression-with-r-0-65 is not ML use case
https://www.kaggle.com/korowiow/simple-grasp-with-sklearn-0-70 is not ML use case
https://www.kaggle.com/kumareshd/simple-low-freq-0-80 is not ML use case
https://www.kaggle.com/lnicalo/simple-low-freq-0-80 is not ML use case
https://www.kaggle.com/stefaneng/simple-python-pandas-plots is not ML use case
https://www.kaggle.com/titericz/simple-grasp-with-sklearn-giba is not ML use case
https://www.kaggle.com/xophe92/fi

https://www.kaggle.com/jionie/tta-power-densenet169 is ML use case
https://www.kaggle.com/mpalermo/keras-pipeline-custom-generator-imgaug is not ML use case
https://www.kaggle.com/qitvision/a-complete-ml-pipeline-fast-ai is ML use case
https://www.kaggle.com/robotdreams/one-cycle-policy-with-keras is ML use case
https://www.kaggle.com/sdelecourt/cnn-with-keras is not ML use case
https://www.kaggle.com/seefun/you-really-need-attention-pytorch is ML use case
https://www.kaggle.com/sermakarevich/complete-handcrafted-pipeline-in-pytorch-resnet9 is not ML use case
https://www.kaggle.com/suicaokhoailang/wip-densenet121-baseline-with-fastai is ML use case
https://www.kaggle.com/vbookshelf/cnn-how-to-use-160-000-images-without-crashing is ML use case
https://www.kaggle.com/cristianfat/hiv-progression is not ML use case
https://www.kaggle.com/lingyuxiong/predict-hiv-progression is not ML use case
https://www.kaggle.com/tiwaris436/predict-hiv-progression is not ML use case
https://www.kaggle.com

https://www.kaggle.com/ys19931006/ohohoh is not ML use case
https://www.kaggle.com/zytfoo/how-much-did-it-rain-kernel is not ML use case
https://www.kaggle.com/alexanderliao/image-augmentation-demo-with-albumentation is ML use case
https://www.kaggle.com/allunia/protein-atlas-exploration-and-baseline is ML use case
https://www.kaggle.com/allunia/uncover-target-correlations-with-bernoulli-mixture is ML use case
https://www.kaggle.com/artemtprv/load-external-data is not ML use case
https://www.kaggle.com/byrachonok/pretrained-inceptionresnetv2-base-classifier is not ML use case
https://www.kaggle.com/guglielmocamporese/macro-f1-score-keras is ML use case
https://www.kaggle.com/iafoss/pretrained-resnet34-with-rgby-0-460-public-lb is ML use case
https://www.kaggle.com/iluxave/inceptionv3-with-sliding-window-image-breakdown is ML use case
https://www.kaggle.com/jschnab/exploring-the-human-protein-atlas-images is ML use case
https://www.kaggle.com/kmader/transfer-learning-for-human-protein-s

https://www.kaggle.com/zexihan/simplest-multithreading-downloader is not ML use case
https://www.kaggle.com/abhishek/mask-rcnn-using-torchvision-0-17 is not ML use case
https://www.kaggle.com/aerdem4/fashion-some-short-sanity-checks is not ML use case
https://www.kaggle.com/go1dfish/u-net-baseline-by-pytorch-in-fgvc6-resize is ML use case
https://www.kaggle.com/go1dfish/updated4-29-fgvc6-simple-eda is ML use case
https://www.kaggle.com/hyeonho/imaterialist-fashion-2019-at-fgvc6-eda is ML use case
https://www.kaggle.com/interneuron/fastai-custom-rle is ML use case
https://www.kaggle.com/interneuron/segmentation-models-pytorch-catalyst is ML use case
https://www.kaggle.com/jmourad100/eda-imaterialist-fashion-2019-at-fgvc6 is not ML use case
https://www.kaggle.com/kimwoojeong/simple-eda-imaterialist-fashion-2019-at-fgvc6 is not ML use case
https://www.kaggle.com/kyazuki/calculate-evaluation-score is not ML use case
https://www.kaggle.com/latticetower/eda-supercategories-attributes-correct

https://www.kaggle.com/jpmiller/basic-eda-with-images is ML use case
https://www.kaggle.com/jtlowery/plots-and-interactive-graphs-eda is not ML use case
https://www.kaggle.com/malyutins/keras-f2-score is not ML use case
https://www.kaggle.com/mengtianjian/f2-score-implementation-in-pytorch is not ML use case
https://www.kaggle.com/petrov/naive-submission-updated-for-stage-2-0-088 is not ML use case
https://www.kaggle.com/phmagic/keras-densenet121-multi-label-baseline is ML use case
https://www.kaggle.com/rezwan249/datasets-description-and-test-images-display is not ML use case
https://www.kaggle.com/ryanzhang/keras-f2-metric is ML use case
https://www.kaggle.com/victorhz/cnn-with-20-classes-trained-validation-set is ML use case
https://www.kaggle.com/victorhz/fork-of-cnn-with-20-classes-trained-validation-set is ML use case
https://www.kaggle.com/victorhz/just-guess is not ML use case
https://www.kaggle.com/victorkuzn1986/quick-draft-in-keras is not ML use case
https://www.kaggle.com/y

https://www.kaggle.com/mylesoneill/normalized-kaggle-medal-count-by-country is not ML use case
https://www.kaggle.com/nagadomi/list-of-installed-packages is not ML use case
https://www.kaggle.com/tanitter/grid-search-xgboost-with-scikit-learn is not ML use case
https://www.kaggle.com/toshik/splines-with-r is not ML use case
https://www.kaggle.com/triskelion/connected-particles-iii-bl-ocks is not ML use case
https://www.kaggle.com/algila/inception-v3-and-k-fold-in-python-0-98996 is not ML use case
https://www.kaggle.com/ambarish/invasive-species-monitoring-analysis is not ML use case
https://www.kaggle.com/amlacorp/keras-starter-fork is ML use case
https://www.kaggle.com/ardiya/tensorflow-vgg-pretrained is ML use case
https://www.kaggle.com/chmaxx/finetune-vgg16-0-97-with-minimal-effort is ML use case
https://www.kaggle.com/crequena/starter-s-pack-for-invasives-detection is ML use case
https://www.kaggle.com/finlay/naive-bagging-cnn-pb0-985 is not ML use case
https://www.kaggle.com/fuji

https://www.kaggle.com/yekenot/pooled-gru-fasttext is not ML use case
https://www.kaggle.com/abhishek/pytorch-bert-inference is ML use case
https://www.kaggle.com/adityaecdrid/public-version-text-cleaning-vocab-65 is ML use case
https://www.kaggle.com/artgor/cnn-in-keras-on-folds is ML use case
https://www.kaggle.com/bminixhofer/simple-lstm-pytorch-version is not ML use case
https://www.kaggle.com/bminixhofer/speed-up-your-rnn-with-sequence-bucketing is ML use case
https://www.kaggle.com/christofhenkel/how-to-preprocessing-for-glove-part1-eda is not ML use case
https://www.kaggle.com/christofhenkel/how-to-preprocessing-for-glove-part2-usage is not ML use case
https://www.kaggle.com/christofhenkel/keras-baseline-lstm-attention-5-fold is not ML use case
https://www.kaggle.com/christofhenkel/loading-bert-using-pytorch-with-tokenizer-apex is not ML use case
https://www.kaggle.com/dborkan/benchmark-kernel is ML use case
https://www.kaggle.com/ekhtiar/unintended-eda-with-tutorial-notes is ML

https://www.kaggle.com/myonin/music-recommendation-random-forest-xgboost is ML use case
https://www.kaggle.com/priyaananthram/eda-of-music-recommendation-system is ML use case
https://www.kaggle.com/rohandx1996/recommendation-system-with-83-accuracy-lgbm is ML use case
https://www.kaggle.com/sidshady/basic-data-analysis-and-exploration is ML use case
https://www.kaggle.com/vinnsvinay/introduction-to-boosting-using-lgbm-lb-0-68357 is ML use case
https://www.kaggle.com/warpri81/kkbox-cf-svd is not ML use case
https://www.kaggle.com/aakashkerawat/exploring-and-predicting-lb-score-0-60160 is ML use case
https://www.kaggle.com/anokas/implementing-xgboost is not ML use case
https://www.kaggle.com/apapiu/exploring-kobe-s-shots is not ML use case
https://www.kaggle.com/arjoonn/preliminary-exploration is ML use case
https://www.kaggle.com/bbx396/kobechart is not ML use case
https://www.kaggle.com/brandao/winner-script is not ML use case
https://www.kaggle.com/brandao/xgboost-in-r-kobe-bryant-be

https://www.kaggle.com/lyakaap/fast-resized-image-download-python-3 is ML use case
https://www.kaggle.com/maxwell110/python3-version-image-downloader is not ML use case
https://www.kaggle.com/mxdbld/simple-exploration-of-google-recognition is ML use case
https://www.kaggle.com/plarmuseau/open-dataset-theano-tensor-first-image is not ML use case
https://www.kaggle.com/tobwey/landmark-recognition-challenge-image-downloader is not ML use case
https://www.kaggle.com/wesamelshamy/extract-geographical-info-in-image-description is ML use case
https://www.kaggle.com/wesamelshamy/image-feature-extraction-and-matching-for-newbies is ML use case
https://www.kaggle.com/wolfgangb33r/landmark-recognition-train-a-first-keras-model is not ML use case
https://www.kaggle.com/anisayari/download-images-dataset-python3-log-progressbar is not ML use case
https://www.kaggle.com/automatichourglass/create-a-subset-of-training-dataset is not ML use case
https://www.kaggle.com/grapestone5321/exploration-of-the-d

https://www.kaggle.com/madcap/r-xgboost-starter-script is not ML use case
https://www.kaggle.com/mmueller/xgb-feature-importance-python is not ML use case
https://www.kaggle.com/nigelcarpenter/r-xgboost-gini-v2 is not ML use case
https://www.kaggle.com/nigelcarpenter/r-xgboost-with-gini-eval-and-stopping is not ML use case
https://www.kaggle.com/odiseo1982/compare-variables-between-train-and-test is not ML use case
https://www.kaggle.com/oxofff/gini-scorer-cv-gridsearch is not ML use case
https://www.kaggle.com/rocelot/factortonumeric is not ML use case
https://www.kaggle.com/rshah4/correlation-matrix-visualization is not ML use case
https://www.kaggle.com/rshah4/histogram-of-all-fields-with-labels is not ML use case
https://www.kaggle.com/soutik/blah-xgb is not ML use case
https://www.kaggle.com/tdevries/calculating-normalized-gini-coefficient is not ML use case
https://www.kaggle.com/titericz/done-done-3 is not ML use case
https://www.kaggle.com/vikasrtr/histogram-of-all-fields is no

https://www.kaggle.com/allunia/m5-sales-uncertainty-prediction is ML use case
https://www.kaggle.com/chrisrichardmiles/m5u-wsplevaluator-weighted-scaled-pinball-loss is ML use case
https://www.kaggle.com/headsortails/back-to-predict-the-future-interactive-m5-eda is not ML use case
https://www.kaggle.com/holoong9291/eda-for-m5-2-en is ML use case
https://www.kaggle.com/kamalnaithani/m5uncertainity-score is not ML use case
https://www.kaggle.com/kneroma/fast-wsp-loss-implementation-5s is ML use case
https://www.kaggle.com/kneroma/from-point-to-uncertainty-prediction is not ML use case
https://www.kaggle.com/konradb/beat-the-benchmark-snaive is not ML use case
https://www.kaggle.com/mpware/quantile-regression-cv3-tf is ML use case
https://www.kaggle.com/nxrprime/coefficient-multiplier is not ML use case
https://www.kaggle.com/robertburbidge/lightgbm-poisson-w-scaled-pinball-loss is not ML use case
https://www.kaggle.com/rohanrao/m5-the-weighing-scale is not ML use case
https://www.kaggle.

https://www.kaggle.com/vikassingh1996/ncaa-march-madness-exploratory-analysis-fe is not ML use case
https://www.kaggle.com/alijs1/experiments-with-spectrograms is not ML use case
https://www.kaggle.com/andy101/exploring-the-channel-energies is ML use case
https://www.kaggle.com/anokas/seizure-boosting is ML use case
https://www.kaggle.com/asterios/proper-cross-validation is ML use case
https://www.kaggle.com/avilesmarcel/open-mat-in-python-pandas-dataframe is not ML use case
https://www.kaggle.com/bzamecnik/brain-sounds is not ML use case
https://www.kaggle.com/changgyu/another-data-corruption is not ML use case
https://www.kaggle.com/deepcnn/feature-extractor-matlab2python-translated is ML use case
https://www.kaggle.com/deepcnn/spectrogram-pairs is not ML use case
https://www.kaggle.com/jeffhebert/seizure-spectrograms is not ML use case
https://www.kaggle.com/openneuron/begin-with-r-generate-features-2 is not ML use case
https://www.kaggle.com/pakozm/dropoutcounts is not ML use case


https://www.kaggle.com/cdeotte/neural-network-malware-0-67 is ML use case
https://www.kaggle.com/cdeotte/private-leaderboard-0-750 is ML use case
https://www.kaggle.com/cdeotte/time-series-eda-malware-0-64 is ML use case
https://www.kaggle.com/cdeotte/time-split-validation-malware-0-68 is ML use case
https://www.kaggle.com/datark1/malware-prediction-eda is not ML use case
https://www.kaggle.com/fabiendaniel/detecting-malwares-with-lgbm is ML use case
https://www.kaggle.com/guoday/nffm-baseline-0-690-on-lb is not ML use case
https://www.kaggle.com/guoday/xdeepfm-baseline is not ML use case
https://www.kaggle.com/jiegeng94/everyone-do-this-at-the-beginning is ML use case
https://www.kaggle.com/kailex/ms-malware-starter is not ML use case
https://www.kaggle.com/rquintino/2-months-train-1-month-public-1-day-private is not ML use case
https://www.kaggle.com/shrutimechlearn/large-data-loading-trick-with-ms-malware-data is not ML use case
https://www.kaggle.com/sjb1988/lgb-python-basic-featur

https://www.kaggle.com/aleksandradeis/nfl-injury-analysis is not ML use case
https://www.kaggle.com/benjenkins96/nfl-1st-and-future-analysis is not ML use case
https://www.kaggle.com/bgpablo/visualization-of-player-movement-speed-accel is not ML use case
https://www.kaggle.com/chandraroy/nfl-analytics is not ML use case
https://www.kaggle.com/david289/nfl-lower-limb-non-contact-injuries-analysis is ML use case
https://www.kaggle.com/docxian/nfl-data-journey-and-simple-model is ML use case
https://www.kaggle.com/elijah24/nfl-injuries is ML use case
https://www.kaggle.com/habbeda/contradictory-discovery-solved is not ML use case
https://www.kaggle.com/jaseziv83/an-analysis-of-nfl-injuries is not ML use case
https://www.kaggle.com/jpmiller/how-to-adjust-orientation is not ML use case
https://www.kaggle.com/jpmiller/nfl-1standfuture-report is not ML use case
https://www.kaggle.com/jpmiller/using-track-data-with-small-memory is not ML use case
https://www.kaggle.com/krishm/nfl-1st-and-futur

https://www.kaggle.com/maxkapsecker/pca-pattern-discovery is ML use case
https://www.kaggle.com/mbkinaci/eda-xgboost-ridge-knn-extratrees-regression is ML use case
https://www.kaggle.com/pecooper/xgboost-benchmark-simple-usage-of-geometry-files is not ML use case
https://www.kaggle.com/srserves85/boosting-stacking-and-bayes-searching is ML use case
https://www.kaggle.com/sudhirnl7/simple-ann is ML use case
https://www.kaggle.com/sudhirnl7/simple-electron-volt-predictor is ML use case
https://www.kaggle.com/tejasrinivas/xgb-starter-0-0584-on-lb is not ML use case
https://www.kaggle.com/tonyyy/find-the-same-geometry is not ML use case
https://www.kaggle.com/tonyyy/how-to-get-atomic-coordinates is not ML use case
https://www.kaggle.com/tunguz/simple-catboost is not ML use case
https://www.kaggle.com/aakashnain/eda-nytaxi is ML use case
https://www.kaggle.com/abhishek/no-crap-only-models is not ML use case
https://www.kaggle.com/crailtap/basic-network-analysis-tutorial is not ML use case
h

https://www.kaggle.com/miklgr500/linear-decay-based-on-resnet-cnn is ML use case
https://www.kaggle.com/nxrprime/fibrosis-eda-fast-ai is ML use case
https://www.kaggle.com/piantic/osic-pulmonary-fibrosis-progression-basic-eda is ML use case
https://www.kaggle.com/rohanrao/osic-understanding-laplace-log-likelihood is ML use case
https://www.kaggle.com/titericz/tabular-simple-eda-linear-model is ML use case
https://www.kaggle.com/twinkle0705/your-starter-notebook-for-osic is not ML use case
https://www.kaggle.com/ulrich07/osic-multiple-quantile-regression-starter is not ML use case
https://www.kaggle.com/vbmokin/higher-lb-score-by-tuning-mloss-upgrade-visual is not ML use case
https://www.kaggle.com/yasufuminakama/osic-lgb-baseline is not ML use case
https://www.kaggle.com/abhishek/beating-the-benchmark-v2-0 is not ML use case
https://www.kaggle.com/ankitdatascience/random-and-bayes-search-hyp-optimization-gpu is ML use case
https://www.kaggle.com/benhamner/random-forest-benchmark-r-1 is

https://www.kaggle.com/skooch/petfinder-simple-lgbm-baseline is ML use case
https://www.kaggle.com/vshakhray/dls-petfinder is ML use case
https://www.kaggle.com/wrosinski/baselinemodeling is ML use case
https://www.kaggle.com/wuyhbb/final-small is not ML use case
https://www.kaggle.com/arnoudcommandeur/visualisation-of-trips-by-google-maps is not ML use case
https://www.kaggle.com/arunkt/visualization-of-taxi-trip-end-points is not ML use case
https://www.kaggle.com/ashamli/all-trips is not ML use case
https://www.kaggle.com/benhamner/last-location-benchmark is not ML use case
https://www.kaggle.com/benhamner/test-trips-map is not ML use case
https://www.kaggle.com/gopaldutt/test-trips-map is not ML use case
https://www.kaggle.com/hochthom/visualization-of-taxi-trip-end-points is not ML use case
https://www.kaggle.com/jihyeseo/handle-rds-and-zip-eda is not ML use case
https://www.kaggle.com/joncle/notebook0f2646ced6 is not ML use case
https://www.kaggle.com/joncle/notebookc2931820a9 is

https://www.kaggle.com/gaborvecsei/plant-seedlings-fun-with-computer-vision is ML use case
https://www.kaggle.com/gaborvecsei/plants-t-sne is not ML use case
https://www.kaggle.com/kmader/pretrained-vgg16-w-attention-for-seedlings is ML use case
https://www.kaggle.com/matrixb/cnn-svm-xgboost is not ML use case
https://www.kaggle.com/meenavyas/plant-seedlings-classification is not ML use case
https://www.kaggle.com/miklgr500/keras-simple-model-0-97103-best-public-score is not ML use case
https://www.kaggle.com/nikkonst/plant-seedlings-with-cnn-and-image-processing is ML use case
https://www.kaggle.com/omkarsabnis/seedling-classification-using-cnn-v13-0-95 is ML use case
https://www.kaggle.com/oysteijo/just-some-simple-train-data-investigation is not ML use case
https://www.kaggle.com/praanj/basic-keras-cnn-with-startified-kfold-evaluation is ML use case
https://www.kaggle.com/praanj/transfer-learning-vgg-19-resnet-50-with-kfold is ML use case
https://www.kaggle.com/raoulma/plants-xcepti

https://www.kaggle.com/akensert/panda-optimized-tiling-tf-data-dataset is ML use case
https://www.kaggle.com/debanga/let-s-enhance-the-images is ML use case
https://www.kaggle.com/dhananjay3/panda-eda-all-you-need-to-know is ML use case
https://www.kaggle.com/gpreda/panda-challenge-starting-eda is ML use case
https://www.kaggle.com/haqishen/panda-inference-w-36-tiles-256 is ML use case
https://www.kaggle.com/haqishen/train-efficientnet-b0-w-36-tiles-256-lb0-87 is ML use case
https://www.kaggle.com/iafoss/panda-16x128x128-tiles is ML use case
https://www.kaggle.com/iafoss/panda-concat-tile-pooling-starter-0-79-lb is ML use case
https://www.kaggle.com/iafoss/panda-concat-tile-pooling-starter-inference is ML use case
https://www.kaggle.com/iamleonie/panda-eda-visualizations-suspicious-data is ML use case
https://www.kaggle.com/reighns/understanding-the-quadratic-weighted-kappa is ML use case
https://www.kaggle.com/rftexas/better-image-tiles-removing-white-spaces is ML use case
https://www

https://www.kaggle.com/anokas/data-analysis-xgboost-starter-0-35460-lb is ML use case
https://www.kaggle.com/ashhafez/temporal-pattern-in-train-response-rates is ML use case
https://www.kaggle.com/cpmpml/spell-checker-using-word2vec is ML use case
https://www.kaggle.com/currie32/the-importance-of-cleaning-text is ML use case
https://www.kaggle.com/davidthaler/duplicates-of-duplicates is ML use case
https://www.kaggle.com/davidthaler/how-many-1-s-are-in-the-public-lb is ML use case
https://www.kaggle.com/hubert0527/spacy-name-entity-recognition is ML use case
https://www.kaggle.com/jeffd23/visualizing-word-vectors-with-t-sne is ML use case
https://www.kaggle.com/jturkewitz/magic-features-0-03-gain is not ML use case
https://www.kaggle.com/liananapalkova/simply-about-word2vec is ML use case
https://www.kaggle.com/lystdo/lb-0-18-lstm-with-glove-and-magic-features is not ML use case
https://www.kaggle.com/lystdo/lstm-with-word2vec-embeddings is not ML use case
https://www.kaggle.com/philsc

https://www.kaggle.com/danofer/getting-started-baseline is not ML use case
https://www.kaggle.com/docxian/first-look-preparation-save-as-rdata is not ML use case
https://www.kaggle.com/econdata/reducing-commercial-aviation-fatalities is not ML use case
https://www.kaggle.com/kamalchhirang/forgot-to-shuffle-the-data-while-splitting-boom is not ML use case
https://www.kaggle.com/mahmoud86/eda-rdf-boosting-kn is ML use case
https://www.kaggle.com/mrbruce/aviation-fatalities-data-insight is ML use case
https://www.kaggle.com/opoliakova/data-analysis-and-manipulation-sklearn-modeling is not ML use case
https://www.kaggle.com/plarmuseau/fork-of-aviation-second-tempt is ML use case
https://www.kaggle.com/robbiebeane/commercial-aviation-v01 is not ML use case
https://www.kaggle.com/sarmat/imbalance-learning-examples is ML use case
https://www.kaggle.com/sarmat/sklearn-lgbm-ensemble-baseline is not ML use case
https://www.kaggle.com/shahaffind/reducing-commercial-aviation-fatalities-11th is not

https://www.kaggle.com/khyeh0719/stratified-validation-strategy is ML use case
https://www.kaggle.com/kozodoi/lightgbm-on-meta-features is ML use case
https://www.kaggle.com/nitindatta/pulmonary-embolism-dicom-preprocessing-eda is not ML use case
https://www.kaggle.com/orkatz2/cnn-lstm-pytorch-train is not ML use case
https://www.kaggle.com/orkatz2/pulmonary-embolism-pytorch-train is ML use case
https://www.kaggle.com/orkatz2/resnext-pulmonary-embolism-inference is not ML use case
https://www.kaggle.com/osciiart/baseline-with-no-image is ML use case
https://www.kaggle.com/paulorzp/mean-baseline is not ML use case
https://www.kaggle.com/redwankarimsony/rsna-str-3d-stacking-3d-plot-segmentation is ML use case
https://www.kaggle.com/redwankarimsony/rsna-str-pe-gradient-sigmoid-windowing is ML use case
https://www.kaggle.com/rythian47/vision-transformer-goodbye-cnn-training is ML use case
https://www.kaggle.com/seraphwedd18/pe-detection-with-keras-model-creation is ML use case
https://www.

https://www.kaggle.com/mathormad/knowledge-distillation-with-nn-rankgauss is ML use case
https://www.kaggle.com/mjbahmani/santander-ml-explainability is ML use case
https://www.kaggle.com/roydatascience/eda-pca-simple-lgbm-on-kfold-technique is ML use case
https://www.kaggle.com/vinhnguyen/gpu-acceleration-for-lightgbm is ML use case
https://www.kaggle.com/yag320/list-of-fake-samples-and-public-private-lb-split is not ML use case
https://www.kaggle.com/alabsinatheer/comprehensive-exploration-and-visualization-1-1 is not ML use case
https://www.kaggle.com/anokas/collaborative-filtering-btb-lb-0-01691 is not ML use case
https://www.kaggle.com/apryor6/detailed-cleaning-visualization is not ML use case
https://www.kaggle.com/apryor6/detailed-cleaning-visualization-python is ML use case
https://www.kaggle.com/delemeator/xgb-starter is not ML use case
https://www.kaggle.com/donyoe/santander-quick-first-view is not ML use case
https://www.kaggle.com/jturkewitz/reduce-size-of-dataset-to-1-gb i

https://www.kaggle.com/outrunner/trackml-2-solution-example is ML use case
https://www.kaggle.com/pranav84/a-beginner-s-guide-to-cern-s-trackml-challenge is not ML use case
https://www.kaggle.com/shivamb/trajectory-animation-eda is ML use case
https://www.kaggle.com/sionek/bayesian-optimization is not ML use case
https://www.kaggle.com/sionek/mod-dbscan-x-100-parallel is not ML use case
https://www.kaggle.com/wesamelshamy/trackml-problem-explanation-and-data-exploration is not ML use case
https://www.kaggle.com/yuval6967/7th-place-clustering-extending-ml-merging-0-75 is ML use case
https://www.kaggle.com/tarunaryyan/imputation-for-missing-values-in-features is not ML use case
https://www.kaggle.com/akshatpathak/text-data-clustering is ML use case
https://www.kaggle.com/anokas/frequent-words-model-v2 is not ML use case
https://www.kaggle.com/charlescostello/transfer-learning-on-stack-exchange-tags is ML use case
https://www.kaggle.com/eliotbarr/word-clouds is not ML use case
https://www

https://www.kaggle.com/enrique1500/rental-listing-ny-map is not ML use case
https://www.kaggle.com/guoday/cv-statistics-better-parameters-and-explaination is not ML use case
https://www.kaggle.com/jxnlco/deduplicating-features is not ML use case
https://www.kaggle.com/kashnitsky/topic-6-feature-engineering-and-feature-selection is ML use case
https://www.kaggle.com/neviadomski/data-exploration-two-sigma-renthop is not ML use case
https://www.kaggle.com/poonaml/two-sigma-renthop-eda is ML use case
https://www.kaggle.com/rakhlin/another-python-version-of-it-is-lit-by-branden is not ML use case
https://www.kaggle.com/robertoruiz/feature-engineering-1-sentiment-analysis is not ML use case
https://www.kaggle.com/somnisight/microsoft-lightgbm-starter is ML use case
https://www.kaggle.com/stanislavushakov/python-version-of-it-is-lit-by-branden is not ML use case
https://www.kaggle.com/sudalairajkumar/simple-exploration-notebook-2-connect is not ML use case
https://www.kaggle.com/sudalairajkum

https://www.kaggle.com/braquino/5-fold-lstm-attention-fully-commented-0-694 is ML use case
https://www.kaggle.com/braquino/vsb-power-lstm-attention is ML use case
https://www.kaggle.com/go1dfish/basic-eda is ML use case
https://www.kaggle.com/jackvial/dwt-signal-denoising is not ML use case
https://www.kaggle.com/jeffreyegan/vsb-power-line-fault-detection-approach is ML use case
https://www.kaggle.com/junkoda/handmade-features is ML use case
https://www.kaggle.com/mark4h/vsb-1st-place-solution is ML use case
https://www.kaggle.com/miklgr500/flatiron is ML use case
https://www.kaggle.com/ratthachat/demythifying-matthew-correlation-coefficients-mcc is ML use case
https://www.kaggle.com/roydatascience/eda-iso-pca-lle-stratified-lstm-attention is ML use case
https://www.kaggle.com/sohier/reading-the-data-with-python is not ML use case
https://www.kaggle.com/suicaokhoailang/5-fold-lstm-with-threshold-tuning-0-618-lb is ML use case
https://www.kaggle.com/suicaokhoailang/transformer-baseline-

https://www.kaggle.com/ashishpatel26/scrumptious-cooking-foods is ML use case
https://www.kaggle.com/ashishpatel26/think-differently-what-s-cooking is ML use case
https://www.kaggle.com/codename007/cooking-cooking-cooking is not ML use case
https://www.kaggle.com/gloriahristova/a-walkthrough-eda-vizualizations-unigram-model is not ML use case
https://www.kaggle.com/josephgpinto/cooking-is-chemistry-really is not ML use case
https://www.kaggle.com/mmotoki/word-embeddings-cuisine-similarity is ML use case
https://www.kaggle.com/nicapotato/this-model-is-bland-simple-logistic-starter is ML use case
https://www.kaggle.com/nulldata/unsupervised-cuisine-ingredients-creation is not ML use case
https://www.kaggle.com/rajmehra03/a-detailed-explanation-of-keras-embedding-layer is ML use case
https://www.kaggle.com/rejasupotaro/let-s-cook-model is ML use case
https://www.kaggle.com/rejasupotaro/representations-for-ingredients is ML use case
https://www.kaggle.com/rejasupotaro/what-are-ingredients 

https://www.kaggle.com/darrellulm/google-cloud-youtube-8m-video-testrun is not ML use case
https://www.kaggle.com/drn01z3/keras-baseline-on-video-features-0-7941-lb is not ML use case
https://www.kaggle.com/evanmiller/basic-eda is ML use case
https://www.kaggle.com/hagerrady/starter-explore-youtube8m-sample-data is ML use case
https://www.kaggle.com/kuixui/rnn-test is not ML use case
https://www.kaggle.com/mikell/most-popular-labels is not ML use case
https://www.kaggle.com/mnds18/eda-explore-youtube8m-sample-data is not ML use case
https://www.kaggle.com/neilzhang/youtube-8m is not ML use case
https://www.kaggle.com/nik0lashka/0-7941-lb-outofmemory-fix-generator-fix is not ML use case
https://www.kaggle.com/philschmidt/youtube8m-eda is ML use case
https://www.kaggle.com/pmw9440/notebook1ee341b1c9 is not ML use case
https://www.kaggle.com/veerabhadrappa/youtube8m-explorer is ML use case
https://www.kaggle.com/vlarine/most-popular-labels is not ML use case
https://www.kaggle.com/wendyka

In [16]:
# print size of gathered csv
print(df.shape)
print(df2.shape)

(2069, 21)
(2476, 21)
