# kgl

> Command line utilities for kaggle competitions

In [None]:
#| default_exp kgl

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from fastcore.all import *
from fastkaggle import *
import itertools as it

from oztools.core import *
from oztools.nbd import *

import os,json,subprocess, shutil
import re

This module requires a Kaggle API token in order to work.
See [here](https://github.com/Kaggle/kaggle-api/blob/main/docs/README.md#api-credentials) for info
on how to set one up.

## Competition utils

Modified version of `setup_comp` from `fastkaggle`.
I like to put my data into a dedicated folder (`./var` here) so it's easier to exclude it from version control.

In [None]:
#|export
def setup_comp(competition, install=''):
    """Get a path to data for `competition`, downloading it if needed.

    When `iskaggle` is truthy, optionally pip-installs `install` and returns
    `../input/<competition>`. Otherwise the data is expected under
    `./var/<competition>`; if that folder does not exist, the competition
    archive is downloaded into `./var` and extracted into it.
    """
    if iskaggle:
        if install:
            os.system(f'pip install -Uqq {install}')
        return Path('../input')/competition

    # Single source of truth for the local data folder
    data_root = './var'
    path = Path(data_root)/competition
    api = import_kaggle()
    if not path.exists():
        import zipfile
        api.competition_download_cli(str(competition), path=data_root)
        # The context manager closes the archive handle even if extraction fails
        with zipfile.ZipFile(f'{data_root}/{competition}.zip') as archive:
            archive.extractall(path)
    return path

## Setup competition projects

In [None]:
# Fetch the competition list through the Kaggle API client
api = import_kaggle()
comps = api.competitions_list()
comp = comps[0]
# Show the first competition's title and its slug (last segment of the URL)
comp.title, comp.url.split("/")[-1]

('AI Mathematical Olympiad - Progress Prize 2',
 'ai-mathematical-olympiad-progress-prize-2')

In [None]:
len("equity-post-HCT-survival-prediction  ")

37

In [None]:
#| export
def disp_comp(comp):
    "Format `comp` as one listing row: slug column then title, each cut to 40 chars"
    # The slug is the last path segment of the competition URL
    *_, slug = comp.url.split("/")
    slug_col = pad(slug[:40], 40)
    title_col = comp.title[:40]
    return f"{slug_col} {title_col}"

In [None]:
joinedkey = attrkey("user_has_entered")

In [None]:
# Order the list so not-yet-entered competitions come before entered ones
comps.sort(key=joinedkey)
# Partition explicitly: unpacking `it.groupby` output into two names raises
# ValueError whenever every competition falls into a single group.
active = [c for c in comps if not c.user_has_entered]
entered = [c for c in comps if c.user_has_entered]

In [None]:
#| export
def get_competitions():
    """Return `(active, entered)` lists of competitions from the Kaggle API.

    `entered` holds the competitions whose `user_has_entered` flag is truthy,
    `active` holds the rest. Each list keeps the order returned by
    `competitions_list()`. Either list may be empty.
    """
    api = import_kaggle()
    comps = api.competitions_list()

    # Partition by the flag directly. Sorting and unpacking `groupby` into two
    # names raised ValueError when all competitions ended up in one group
    # (e.g. the user has not entered any of them).
    active = [c for c in comps if not c.user_has_entered]
    entered = [c for c in comps if c.user_has_entered]
    return active, entered

In [None]:
# Call the exported helper and peek at the first entry of each group
active, entered = get_competitions()
active[:1], entered[:1]

([{"id": 86023, "ref": "https://www.kaggle.com/competitions/ai-mathematical-olympiad-progress-prize-2", "title": "AI Mathematical Olympiad - Progress Prize 2", "url": "https://www.kaggle.com/competitions/ai-mathematical-olympiad-progress-prize-2", "description": "Solve national-level math challenges using artificial intelligence models", "organizationName": "AI|MO", "organizationRef": "", "category": "Featured", "reward": "2,117,152 Usd", "tags": [{"ref": "nlp", "name": "nlp", "description": "Natural Language Processing gives a computer program the ability to extract meaning human language. Applications include sentiment analysis, translation, and speech recognition.", "fullPath": "analysis > nlp", "competitionCount": 89, "datasetCount": 4512, "scriptCount": 8533, "totalCount": 13134}, {"ref": "mathematics", "name": "mathematics", "description": "", "fullPath": "subject > mathematics", "competitionCount": 4, "datasetCount": 120, "scriptCount": 179, "totalCount": 303}, {"ref": "accuracy

In [None]:
#| export
@call_parse
def kgl_list():
    "List kaggle competitions"
    active, entered = get_competitions()

    # Joined competitions are numbered from 1; active ones continue the count
    joined_rows = str_enumerate(map(disp_comp, entered), 1)
    active_rows = str_enumerate(map(disp_comp, active), 1+len(entered))

    rows = ["Joined:", *joined_rows, "Active:", *active_rows]
    return '\n'.join(rows)

In [None]:
# Print the listing string returned by `kgl_list`
print(kgl_list())

Joined:
  1   playground-series-s5e3                   Binary Prediction with a Rainfall Datase
  2   store-sales-time-series-forecasting      Store Sales - Time Series Forecasting
Active:
  3   ai-mathematical-olympiad-progress-prize- AI Mathematical Olympiad - Progress Priz
  4   stanford-rna-3d-folding                  Stanford RNA 3D Folding
  5   byu-locating-bacterial-flagellar-motors- BYU - Locating Bacterial Flagellar Motor
  6   march-machine-learning-mania-2025        March Machine Learning Mania 2025
  7   drawing-with-llms                        Drawing with LLMs
  8   birdclef-2025                            BirdCLEF+ 2025
  9   titanic                                  Titanic - Machine Learning from Disaster
  10  home-data-for-ml-course                  Housing Prices Competition for Kaggle Le
  11  house-prices-advanced-regression-techniq House Prices - Advanced Regression Techn
  12  spaceship-titanic                        Spaceship Titanic
  13  digit-recognizer     

In [None]:
#| export
def maybe_int(x: str):
    "Return `x` converted with `int` when that succeeds, otherwise `x` unchanged"
    try:
        number = int(x)
    except ValueError:
        # Not an integer literal: hand the input back untouched
        return x
    return number

In [None]:
comp = comps[0]

In [None]:
comp.url

'https://www.kaggle.com/competitions/ai-mathematical-olympiad-progress-prize-2'

In [None]:
comp.title

'AI Mathematical Olympiad - Progress Prize 2'

In [None]:
#| export
def get_competition(n: str):
    """Look up a competition by listing number or by slug fragment.

    `n` is either a 1-based position in the `entered + active` list (the same
    numbering `kgl_list` produces) or a substring of the competition slug (the
    last segment of its URL); the first slug match wins.
    Returns the competition, or `None` after a warning when nothing matches.
    """
    active, entered = get_competitions()
    comps = entered + active

    try:
        idx = int(n)
    except ValueError:
        # Not a number: treat `n` as a slug fragment
        found = next((c for c in comps if n in c.url.split("/")[-1]), None)
    else:
        # Reject 0 and negatives explicitly; plain `comps[idx-1]` would wrap
        # around to the end of the list and return an unrelated competition.
        found = comps[idx-1] if 1 <= idx <= len(comps) else None

    if found is None:
        warn("Couldn't find competition")
    return found

In [None]:
#| export
@call_parse
def kgl_new(n: str, # competition id or name
            save_to: str # project name to use locally and for github
            ):
    "Setup nbdev environment for a kaggle competition"
    comp = get_competition(n)
    if comp:
        # Local/GitHub project names are always prefixed with "kaggle_"
        project = "kaggle_" + save_to
        description = f'Code for [{comp.title}]({comp.url}) Kaggle competition'

        print(f'Loading competition "{comp.title}" into "{project}"')
        nbd_new_fn(project, description)

## Adapted from fastkaggle
Changes:
- Allow uploading the current project even if it's not on pip
- The Kaggle API has changed in the three years since the original code was written, so the code had to be fixed

In [None]:
#| export
def create_lib_dataset(ds_name, # Dataset is titled `library-{ds_name}`
                       lib_source, # Library spec handed to `get_pip_library`
                       lib_path, # Local path to dl/create dataset
                       username, # Your username
                       clear_after=False # Delete local copies after sync with kaggle?
                       ):
    '''Create or update a kaggle dataset with the latest version of one library'''
    retain = ["dataset-metadata.json"]

    lib = lib_source
    title = f"library-{ds_name}"
    local_path = lib_path/title
    print(f"{lib} | Processing as {title} at {local_path}")
    # Start from a clean local folder so stale files never get pushed
    if Path(local_path).exists(): shutil.rmtree(local_path)

    print(f"{lib} | Downloading or Creating Dataset")
    try: get_dataset(local_path,f"{username}/{title}",force=True)
    except Exception as ex:
        # Only a 404/403 response means "dataset not there yet, create it".
        # The previous `'404' or '403' in str(ex)` was always truthy, so every
        # failure (network, auth, ...) was silently treated as a missing dataset.
        if any(code in str(ex) for code in ('404', '403')): mk_dataset(local_path,title,force=True)
        else: raise

    print(f"{lib} | Checking dataset version against pip")
    ver_local_orig = get_local_ds_ver(lib_path,lib)

    # Drop everything except the dataset metadata before fetching fresh files
    for item in local_path.ls():
        if item.name not in retain:
            if item.is_dir(): shutil.rmtree(item)
            else: item.unlink()
    get_pip_library(local_path,lib)
    ver_local_new = get_local_ds_ver(lib_path,lib)
    # Push when the version changed, or when no version could be determined on
    # either side (so libraries without a detectable version still upload)
    if (ver_local_new != ver_local_orig) or (ver_local_new is None and ver_local_orig is None):
        print(f"{lib} | Updating {lib} in Kaggle from {ver_local_orig} to {ver_local_new}")

        push_dataset(local_path,ifnone (ver_local_new, "Version Unknown"))
    else: print(f"{lib} | Kaggle dataset already up to date {ver_local_orig} to {ver_local_new}")
    if clear_after: shutil.rmtree(local_path)
    print(f"{lib} | Complete")

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()