Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -97,4 +97,6 @@ ENV/
.idea
example_model.xgb

.DS_Store
.DS_Store

v5.0/
Binary file modified cached-pickles/example_model.pkl
Binary file not shown.
Binary file modified cached-pickles/feature_neutralization.pkl
Binary file not shown.
Binary file modified cached-pickles/hello_numerai.pkl
Binary file not shown.
Binary file modified cached-pickles/target_ensemble.pkl
Binary file not shown.
72 changes: 30 additions & 42 deletions example_model.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Ekw8Z93ljC3v",
"outputId": "675ac893-5a46-4c6b-dc03-09438941d1fc"
"outputId": "bdd16698-2ad0-4423-b090-c5ce55fe3053"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Python 3.10.12\n"
"Python 3.11.13\n"
]
}
],
Expand All @@ -40,59 +40,47 @@
"base_uri": "https://localhost:8080/"
},
"id": "yoy_wT1rhMqF",
"outputId": "4268fdb0-84d2-4502-97e4-e93a1440c8ee"
"outputId": "e038b50f-1b61-4334-be62-28f4dc40a0a0"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m34.4/34.4 MB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"lida 0.0.10 requires fastapi, which is not installed.\n",
"lida 0.0.10 requires kaleido, which is not installed.\n",
"lida 0.0.10 requires python-multipart, which is not installed.\n",
"lida 0.0.10 requires uvicorn, which is not installed.\u001b[0m\u001b[31m\n",
"\u001b[0m"
]
}
],
"outputs": [],
"source": [
"# Install dependencies\n",
"!pip install -q numerapi pandas lightgbm cloudpickle==2.2.1 pyarrow scikit-learn scipy==1.10.1"
"!pip install -q --upgrade numerapi pandas pyarrow matplotlib lightgbm scikit-learn scipy cloudpickle==3.1.1"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
"height": 160
},
"id": "13hdRk9ghMqI",
"outputId": "857a4882-83e5-4a76-9b1e-57d6d822cc67"
"outputId": "d2274374-fd85-4189-f27b-d9d466cc63ca"
},
"outputs": [
{
"data": {
"application/javascript": "\n async function download(id, filename, size) {\n if (!google.colab.kernel.accessAllowed) {\n return;\n }\n const div = document.createElement('div');\n const label = document.createElement('label');\n label.textContent = `Downloading \"${filename}\": `;\n div.appendChild(label);\n const progress = document.createElement('progress');\n progress.max = size;\n div.appendChild(progress);\n document.body.appendChild(div);\n\n const buffers = [];\n let downloaded = 0;\n\n const channel = await google.colab.kernel.comms.open(id);\n // Send a message to notify the kernel that we're ready.\n channel.send({})\n\n for await (const message of channel.messages) {\n // Send a message to notify the kernel that we're ready.\n channel.send({})\n if (message.buffers) {\n for (const buffer of message.buffers) {\n buffers.push(buffer);\n downloaded += buffer.byteLength;\n progress.value = downloaded;\n }\n }\n }\n const blob = new Blob(buffers, {type: 'application/binary'});\n const a = document.createElement('a');\n a.href = window.URL.createObjectURL(blob);\n a.download = filename;\n div.appendChild(a);\n a.click();\n div.remove();\n }\n ",
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
"name": "stderr",
"output_type": "stream",
"text": [
"2025-07-25 13:44:58,042 INFO numerapi.utils: starting download\n",
"v5.0/train.parquet: 2.37GB [01:04, 36.7MB/s] \n",
"2025-07-25 13:46:03,017 INFO numerapi.utils: starting download\n",
"v5.0/features.json: 291kB [00:00, 2.75MB/s] \n"
]
},
{
"data": {
"application/javascript": "download(\"download_9cb9b662-7992-47b0-b787-453b845e7050\", \"predict_barebones.pkl\", 6572312)",
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
"name": "stdout",
"output_type": "stream",
"text": [
"[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001168 seconds.\n",
"You can set `force_row_wise=true` to remove the overhead.\n",
"And if memory is not enough, you can set `force_col_wise=true`.\n",
"[LightGBM] [Info] Total Bins 210\n",
"[LightGBM] [Info] Number of data points in the train set: 688184, number of used features: 42\n",
"[LightGBM] [Info] Start training from score 0.500008\n"
]
}
],
"source": [
Expand Down Expand Up @@ -152,7 +140,7 @@
"# Define predict function\n",
"def predict(\n",
" live_features: pd.DataFrame,\n",
" live_benchmark_models: pd.DataFrame\n",
" _live_benchmark_models: pd.DataFrame\n",
" ) -> pd.DataFrame:\n",
" live_predictions = model.predict(live_features[features])\n",
" submission = pd.Series(live_predictions, index=live_features.index)\n",
Expand All @@ -178,7 +166,7 @@
"provenance": []
},
"kernelspec": {
"display_name": "venv",
"display_name": "3.11.13",
"language": "python",
"name": "python3"
},
Expand All @@ -192,7 +180,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.11.13"
},
"orig_nbformat": 4
},
Expand Down
Loading