From 15d084bf0c98fd03e4e5404c94117a593c13a373 Mon Sep 17 00:00:00 2001 From: Mike Levin Date: Wed, 25 Mar 2026 07:35:34 -0400 Subject: [PATCH] Finalized logical numbering sequence for Advanced Notebooks --- .../Advanced_Notebooks/01_URLinspector.ipynb | 246 ++++++++++++++++++ foo_files.py | 10 +- 2 files changed, 251 insertions(+), 5 deletions(-) create mode 100644 assets/nbs/Advanced_Notebooks/01_URLinspector.ipynb diff --git a/assets/nbs/Advanced_Notebooks/01_URLinspector.ipynb b/assets/nbs/Advanced_Notebooks/01_URLinspector.ipynb new file mode 100644 index 00000000..08debd07 --- /dev/null +++ b/assets/nbs/Advanced_Notebooks/01_URLinspector.ipynb @@ -0,0 +1,246 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# URLinspector 🔬\n", + "\n", + "> \"I will look at *this* URL, and I will tell you about it.\"\n", + "\n", + "Welcome to your first targeted strike. We are going to look at a single page, pull it into our local reality, and expose the **JavaScript Gap**—the massive blind spot that cheap cloud scrapers have because they don't actually render the page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Allows adjusting secret sauce recipe adjustments\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from pipulate import wand\n", + "from imports import url_inspect_sauce as sauce\n", + "import nest_asyncio\n", + "nest_asyncio.apply()\n", + "\n", + "job = \"urlinspector-01\" \n", + "wand.speak(\"Wand initialized. 
Give me a target, boss.\")" + ] + }, + { + "cell_type": "markdown", + "id": "3", + "metadata": {}, + "source": [ + "### 🎯 The Target\n", + "Enter exactly **one** URL below. We keep things 1-to-1 here to build our mental model." + ] + }, + { + "cell_type": "raw", + "id": "4", + "metadata": { + "editable": true, + "raw_mimetype": "", + "slideshow": { + "slide_type": "" + }, + "tags": [ + "url-list-input" + ] + }, + "source": [ + "# Enter one URL per line\n", + "https://nixos.org/ # Linux\n", + "https://jupyter.org/ # Python\n", + "https://neovim.io/ # vim\n", + "https://git-scm.com/ # git\n", + "https://www.fastht.ml/ # FastHTML\n", + "https://pipulate.com/ # AIE (Pronounced \"Ayyy\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "# 1. The Scrape (Using the core topological parser to find your URL)\n", + "wand.speak(\"Engaging stealth browser. Let's see what the cheap scrapers are missing.\")\n", + "extracted_data = await sauce.scrape(job, headless=False, delay_range=None)\n", + "\n", + "# 2. The Optics\n", + "wand.speak(\"Shattering the DOM into LLM Optics...\")\n", + "await sauce.generate_extractions_post_scrape(job, verbose=True)\n", + "\n", + "wand.imperio()" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": {}, + "source": [ + "### 🥞 Stack 'Em\n", + "Let's pull the extracted SEO metadata from our local file system back into the Notebook's memory using a Pandas DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the data from the 'seo.md' artifacts\n", + "seo_df = sauce.stack_seo_data(job)\n", + "\n", + "import pandas as pd\n", + "from IPython.display import display\n", + "display(seo_df)\n", + "\n", + "wand.speak(\"Data stacked. 
Now, let's summon Statler and Waldorf.\")\n", + "wand.imperio()" + ] + }, + { + "cell_type": "markdown", + "id": "8", + "metadata": {}, + "source": [ + "### 🎭 The Prompt-Fu (Manual Cloud Egress)\n", + "\n", + "Pipulate isn't just about local AI; it's about preparing pristine data to feed to Frontier Models (ChatGPT, Claude, Gemini). \n", + "\n", + "Run the cell below. It will generate a prompt. **Copy that prompt and paste it into your favorite web-based ChatBot.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "target_url = seo_df['url'].iloc[0]\n", + "\n", + "prompt_text = f\"\"\"\n", + "Act as Statler and Waldorf from the Muppets (with a dash of MST3K). \n", + "\n", + "I am an SEO consultant, and I just used a heavy-duty local browser automation tool to render the full DOM for this URL: {target_url}. \n", + "\n", + "Here is the pristine SEO metadata we extracted from the rendered reality:\n", + "{seo_df.to_csv(index=False)}\n", + "\n", + "Write a snarky, cynical email to a client. Explain why \"cheap-ass AI scrapers\" that only read the raw 'View Source' HTML are completely blind to the modern web (the 'JavaScript Gap'). Use the data provided to prove that we actually see the real page. \n", + "\n", + "End with a brief, 3-point actionable agenda to fix their technical SEO, but keep the grumbling, old-man theater critic persona going the whole time.\n", + "\"\"\"\n", + "\n", + "print(\"👇 COPY THIS PROMPT AND PASTE IT INTO CHATGPT/CLAUDE/GEMINI 👇\\n\")\n", + "print(prompt_text)\n", + "print(\"\\n👆 -------------------------------------------------------- 👆\")\n", + "\n", + "wand.speak(\"I've prepared your Prompt Fu. 
Copy it to your clipboard, paste it into the cloud oracle of your choice, and enjoy the snark.\")" + ] + }, + { + "cell_type": "markdown", + "id": "10", + "metadata": {}, + "source": [ + "### 🎨 The Professional Egress\n", + "\n", + "Your clients probably don't want to read a Jupyter Notebook. They want an Excel file. Let's trigger the `core_sauce.py` formatting engine to build a boardroom-ready deliverable." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "xl_file = sauce.export_audits_to_excel(job, seo_df)\n", + "\n", + "if xl_file:\n", + " print(f\"\\n🎉 Success! Deliverable saved to: {xl_file}\")\n", + " wand.speak(\"I've packaged the raw data into a pristine Excel file for the suits.\")\n", + "else:\n", + " print(\"\\n❌ Error during export.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "from pipulate import wand\n", + "wand.nbup(\"Advanced_Notebooks/01_URLinspector\", modules=(\"url_inspect_sauce\",))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/foo_files.py b/foo_files.py index bf083bef..3fbbcb13 100644 --- a/foo_files.py +++ b/foo_files.py @@ -298,12 +298,12 @@ Notebooks/Onboarding.ipynb # [4,504 tokens | 14,798 bytes] Notebooks/imports/onboard_sauce.py # [2,162 tokens | 9,735 bytes] assets/nbs/Onboarding.ipynb # [2,935 tokens | 9,467 bytes] 
-assets/nbs/Advanced_Notebooks/FAQuilizer.ipynb # [2,516 tokens | 7,593 bytes] -assets/nbs/Advanced_Notebooks/URLinspector.ipynb # [2,407 tokens | 7,284 bytes] -assets/nbs/Advanced_Notebooks/VIDeditor.ipynb # [569 tokens | 1,670 bytes] -assets/nbs/Advanced_Notebooks/GAPalyzer.ipynb # [9,193 tokens | 31,140 bytes] +assets/nbs/Advanced_Notebooks/01_URLinspector.ipynb +assets/nbs/Advanced_Notebooks/02_FAQuilizer.ipynb +assets/nbs/Advanced_Notebooks/03_GAPalyzer.ipynb +assets/nbs/Advanced_Notebooks/04_VIDeditor.ipynb assets/nbs/imports/core_sauce.py # [811 tokens | 3,362 bytes] -assets/nbs/imports/onboard_sauce.py # [1,773 tokens | 7,952 bytes] +assets/nbs/imports/onboard_sauce.py # [ imports Ceiling Level ] assets/nbs/imports/faq_writer_sauce.py # [6,042 tokens | 26,760 bytes] assets/nbs/imports/url_inspect_sauce.py # [11,434 tokens | 51,733 bytes] assets/nbs/imports/videditor_sauce.py # [937 tokens | 4,098 bytes]