From 4d0fccb72aaadc0aad7f3719e0442b4f89fe9a99 Mon Sep 17 00:00:00 2001
From: Ritchie Mwewa <74001397+rly0nheart@users.noreply.github.com>
Date: Thu, 8 May 2025 03:32:27 +0200
Subject: [PATCH 1/2] Added a CLI with code highlighting, and table display for
the results... also, code got refactored
---
README.md | 129 +++++++++++++----
poetry.lock | 191 ++++++++++++++++++++++++-
pyproject.toml | 9 +-
searchcode/__init__.py | 4 +-
searchcode/_main.py | 163 ---------------------
searchcode/api.py | 167 +++++++++++++++++++++
searchcode/cli.py | 140 ++++++++++++++++++
searchcode/{_filters.py => filters.py} | 0
tests/test_searchcode.py | 19 +--
9 files changed, 615 insertions(+), 207 deletions(-)
delete mode 100644 searchcode/_main.py
create mode 100644 searchcode/api.py
create mode 100644 searchcode/cli.py
rename searchcode/{_filters.py => filters.py} (100%)
diff --git a/README.md b/README.md
index 324cbaf..c95a84e 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,32 @@
-

Python SDK for Searchcode.
Search 75 billion lines of code from 40 million projects
+
Python SDK and Command-Line Utility for Searchcode.
Search 75 billion lines of code from 40 million projects
+---
+
+```commandline
+searchcode --help
+```
+
+```python
+from searchcode import Searchcode
+
+sc = Searchcode(user_agent="My-Searchcode-script")
+search = sc.search(query="test")
+
+for result in search.results:
+ print(result)
+```
+
+---
+
## Installation
```bash
pip install searchcode
-
```
-## Documentation
+## Getting Started
### Code Search
@@ -35,65 +52,116 @@ Queries the code index and returns at most 100 results.
> To fetch all results for a given query, keep incrementing `page` parameter until you get a page with an empty results
> list.
+---
-#### Code Search Without Filters
+### Code Search Without Filters
-```python
+#### SDK
-import searchcode as sc
+```python
+from searchcode import Searchcode
-search = sc.code_search(query="test")
+sc = Searchcode(user_agent="My-Searchcode-script")
+search = sc.search(query="test")
for result in search.results:
print(result)
```
-#### Filter by Language (Java and JavaScript)
+#### CLI
-```python
+```commandline
+searchcode search test
+```
+
+---
-import searchcode as sc
+### Filter by Language (Java and JavaScript)
-search = sc.code_search(query="test", languages=["Java", "JavaScript"])
+#### SDK
+
+```python
+from searchcode import Searchcode
+
+sc = Searchcode(user_agent="My-Searchcode-script")
+search = sc.search(query="test", languages=["Java", "JavaScript"])
for result in search.results:
print(result.language)
```
-#### Filter by Source (BitBucket and CodePlex)
+#### CLI
-```python
+```commandline
+searchcode search test --languages java,javascript
+```
+
+___
-import searchcode as sc
+### Filter by Source (BitBucket and CodePlex)
-search = sc.code_search(query="test", sources=["BitBucket", "CodePlex"])
+#### SDK
+
+```python
+from searchcode import Searchcode
+
+sc = Searchcode(user_agent="My-Searchcode-script")
+search = sc.search(query="test", sources=["BitBucket", "CodePlex"])
for result in search.results:
print(result.filename)
```
-#### Filter by Lines of Code (Between 500 and 1000)
+#### CLI
+
+```commandline
+searchcode search test --sources bitbucket,codeplex
+```
+
+___
+
+### Filter by Lines of Code (Between 500 and 1000)
+
+#### SDK
```python
-import searchcode as sc
+from searchcode import Searchcode
-search = sc.code_search(query="test", lines_of_code_gt=500, lines_of_code_lt=1000)
+sc = Searchcode(user_agent="My-Searchcode-script")
+search = sc.search(query="test", lines_of_code_gt=500, lines_of_code_lt=1000)
for result in search.results:
print(result)
```
-#### With Callback Function (JSONP only)
+#### CLI
+
+```commandline
+searchcode search test --lines-of-code-gt 500 --lines-of-code-lt 1000
+```
+
+___
+
+### With Callback Function (JSONP only)
+
+#### SDK
```python
-import searchcode as sc
+from searchcode import Searchcode
-search = sc.code_search(query="test", callback="myCallback")
+sc = Searchcode(user_agent="My-Searchcode-script")
+search = sc.search(query="test", callback="myCallback")
print(search)
```
-#### Response Attribute Definitions
+#### CLI
+
+```commandline
+searchcode search test --callback myCallback
+```
+
+### Response Attribute Definitions
| Attribute | Description |
|----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -124,22 +192,33 @@ print(search)
| **md5hash** | Calculated MD5 hash of the file's contents. |
| **lines** | Contains line numbers and lines which match the `searchterm`. Lines immediately before and after the match are included. If only the filename matches, up to the first 15 lines of the file are returned. |
+___
+
### Code Result
Returns the raw data from a code file given the code id which can be found as the `id` in a code search result.
+#### SDK
+
#### Params
- `_id`: Unique identifier for the code file (required).
```python
-import searchcode as sc
+from searchcode import Searchcode
-code = sc.code_result(4061576)
+sc = Searchcode(user_agent="My-Searchcode-script")
+code = sc.code(4061576)
print(code)
```
+#### CLI
+
+```commandline
+searchcode code 4061576
+```
+
## About Searchcode
Searchcode is a simple, comprehensive source code search engine that indexes billions of lines of code from open-source
@@ -148,7 +227,7 @@ helping you find real world examples of functions, API's and libraries in 243 la
[Learn more](https://searchcode.com/about)
-## Acknowledgements
+## Credits
This SDK is developed and maintained by [Ritchie Mwewa](https://gravatar.com/rly0nheart), in collaboration
with [Ben Boyter](https://boyter.org/about/), the creator of [Searchcode.com](https://searchcode.com).
diff --git a/poetry.lock b/poetry.lock
index 7dcb32b..296ee98 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
[[package]]
name = "certifi"
@@ -114,18 +114,45 @@ files = [
{file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"},
]
+[[package]]
+name = "click"
+version = "8.1.8"
+description = "Composable command line interface toolkit"
+optional = false
+python-versions = ">=3.7"
+groups = ["main"]
+files = [
+ {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"},
+ {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
[[package]]
name = "colorama"
version = "0.4.6"
description = "Cross-platform colored terminal text."
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
-groups = ["dev"]
-markers = "sys_platform == \"win32\""
+groups = ["main", "dev"]
files = [
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
+markers = {main = "platform_system == \"Windows\"", dev = "sys_platform == \"win32\""}
+
+[[package]]
+name = "defusedxml"
+version = "0.7.1"
+description = "XML bomb protection for Python stdlib modules"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+groups = ["main"]
+files = [
+ {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"},
+ {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"},
+]
[[package]]
name = "exceptiongroup"
@@ -134,7 +161,7 @@ description = "Backport of PEP 654 (exception groups)"
optional = false
python-versions = ">=3.7"
groups = ["dev"]
-markers = "python_version < \"3.11\""
+markers = "python_version == \"3.10\""
files = [
{file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
{file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
@@ -187,6 +214,31 @@ files = [
{file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
]
+[[package]]
+name = "markdown-it-py"
+version = "3.0.0"
+description = "Python port of markdown-it. Markdown parsing, done right!"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+ {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"},
+ {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"},
+]
+
+[package.dependencies]
+mdurl = ">=0.1,<1.0"
+
+[package.extras]
+benchmarking = ["psutil", "pytest", "pytest-benchmark"]
+code-style = ["pre-commit (>=3.0,<4.0)"]
+compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"]
+linkify = ["linkify-it-py (>=1,<3)"]
+plugins = ["mdit-py-plugins"]
+profiling = ["gprof2dot"]
+rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"]
+testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
+
[[package]]
name = "mccabe"
version = "0.7.0"
@@ -199,6 +251,18 @@ files = [
{file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
]
+[[package]]
+name = "mdurl"
+version = "0.1.2"
+description = "Markdown URL utilities"
+optional = false
+python-versions = ">=3.7"
+groups = ["main"]
+files = [
+ {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"},
+ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
+]
+
[[package]]
name = "packaging"
version = "24.2"
@@ -251,6 +315,36 @@ files = [
{file = "pyflakes-3.3.2.tar.gz", hash = "sha256:6dfd61d87b97fba5dcfaaf781171ac16be16453be6d816147989e7f6e6a9576b"},
]
+[[package]]
+name = "pygments"
+version = "2.19.1"
+description = "Pygments is a syntax highlighting package written in Python."
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+ {file = "pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c"},
+ {file = "pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f"},
+]
+
+[package.extras]
+windows-terminal = ["colorama (>=0.4.6)"]
+
+[[package]]
+name = "pyrankvote"
+version = "2.0.6"
+description = "PyRankVote is a python library for different ranked voting methods, like instant-runoff voting, single transferable vote and preferential block voting, created by Jon Tingvold."
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+ {file = "pyrankvote-2.0.6-py3-none-any.whl", hash = "sha256:9199080b842d9885f948623a7bfab9c2245c544ed0eb711189e5e2021a38f19c"},
+ {file = "pyrankvote-2.0.6.tar.gz", hash = "sha256:93b39a0f010d8647bc60a94d2136271cc6979b626a2607ad185368fc505fa142"},
+]
+
+[package.dependencies]
+tabulate = "*"
+
[[package]]
name = "pytest"
version = "8.3.5"
@@ -296,6 +390,62 @@ urllib3 = ">=1.21.1,<3"
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
+[[package]]
+name = "rich"
+version = "14.0.0"
+description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
+optional = false
+python-versions = ">=3.8.0"
+groups = ["main"]
+files = [
+ {file = "rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0"},
+ {file = "rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725"},
+]
+
+[package.dependencies]
+markdown-it-py = ">=2.2.0"
+pygments = ">=2.13.0,<3.0.0"
+typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.11\""}
+
+[package.extras]
+jupyter = ["ipywidgets (>=7.5.1,<9)"]
+
+[[package]]
+name = "rich-click"
+version = "1.8.8"
+description = "Format click help output nicely with rich"
+optional = false
+python-versions = ">=3.7"
+groups = ["main"]
+files = [
+ {file = "rich_click-1.8.8-py3-none-any.whl", hash = "sha256:205aabd5a98e64ab2c105dee9e368be27480ba004c7dfa2accd0ed44f9f1550e"},
+ {file = "rich_click-1.8.8.tar.gz", hash = "sha256:547c618dea916620af05d4a6456da797fbde904c97901f44d2f32f89d85d6c84"},
+]
+
+[package.dependencies]
+click = ">=7"
+rich = ">=10.7"
+typing_extensions = ">=4"
+
+[package.extras]
+dev = ["mypy", "packaging", "pre-commit", "pytest", "pytest-cov", "rich-codex", "ruff", "types-setuptools"]
+docs = ["markdown_include", "mkdocs", "mkdocs-glightbox", "mkdocs-material-extensions", "mkdocs-material[imaging] (>=9.5.18,<9.6.0)", "mkdocs-rss-plugin", "mkdocstrings[python]", "rich-codex"]
+
+[[package]]
+name = "tabulate"
+version = "0.9.0"
+description = "Pretty-print tabular data"
+optional = false
+python-versions = ">=3.7"
+groups = ["main"]
+files = [
+ {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"},
+ {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"},
+]
+
+[package.extras]
+widechars = ["wcwidth"]
+
[[package]]
name = "tomli"
version = "2.2.1"
@@ -303,7 +453,7 @@ description = "A lil' TOML parser"
optional = false
python-versions = ">=3.8"
groups = ["dev"]
-markers = "python_version < \"3.11\""
+markers = "python_version == \"3.10\""
files = [
{file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
{file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"},
@@ -339,6 +489,18 @@ files = [
{file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"},
]
+[[package]]
+name = "typing-extensions"
+version = "4.13.2"
+description = "Backported and Experimental Type Hints for Python 3.8+"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+ {file = "typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c"},
+ {file = "typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef"},
+]
+
[[package]]
name = "urllib3"
version = "2.3.0"
@@ -357,7 +519,24 @@ h2 = ["h2 (>=4,<5)"]
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
zstd = ["zstandard (>=0.18.0)"]
+[[package]]
+name = "whats-that-code"
+version = "0.2.0"
+description = "Guess programming language from a string or file."
+optional = false
+python-versions = ">=3.6,<4.0"
+groups = ["main"]
+files = [
+ {file = "whats_that_code-0.2.0-py3-none-any.whl", hash = "sha256:923fb3d84ad27c265da7ac2b12251a2055c3325c0bd4dae5e527085b99e84273"},
+ {file = "whats_that_code-0.2.0.tar.gz", hash = "sha256:938fb2443a6a7eb23ceee20f0c246922f206c7356b542113d3161314f8cdc61d"},
+]
+
+[package.dependencies]
+defusedxml = "*"
+pygments = "*"
+pyrankvote = "*"
+
[metadata]
lock-version = "2.1"
python-versions = "^3.10"
-content-hash = "8cbaf417f9a35c922cdab63eef2d00d649ebc5a3933ae7fc354fc921bb8e2387"
+content-hash = "a30d58e9246c747391689d47907d41d7dee51d748b95178e29c60665447e8906"
diff --git a/pyproject.toml b/pyproject.toml
index f47cee0..493b12c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
[tool.poetry]
name = "searchcode"
-version = "0.2.3"
-description = "Python SDK for Searchcode."
+version = "0.3.0"
+description = "Python SDK and CLI utility for Searchcode."
authors = ["Ritchie Mwewa "]
license = "GPLv3+"
readme = "README.md"
@@ -19,6 +19,8 @@ classifiers = [
[tool.poetry.dependencies]
python = "^3.10"
requests = "^2.32.2"
+rich-click = "^1.8.8"
+whats-that-code = "^0.2.0"
[tool.poetry.group.dev.dependencies]
flake8 = "^7.1.2"
@@ -27,3 +29,6 @@ pytest = "^8.3.5"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
+
+[tool.poetry.scripts]
+searchcode = "searchcode.cli:cli"
\ No newline at end of file
diff --git a/searchcode/__init__.py b/searchcode/__init__.py
index dbfe2dd..2b4d886 100644
--- a/searchcode/__init__.py
+++ b/searchcode/__init__.py
@@ -1,3 +1,3 @@
-from ._main import *
+from .api import Searchcode
-__all__ = ["code_result", "code_search"]
+__all__ = ["Searchcode"]
diff --git a/searchcode/_main.py b/searchcode/_main.py
deleted file mode 100644
index 3ae95aa..0000000
--- a/searchcode/_main.py
+++ /dev/null
@@ -1,163 +0,0 @@
-from platform import python_version, platform
-from types import SimpleNamespace
-from typing import List, Union, Dict, Optional, Tuple
-
-import requests
-
-from ._filters import (
- CODE_SOURCES,
- CODE_LANGUAGES,
- get_language_ids,
- get_source_ids,
-)
-
-__all__ = ["code_result", "code_search"]
-
-
-_BASE_API_ENDPOINT = "https://searchcode.com/api"
-
-
-def _get_response(
- endpoint: str, params: Optional[List[Tuple[str, str]]] = None, **kwargs
-) -> Union[Dict, List, str]:
- """
- Sends a GET request to the specified endpoint with the given headers and parameters.
-
- :param endpoint: The API endpoint to send the request to.
- :type endpoint: str
- :param params: Optional list of query parameters as key-value tuples.
- :type params: Optional[List[Tuple[str, str]]]
- :return: The parsed JSON response, which could be a dictionary, list, or string.
- :rtype: Union[Dict, List, str]
- :raises Exception: If the request fails or the server returns an error.
- """
-
- response = requests.get(
- url=endpoint,
- params=params,
- headers={
- "User-Agent": f"searchcode-sdk/0.2.2 "
- f"(Python {python_version} on {platform}; +https://pypi.org/project/searchcode)"
- },
- )
- response.raise_for_status()
- return response.text if kwargs.get("is_callback") else response.json()
-
-
-def _response_to_namespace_obj(
- response: Union[List[Dict], Dict]
-) -> Union[List[SimpleNamespace], SimpleNamespace, List[Dict], Dict]:
- """
- Recursively converts the API response into a SimpleNamespace object(s).
-
- :param response: The object to convert, either a dictionary or a list of dictionaries.
- :type response: Union[List[Dict], Dict]
- :return: A SimpleNamespace object or list of SimpleNamespace objects.
- :rtype: Union[List[SimpleNamespace], SimpleNamespace, None]
- """
-
- if isinstance(response, Dict):
- return SimpleNamespace(
- **{
- key: _response_to_namespace_obj(response=value)
- for key, value in response.items()
- }
- )
- elif isinstance(response, List):
- return [_response_to_namespace_obj(response=item) for item in response]
- else:
- return response
-
-
-def code_search(
- query: str,
- page: int = 0,
- per_page: int = 100,
- languages: Optional[List[CODE_LANGUAGES]] = None,
- sources: Optional[List[CODE_SOURCES]] = None,
- lines_of_code_gt: Optional[int] = None,
- lines_of_code_lt: Optional[int] = None,
- callback: Optional[str] = None,
-) -> Union[SimpleNamespace, str]:
- """
- Searches and returns code snippets matching the query.
-
- The following filters are textual and can be added into query directly:
- ----------------------------------------------------------------------
- - Filter by file extention `ext:EXTENTION` E.g. `"gsub ext:erb"`
- - Filter by language `lang:LANGUAGE` E.g. `"import lang:python"`
- - Filter by repository `repo:REPONAME` E.g. `"float Q_rsqrt repo:quake"`
- - Filter by user/repository `repo:USERNAME/REPONAME` E.g. `"batf repo:boyter/batf"`
-
- :param query: Search term
- :type query: str
- :param page: Result page starting at 0 through to 49 (default is 0).
- :type page: int
- :param per_page: Number of results wanted per page max 100 (default is 100)
- :type per_page: int
- :param languages: Allows filtering to languages supplied by return types.
- Supply multiple to filter to multiple languages.
- :type languages: Optional[List[CODE_LANGUAGES]]
- :param sources: Allows filtering to sources supplied by return types.
- Supply multiple to filter to multiple sources.
- :type sources: Optional[List[CODE_SOURCES]]
- :param lines_of_code_gt: Filter to sources with greater lines of code than supplied int. Valid values 0 to 10000.
- :type lines_of_code_gt: int
- :param lines_of_code_lt: Filter to sources with fewer lines of code than supplied int. Valid values 0 to 10000.
- :type lines_of_code_lt: int
- :param callback: Callback function (JSONP only)
- :type callback: str
- :return: The search results as a SimpleNamespace object.
- :rtype: SimpleNamespace
- """
-
- language_ids = [] if not languages else get_language_ids(language_names=languages)
- source_ids = [] if not sources else get_source_ids(source_names=sources)
-
- response = _get_response(
- endpoint=f"{_BASE_API_ENDPOINT}/{'jsonp_codesearch_I' if callback else 'codesearch_I'}/",
- params=[
- ("q", query),
- ("p", page),
- ("per_page", per_page),
- ("loc", lines_of_code_gt),
- ("loc2", lines_of_code_lt),
- ("callback", callback),
- *[("lan", language_id) for language_id in language_ids],
- *[("src", source_id) for source_id in source_ids],
- ],
- is_callback=callback,
- )
-
- return _response_to_namespace_obj(response=response)
-
-
-def code_result(_id: int) -> SimpleNamespace:
- """
- Returns the raw data from a code file given the code ID which can be found as the `id` in a code search result.
-
- :param _id: The unique identifier of the code result.
- :type _id: int
- :return: The code result details as a SimpleNamespace object.
- :rtype: SimpleNamespace
- """
-
- response = _get_response(endpoint=f"{_BASE_API_ENDPOINT}/result/{_id}")
- return response.get("code")
-
-
-# This is deprecated.
-# def related_results(_id: int) -> SimpleNamespace:
-# """
-# Returns an array of results given a searchcode unique code id which are considered to be duplicates.
-#
-# The matching is slightly fuzzy allowing so that small differences between files are ignored.
-
-# :param _id: The unique identifier of the code result.
-# :type _id: int
-# :return: A list of related results as a SimpleNamespace object.
-# :rtype: SimpleNamespace
-# """
-
-# response = _get_response(endpoint=f"{_BASE_API_ENDPOINT}/related_results/{_id}")
-# return _response_to_namespace_obj(response=response)
diff --git a/searchcode/api.py b/searchcode/api.py
new file mode 100644
index 0000000..f128dd7
--- /dev/null
+++ b/searchcode/api.py
@@ -0,0 +1,167 @@
+import typing as t
+from platform import python_version, platform
+from types import SimpleNamespace
+
+import requests
+
+from .filters import CODE_LANGUAGES, CODE_SOURCES, get_language_ids, get_source_ids
+
+BASE_API_ENDPOINT = "https://searchcode.com/api"
+
+__all__ = ["Searchcode"]
+
+
+class Searchcode:
+    """
+    Client for the Searchcode API.
+
+    :param user_agent: Identifier sent in the ``User-Agent`` header of every request
+        (spaces are replaced with dashes before sending).
+    :type user_agent: str
+    """
+
+    def __init__(self, user_agent: str):
+        self.user_agent = user_agent
+
+    def search(
+        self,
+        query: str,
+        page: int = 0,
+        per_page: int = 100,
+        languages: t.Optional[t.List[CODE_LANGUAGES]] = None,
+        sources: t.Optional[t.List[CODE_SOURCES]] = None,
+        lines_of_code_gt: t.Optional[int] = None,
+        lines_of_code_lt: t.Optional[int] = None,
+        callback: t.Optional[str] = None,
+    ) -> t.Union[SimpleNamespace, str]:
+        """
+        Searches and returns code snippets matching the query.
+
+        The following filters are textual and can be added into query directly:
+        ----------------------------------------------------------------------
+        - Filter by file extension `ext:EXTENSION` E.g. `"gsub ext:erb"`
+        - Filter by language `lang:LANGUAGE` E.g. `"import lang:python"`
+        - Filter by repository `repo:REPONAME` E.g. `"float Q_rsqrt repo:quake"`
+        - Filter by user/repository `repo:USERNAME/REPONAME` E.g. `"batf repo:boyter/batf"`
+
+        :param query: Search term
+        :type query: str
+        :param page: Result page starting at 0 through to 49 (default is 0).
+        :type page: int
+        :param per_page: Number of results wanted per page max 100 (default is 100)
+        :type per_page: int
+        :param languages: Allows filtering to languages supplied by return types.
+            Supply multiple to filter to multiple languages.
+        :type languages: Optional[List[CODE_LANGUAGES]]
+        :param sources: Allows filtering to sources supplied by return types.
+            Supply multiple to filter to multiple sources.
+        :type sources: Optional[List[CODE_SOURCES]]
+        :param lines_of_code_gt: Filter to sources with greater lines of code than supplied int. Valid values 0 to 10000.
+        :type lines_of_code_gt: Optional[int]
+        :param lines_of_code_lt: Filter to sources with fewer lines of code than supplied int. Valid values 0 to 10000.
+        :type lines_of_code_lt: Optional[int]
+        :param callback: Callback function (JSONP only)
+        :type callback: Optional[str]
+        :return: The search results as a SimpleNamespace object, or the raw
+            JSONP response text when ``callback`` is supplied.
+        :rtype: Union[SimpleNamespace, str]
+        :raises requests.HTTPError: If the server answers with an error status.
+        """
+
+        language_ids = get_language_ids(language_names=languages) if languages else []
+        source_ids = get_source_ids(source_names=sources) if sources else []
+
+        response = self.__send_request(
+            endpoint=f"{BASE_API_ENDPOINT}/{'jsonp_codesearch_I' if callback else 'codesearch_I'}/",
+            params=[
+                ("q", query),
+                ("p", page),
+                ("per_page", per_page),
+                ("loc", lines_of_code_gt),
+                ("loc2", lines_of_code_lt),
+                ("callback", callback),
+                *[("lan", language_id) for language_id in language_ids],
+                *[("src", source_id) for source_id in source_ids],
+            ],
+            is_callback=callback,
+        )
+
+        # A JSONP response is plain text (``callback(...)``), not a dict, so it
+        # cannot be sliced or converted to a namespace: hand it back untouched.
+        if callback:
+            return response
+
+        # Cap the list client-side so callers never receive more than they asked
+        # for; a missing/null "results" value is treated as an empty list.
+        response["results"] = (response.get("results") or [])[:per_page]
+        return self.__response_to_namespace_obj(response=response)
+
+    def code(self, __id: int) -> str:
+        """
+        Returns the raw data from a code file given the code ID which can be found as the `id` in a code search result.
+
+        NOTE: because this parameter is declared inside a class body, its name is
+        subject to Python's private-name mangling; pass the ID positionally.
+
+        :param __id: The unique identifier of the code result.
+        :type __id: int
+        :return: Raw code result data.
+        :rtype: str
+        :raises requests.HTTPError: If the server answers with an error status.
+        """
+
+        response = self.__send_request(endpoint=f"{BASE_API_ENDPOINT}/result/{__id}")
+        return response.get("code")
+
+    # This is deprecated (for now).
+    # def related(_id: int) -> SimpleNamespace:
+    #     """
+    #     Returns an array of results given a searchcode unique code id which are considered to be duplicates.
+    #
+    #     The matching is slightly fuzzy allowing so that small differences between files are ignored.
+
+    #     :param _id: The unique identifier of the code result.
+    #     :type _id: int
+    #     :return: A list of related results as a SimpleNamespace object.
+    #     :rtype: SimpleNamespace
+    #     """
+
+    #     response = _get_response(endpoint=f"{_BASE_API_ENDPOINT}/related_results/{_id}")
+    #     return _response_to_namespace_obj(response=response)
+
+    def __send_request(
+        self,
+        endpoint: str,
+        params: t.Optional[t.List[t.Tuple[str, str]]] = None,
+        is_callback: t.Optional[str] = None,
+    ) -> t.Union[t.Dict, t.List, str]:
+        """
+        (Private function) Sends a GET request to the specified endpoint with the given headers and parameters.
+
+        :param endpoint: The API endpoint to send the request to.
+        :type endpoint: str
+        :param params: Optional list of query parameters as key-value tuples.
+        :type params: Optional[List[Tuple[str, str]]]
+        :param is_callback: When truthy, the response body is returned as raw
+            text (JSONP) instead of being parsed as JSON.
+        :type is_callback: Optional[str]
+        :return: The parsed JSON response, which could be a dictionary, list, or string.
+        :rtype: Union[Dict, List, str]
+        :raises requests.HTTPError: If the server returns an error status.
+        """
+
+        # python_version() and platform() must be *called*; interpolating the
+        # bare functions would put "<function ...>" reprs into the header.
+        response = requests.get(
+            url=endpoint,
+            params=params,
+            headers={
+                "User-Agent": f"{self.user_agent.replace(' ', '-')} "
+                f"(Python {python_version()} on {platform()}; +https://pypi.org/project/searchcode)"
+            },
+        )
+        response.raise_for_status()
+        return response.text if is_callback else response.json()
+
+    def __response_to_namespace_obj(
+        self, response: t.Union[t.List[t.Dict], t.Dict]
+    ) -> t.Union[t.List[SimpleNamespace], SimpleNamespace, t.List[t.Dict], t.Dict]:
+        """
+        (Private function) Recursively converts the API response into a SimpleNamespace object(s).
+
+        Dictionaries become SimpleNamespace objects, lists are converted item by
+        item, and any other value is returned unchanged.
+
+        :param response: The object to convert, either a dictionary or a list of dictionaries.
+        :type response: Union[List[Dict], Dict]
+        :return: A SimpleNamespace object or list of SimpleNamespace objects.
+        :rtype: Union[List[SimpleNamespace], SimpleNamespace, None]
+        """
+
+        if isinstance(response, dict):
+            return SimpleNamespace(
+                **{
+                    key: self.__response_to_namespace_obj(response=value)
+                    for key, value in response.items()
+                }
+            )
+        if isinstance(response, list):
+            return [
+                self.__response_to_namespace_obj(response=item) for item in response
+            ]
+        return response
diff --git a/searchcode/cli.py b/searchcode/cli.py
new file mode 100644
index 0000000..d581474
--- /dev/null
+++ b/searchcode/cli.py
@@ -0,0 +1,140 @@
+from types import SimpleNamespace
+from typing import Optional, List
+
+import rich_click as click
+from rich import print as rprint, box
+from rich.syntax import Syntax
+from rich.table import Table
+from whats_that_code.election import guess_language_all_methods
+
+from .api import Searchcode
+
+sc = Searchcode(user_agent="searchCode-sdk/cli")
+
+__all__ = ["cli"]
+
+
+@click.group()
+def cli():
+ """
+ Searchcode: Simple comprehensive code search.
+ """
+ ...
+
+
+@cli.command()
+@click.argument("query", type=str)
+@click.option(
+    "--pretty",
+    is_flag=True,
+    default=False,
+    help="Pretty-print the raw response instead of rendering a table.",
+)
+@click.option(
+    "--page",
+    type=int,
+    default=0,
+    help="Start page number (defaults to 0).",
+)
+@click.option(
+    "--per-page",
+    type=int,
+    default=100,
+    help="Results per page (defaults to 100).",
+)
+@click.option(
+    "--lines-of-code-lt",
+    type=int,
+    help="Filter to sources with fewer lines of code than the supplied value (Valid values: 0 to 10000).",
+)
+@click.option(
+    "--lines-of-code-gt",
+    type=int,
+    help="Filter to sources with greater lines of code than the supplied value (Valid values: 0 to 10000).",
+)
+@click.option(
+    "--sources",
+    type=str,
+    help="A comma-separated list of code sources to filter results.",
+)
+@click.option(
+    "--languages",
+    type=str,
+    help="A comma-separated list of code languages to filter results.",
+)
+@click.option(
+    "--callback",
+    type=str,
+    help="Callback function name (JSONP only); prints the raw response text.",
+)
+def search(
+    query: str,
+    page: int = 0,
+    per_page: int = 100,
+    pretty: bool = False,
+    lines_of_code_lt: Optional[int] = None,
+    lines_of_code_gt: Optional[int] = None,
+    languages: Optional[str] = None,
+    sources: Optional[str] = None,
+    callback: Optional[str] = None,
+):
+    """
+    Query the code index (returns up to 100 results per page by default).
+
+    e.g., searchcode search "gsub ext:erb" --pretty
+    """
+    # Turn "a, b,c" into ["a", "b", "c"]; an empty/missing option means no filter.
+    language_names = (
+        [name.strip() for name in languages.split(",") if name.strip()]
+        if languages
+        else None
+    )
+    source_names = (
+        [name.strip() for name in sources.split(",") if name.strip()]
+        if sources
+        else None
+    )
+
+    results = sc.search(
+        query=query,
+        page=page,
+        per_page=per_page,
+        languages=language_names or None,
+        sources=source_names or None,
+        lines_of_code_lt=lines_of_code_lt,
+        lines_of_code_gt=lines_of_code_gt,
+        callback=callback,
+    )
+
+    # With a callback the API answers with JSONP text, not a result object, so
+    # emit it verbatim (plain echo: rich would try to parse "[...]" as markup).
+    if callback:
+        click.echo(results)
+        return
+
+    if pretty:
+        rprint(results)
+        return
+
+    # print_table() rejects empty input; report "nothing found" instead of
+    # surfacing a ValueError traceback to the user.
+    if not results.results:
+        click.echo("No results found.")
+        return
+
+    print_table(records=results.results, ignore_keys=["lines"])
+
+
+@cli.command()
+@click.argument("id", type=int)
+def code(id: int):
+    """
+    Get the raw data from a code file.
+
+    e.g., searchcode code 4061576
+    """
+    source_text = sc.code(id)
+
+    # Nothing to render when the API hands back no (or empty) code.
+    if not source_text:
+        return
+
+    # Guess the language from the content itself so the highlighter can pick
+    # a lexer, then print the code with line numbers.
+    rprint(
+        Syntax(
+            code=source_text,
+            lexer=guess_language_all_methods(code=source_text),
+            line_numbers=True,
+        )
+    )
+
+
+def print_table(records: List[SimpleNamespace], ignore_keys: List[str] = None) -> None:
+    """
+    Render a list of SimpleNamespace objects as a rich table and print it.
+
+    Columns are the alphabetically sorted union of attribute names found on
+    the records, minus any listed in ``ignore_keys``. A record lacking a
+    column gets an empty cell; every value is shown via ``str()``.
+
+    :param records: List of SimpleNamespace instances.
+    :param ignore_keys: List of keys to exclude from the table.
+    :return: None. Prints the table using rich.
+    :raises ValueError: If ``records`` is empty or None.
+    """
+    if not records:
+        raise ValueError("Data must be a non-empty list of SimpleNamespace objects.")
+
+    excluded = ignore_keys or []
+
+    # Union of attribute names over all records, skipping the excluded ones.
+    columns = sorted(
+        {name for record in records for name in vars(record) if name not in excluded}
+    )
+
+    table = Table(box=box.ROUNDED, highlight=True, header_style="bold")
+
+    # Only the first column is dimmed; the rest use the default style.
+    for position, name in enumerate(columns):
+        table.add_column(name.capitalize(), style=None if position else "dim")
+
+    for record in records:
+        attributes = vars(record)
+        table.add_row(*(str(attributes.get(name, "")) for name in columns))
+
+    rprint(table)
diff --git a/searchcode/_filters.py b/searchcode/filters.py
similarity index 100%
rename from searchcode/_filters.py
rename to searchcode/filters.py
diff --git a/tests/test_searchcode.py b/tests/test_searchcode.py
index 5e53cc2..893ddf3 100644
--- a/tests/test_searchcode.py
+++ b/tests/test_searchcode.py
@@ -1,21 +1,22 @@
-import searchcode as sc
+from searchcode import Searchcode
+
+sc = Searchcode(user_agent="Pytest")
def test_filter_by_extension():
- search = sc.code_search(query="gsub ext:erb")
+ search = sc.search("gsub ext:erb")
for result in search.results:
assert result.filename.endswith(".erb")
def test_code_result():
- code = sc.code_result(4061576)
+ code = sc.code(4061576)
assert isinstance(code, str)
assert "This file is part of Quake III Arena source code" in code
-"""
-def test_related_results():
- related = sc.related_results(4061576)
- assert isinstance(related, list)
- assert len(related) == 0
-"""
+# deprecated (for now)
+# def test_related_results():
+# related = sc.related_results(4061576)
+# assert isinstance(related, list)
+# assert len(related) == 0
From d9acaaccef914e5b16b831fa66320708b0596b19 Mon Sep 17 00:00:00 2001
From: Ritchie Mwewa <74001397+rly0nheart@users.noreply.github.com>
Date: Thu, 8 May 2025 03:42:07 +0200
Subject: [PATCH 2/2] Added a CLI with code highlighting, and table display for
the results... also, code got refactored
---
pyproject.toml | 2 +-
searchcode/cli.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 493b12c..c33d8f4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
[tool.poetry]
name = "searchcode"
version = "0.3.0"
-description = "Python SDK and CLI utility for Searchcode."
+description = "Simple, comprehensive code search."
authors = ["Ritchie Mwewa "]
license = "GPLv3+"
readme = "README.md"
diff --git a/searchcode/cli.py b/searchcode/cli.py
index d581474..d1cac55 100644
--- a/searchcode/cli.py
+++ b/searchcode/cli.py
@@ -17,7 +17,7 @@
@click.group()
def cli():
"""
- Searchcode: Simple comprehensive code search.
+ Searchcode: Simple, comprehensive code search.
"""
...