From 4d0fccb72aaadc0aad7f3719e0442b4f89fe9a99 Mon Sep 17 00:00:00 2001 From: Ritchie Mwewa <74001397+rly0nheart@users.noreply.github.com> Date: Thu, 8 May 2025 03:32:27 +0200 Subject: [PATCH 1/2] Added a CLI with code highlighting, and table display for the results... also, code got refactored --- README.md | 129 +++++++++++++---- poetry.lock | 191 ++++++++++++++++++++++++- pyproject.toml | 9 +- searchcode/__init__.py | 4 +- searchcode/_main.py | 163 --------------------- searchcode/api.py | 167 +++++++++++++++++++++ searchcode/cli.py | 140 ++++++++++++++++++ searchcode/{_filters.py => filters.py} | 0 tests/test_searchcode.py | 19 +-- 9 files changed, 615 insertions(+), 207 deletions(-) delete mode 100644 searchcode/_main.py create mode 100644 searchcode/api.py create mode 100644 searchcode/cli.py rename searchcode/{_filters.py => filters.py} (100%) diff --git a/README.md b/README.md index 324cbaf..c95a84e 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,32 @@ -


Python SDK for Searchcode.
Search 75 billion lines of code from 40 million projects

+


Python SDK and Command-Line Utility for Searchcode.
Search 75 billion lines of code from 40 million projects

+--- + +```commandline +searchcode --help +``` + +```python +from searchcode import Searchcode + +sc = Searchcode(user_agent="My-Searchcode-script") +search = sc.search(query="test") + +for result in search.results: + print(result) +``` + +--- + ## Installation ```bash pip install searchcode - ``` -## Documentation +## Getting Started ### Code Search @@ -35,65 +52,116 @@ Queries the code index and returns at most 100 results. > To fetch all results for a given query, keep incrementing `page` parameter until you get a page with an empty results > list. +--- -#### Code Search Without Filters +### Code Search Without Filters -```python +#### SDK -import searchcode as sc +```python +from searchcode import Searchcode -search = sc.code_search(query="test") +sc = Searchcode(user_agent="My-Searchcode-script") +search = sc.search(query="test") for result in search.results: print(result) ``` -#### Filter by Language (Java and JavaScript) +#### CLI -```python +```commandline +searchcode test +``` + +--- -import searchcode as sc +### Filter by Language (Java and JavaScript) -search = sc.code_search(query="test", languages=["Java", "JavaScript"]) +#### SDK + +```python +from searchcode import Searchcode + +sc = Searchcode(user_agent="My-Searchcode-script") +search = sc.search(query="test", languages=["Java", "JavaScript"]) for result in search.results: print(result.language) ``` -#### Filter by Source (BitBucket and CodePlex) +#### CLI -```python +````commandline +searchcode test --languages java,javascript +```` + +___ -import searchcode as sc +### Filter by Source (BitBucket and CodePlex) -search = sc.code_search(query="test", sources=["BitBucket", "CodePlex"]) +#### SDK + +```python +from searchcode import Searchcode + +sc = Searchcode(user_agent="My-Searchcode-script") +search = sc.search(query="test", sources=["BitBucket", "CodePlex"]) for result in search.results: print(result.filename) ``` -#### Filter by Lines of Code (Between 500 and 1000) +#### CLI + +```commandline 
+searchcode test --sources bitbucket,codeplex +``` + +___ + +### Filter by Lines of Code (Between 500 and 1000) + +#### SDK ```python -import searchcode as sc +from searchcode import Searchcode -search = sc.code_search(query="test", lines_of_code_gt=500, lines_of_code_lt=1000) +sc = Searchcode(user_agent="My-Searchcode-script") +search = sc.search(query="test", lines_of_code_gt=500, lines_of_code_lt=1000) for result in search.results: print(result) ``` -#### With Callback Function (JSONP only) +#### CLI + +```commandline +searchcode test --lines-of-code-gt 500 --lines-of-code-lt 1000 +``` + +___ + +### With Callback Function (JSONP only) + +#### SDK ```python -import searchcode as sc +from searchcode import Searchcode -search = sc.code_search(query="test", callback="myCallback") +sc = Searchcode(user_agent="My-Searchcode-script") +search = sc.search(query="test", callback="myCallback") print(search) ``` -#### Response Attribute Definitions +#### CLI + +```commandline +searchcode test --callback myCallback +``` + +### Response Attribute Definitions | Attribute | Description | |----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| @@ -124,22 +192,33 @@ print(search) | **md5hash** | Calculated MD5 hash of the file's contents. | | **lines** | Contains line numbers and lines which match the `searchterm`. Lines immediately before and after the match are included. If only the filename matches, up to the first 15 lines of the file are returned. | +___ + ### Code Result Returns the raw data from a code file given the code id which can be found as the `id` in a code search result. +#### SDK + #### Params - `_id`: Unique identifier for the code file (required). 
```python -import searchcode as sc +from searchcode import Searchcode -code = sc.code_result(4061576) +sc = Searchcode(user_agent="My-Searchcode-script") +code = sc.code(4061576) print(code) ``` +#### CLI + +```commandline +searchcode code 4061576 +``` + ## About Searchcode Searchcode is a simple, comprehensive source code search engine that indexes billions of lines of code from open-source @@ -148,7 +227,7 @@ helping you find real world examples of functions, API's and libraries in 243 la [Learn more](https://searchcode.com/about) -## Acknowledgements +## Credits This SDK is developed and maintained by [Ritchie Mwewa](https://gravatar.com/rly0nheart), in collaboration with [Ben Boyter](https://boyter.org/about/), the creator of [Searchcode.com](https://searchcode.com). diff --git a/poetry.lock b/poetry.lock index 7dcb32b..296ee98 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. [[package]] name = "certifi" @@ -114,18 +114,45 @@ files = [ {file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"}, ] +[[package]] +name = "click" +version = "8.1.8" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, + {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + [[package]] name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["dev"] -markers = "sys_platform == \"win32\"" +groups = ["main", "dev"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +markers = {main = "platform_system == \"Windows\"", dev = "sys_platform == \"win32\""} + +[[package]] +name = "defusedxml" +version = "0.7.1" +description = "XML bomb protection for Python stdlib modules" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["main"] +files = [ + {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"}, + {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, +] [[package]] name = "exceptiongroup" @@ -134,7 +161,7 @@ description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" groups = ["dev"] -markers = "python_version < \"3.11\"" +markers = "python_version == \"3.10\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -187,6 +214,31 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "markdown-it-py" +version = "3.0.0" +description = "Python port of markdown-it. Markdown parsing, done right!" 
+optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + [[package]] name = "mccabe" version = "0.7.0" @@ -199,6 +251,18 @@ files = [ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + [[package]] name = "packaging" version = "24.2" @@ -251,6 +315,36 @@ files = [ {file = "pyflakes-3.3.2.tar.gz", hash = "sha256:6dfd61d87b97fba5dcfaaf781171ac16be16453be6d816147989e7f6e6a9576b"}, ] +[[package]] +name = "pygments" +version = "2.19.1" +description = "Pygments is a syntax highlighting package written in Python." 
+optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c"}, + {file = "pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + +[[package]] +name = "pyrankvote" +version = "2.0.6" +description = "PyRankVote is a python library for different ranked voting methods, like instant-runoff voting, single transferable vote and preferential block voting, created by Jon Tingvold." +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "pyrankvote-2.0.6-py3-none-any.whl", hash = "sha256:9199080b842d9885f948623a7bfab9c2245c544ed0eb711189e5e2021a38f19c"}, + {file = "pyrankvote-2.0.6.tar.gz", hash = "sha256:93b39a0f010d8647bc60a94d2136271cc6979b626a2607ad185368fc505fa142"}, +] + +[package.dependencies] +tabulate = "*" + [[package]] name = "pytest" version = "8.3.5" @@ -296,6 +390,62 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "rich" +version = "14.0.0" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.8.0" +groups = ["main"] +files = [ + {file = "rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0"}, + {file = "rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725"}, +] + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" +typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.11\""} + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + +[[package]] +name = "rich-click" +version = "1.8.8" +description = "Format click help output nicely with 
rich" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "rich_click-1.8.8-py3-none-any.whl", hash = "sha256:205aabd5a98e64ab2c105dee9e368be27480ba004c7dfa2accd0ed44f9f1550e"}, + {file = "rich_click-1.8.8.tar.gz", hash = "sha256:547c618dea916620af05d4a6456da797fbde904c97901f44d2f32f89d85d6c84"}, +] + +[package.dependencies] +click = ">=7" +rich = ">=10.7" +typing_extensions = ">=4" + +[package.extras] +dev = ["mypy", "packaging", "pre-commit", "pytest", "pytest-cov", "rich-codex", "ruff", "types-setuptools"] +docs = ["markdown_include", "mkdocs", "mkdocs-glightbox", "mkdocs-material-extensions", "mkdocs-material[imaging] (>=9.5.18,<9.6.0)", "mkdocs-rss-plugin", "mkdocstrings[python]", "rich-codex"] + +[[package]] +name = "tabulate" +version = "0.9.0" +description = "Pretty-print tabular data" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, + {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, +] + +[package.extras] +widechars = ["wcwidth"] + [[package]] name = "tomli" version = "2.2.1" @@ -303,7 +453,7 @@ description = "A lil' TOML parser" optional = false python-versions = ">=3.8" groups = ["dev"] -markers = "python_version < \"3.11\"" +markers = "python_version == \"3.10\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -339,6 +489,18 @@ files = [ {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, ] +[[package]] +name = "typing-extensions" +version = "4.13.2" +description = "Backported and Experimental Type 
Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c"}, + {file = "typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef"}, +] + [[package]] name = "urllib3" version = "2.3.0" @@ -357,7 +519,24 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "whats-that-code" +version = "0.2.0" +description = "Guess programming language from a string or file." +optional = false +python-versions = ">=3.6,<4.0" +groups = ["main"] +files = [ + {file = "whats_that_code-0.2.0-py3-none-any.whl", hash = "sha256:923fb3d84ad27c265da7ac2b12251a2055c3325c0bd4dae5e527085b99e84273"}, + {file = "whats_that_code-0.2.0.tar.gz", hash = "sha256:938fb2443a6a7eb23ceee20f0c246922f206c7356b542113d3161314f8cdc61d"}, +] + +[package.dependencies] +defusedxml = "*" +pygments = "*" +pyrankvote = "*" + [metadata] lock-version = "2.1" python-versions = "^3.10" -content-hash = "8cbaf417f9a35c922cdab63eef2d00d649ebc5a3933ae7fc354fc921bb8e2387" +content-hash = "a30d58e9246c747391689d47907d41d7dee51d748b95178e29c60665447e8906" diff --git a/pyproject.toml b/pyproject.toml index f47cee0..493b12c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "searchcode" -version = "0.2.3" -description = "Python SDK for Searchcode." +version = "0.3.0" +description = "Python SDK and CLI utility for Searchcode." 
authors = ["Ritchie Mwewa "] license = "GPLv3+" readme = "README.md" @@ -19,6 +19,8 @@ classifiers = [ [tool.poetry.dependencies] python = "^3.10" requests = "^2.32.2" +rich-click = "^1.8.8" +whats-that-code = "^0.2.0" [tool.poetry.group.dev.dependencies] flake8 = "^7.1.2" @@ -27,3 +29,6 @@ pytest = "^8.3.5" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" + +[tool.poetry.scripts] +searchcode = "searchcode.cli:cli" \ No newline at end of file diff --git a/searchcode/__init__.py b/searchcode/__init__.py index dbfe2dd..2b4d886 100644 --- a/searchcode/__init__.py +++ b/searchcode/__init__.py @@ -1,3 +1,3 @@ -from ._main import * +from .api import Searchcode -__all__ = ["code_result", "code_search"] +__all__ = ["Searchcode"] diff --git a/searchcode/_main.py b/searchcode/_main.py deleted file mode 100644 index 3ae95aa..0000000 --- a/searchcode/_main.py +++ /dev/null @@ -1,163 +0,0 @@ -from platform import python_version, platform -from types import SimpleNamespace -from typing import List, Union, Dict, Optional, Tuple - -import requests - -from ._filters import ( - CODE_SOURCES, - CODE_LANGUAGES, - get_language_ids, - get_source_ids, -) - -__all__ = ["code_result", "code_search"] - - -_BASE_API_ENDPOINT = "https://searchcode.com/api" - - -def _get_response( - endpoint: str, params: Optional[List[Tuple[str, str]]] = None, **kwargs -) -> Union[Dict, List, str]: - """ - Sends a GET request to the specified endpoint with the given headers and parameters. - - :param endpoint: The API endpoint to send the request to. - :type endpoint: str - :param params: Optional list of query parameters as key-value tuples. - :type params: Optional[List[Tuple[str, str]]] - :return: The parsed JSON response, which could be a dictionary, list, or string. - :rtype: Union[Dict, List, str] - :raises Exception: If the request fails or the server returns an error. 
- """ - - response = requests.get( - url=endpoint, - params=params, - headers={ - "User-Agent": f"searchcode-sdk/0.2.2 " - f"(Python {python_version} on {platform}; +https://pypi.org/project/searchcode)" - }, - ) - response.raise_for_status() - return response.text if kwargs.get("is_callback") else response.json() - - -def _response_to_namespace_obj( - response: Union[List[Dict], Dict] -) -> Union[List[SimpleNamespace], SimpleNamespace, List[Dict], Dict]: - """ - Recursively converts the API response into a SimpleNamespace object(s). - - :param response: The object to convert, either a dictionary or a list of dictionaries. - :type response: Union[List[Dict], Dict] - :return: A SimpleNamespace object or list of SimpleNamespace objects. - :rtype: Union[List[SimpleNamespace], SimpleNamespace, None] - """ - - if isinstance(response, Dict): - return SimpleNamespace( - **{ - key: _response_to_namespace_obj(response=value) - for key, value in response.items() - } - ) - elif isinstance(response, List): - return [_response_to_namespace_obj(response=item) for item in response] - else: - return response - - -def code_search( - query: str, - page: int = 0, - per_page: int = 100, - languages: Optional[List[CODE_LANGUAGES]] = None, - sources: Optional[List[CODE_SOURCES]] = None, - lines_of_code_gt: Optional[int] = None, - lines_of_code_lt: Optional[int] = None, - callback: Optional[str] = None, -) -> Union[SimpleNamespace, str]: - """ - Searches and returns code snippets matching the query. - - The following filters are textual and can be added into query directly: - ---------------------------------------------------------------------- - - Filter by file extention `ext:EXTENTION` E.g. `"gsub ext:erb"` - - Filter by language `lang:LANGUAGE` E.g. `"import lang:python"` - - Filter by repository `repo:REPONAME` E.g. `"float Q_rsqrt repo:quake"` - - Filter by user/repository `repo:USERNAME/REPONAME` E.g. 
`"batf repo:boyter/batf"` - - :param query: Search term - :type query: str - :param page: Result page starting at 0 through to 49 (default is 0). - :type page: int - :param per_page: Number of results wanted per page max 100 (default is 100) - :type per_page: int - :param languages: Allows filtering to languages supplied by return types. - Supply multiple to filter to multiple languages. - :type languages: Optional[List[CODE_LANGUAGES]] - :param sources: Allows filtering to sources supplied by return types. - Supply multiple to filter to multiple sources. - :type sources: Optional[List[CODE_SOURCES]] - :param lines_of_code_gt: Filter to sources with greater lines of code than supplied int. Valid values 0 to 10000. - :type lines_of_code_gt: int - :param lines_of_code_lt: Filter to sources with fewer lines of code than supplied int. Valid values 0 to 10000. - :type lines_of_code_lt: int - :param callback: Callback function (JSONP only) - :type callback: str - :return: The search results as a SimpleNamespace object. - :rtype: SimpleNamespace - """ - - language_ids = [] if not languages else get_language_ids(language_names=languages) - source_ids = [] if not sources else get_source_ids(source_names=sources) - - response = _get_response( - endpoint=f"{_BASE_API_ENDPOINT}/{'jsonp_codesearch_I' if callback else 'codesearch_I'}/", - params=[ - ("q", query), - ("p", page), - ("per_page", per_page), - ("loc", lines_of_code_gt), - ("loc2", lines_of_code_lt), - ("callback", callback), - *[("lan", language_id) for language_id in language_ids], - *[("src", source_id) for source_id in source_ids], - ], - is_callback=callback, - ) - - return _response_to_namespace_obj(response=response) - - -def code_result(_id: int) -> SimpleNamespace: - """ - Returns the raw data from a code file given the code ID which can be found as the `id` in a code search result. - - :param _id: The unique identifier of the code result. 
- :type _id: int - :return: The code result details as a SimpleNamespace object. - :rtype: SimpleNamespace - """ - - response = _get_response(endpoint=f"{_BASE_API_ENDPOINT}/result/{_id}") - return response.get("code") - - -# This is deprecated. -# def related_results(_id: int) -> SimpleNamespace: -# """ -# Returns an array of results given a searchcode unique code id which are considered to be duplicates. -# -# The matching is slightly fuzzy allowing so that small differences between files are ignored. - -# :param _id: The unique identifier of the code result. -# :type _id: int -# :return: A list of related results as a SimpleNamespace object. -# :rtype: SimpleNamespace -# """ - -# response = _get_response(endpoint=f"{_BASE_API_ENDPOINT}/related_results/{_id}") -# return _response_to_namespace_obj(response=response) diff --git a/searchcode/api.py b/searchcode/api.py new file mode 100644 index 0000000..f128dd7 --- /dev/null +++ b/searchcode/api.py @@ -0,0 +1,167 @@ +import typing as t +from platform import python_version, platform +from types import SimpleNamespace + +import requests + +from .filters import CODE_LANGUAGES, CODE_SOURCES, get_language_ids, get_source_ids + +BASE_API_ENDPOINT = "https://searchcode.com/api" + +__all__ = ["Searchcode"] + + +class Searchcode: + def __init__(self, user_agent: str): + self.user_agent = user_agent + + def search( + self, + query: str, + page: int = 0, + per_page: int = 100, + languages: t.Optional[t.List[CODE_LANGUAGES]] = None, + sources: t.Optional[t.List[CODE_SOURCES]] = None, + lines_of_code_gt: t.Optional[int] = None, + lines_of_code_lt: t.Optional[int] = None, + callback: t.Optional[str] = None, + ) -> t.Union[SimpleNamespace, str]: + """ + Searches and returns code snippets matching the query. + + The following filters are textual and can be added into query directly: + ---------------------------------------------------------------------- + - Filter by file extention `ext:EXTENTION` E.g. 
`"gsub ext:erb"` + - Filter by language `lang:LANGUAGE` E.g. `"import lang:python"` + - Filter by repository `repo:REPONAME` E.g. `"float Q_rsqrt repo:quake"` + - Filter by user/repository `repo:USERNAME/REPONAME` E.g. `"batf repo:boyter/batf"` + + :param query: Search term + :type query: str + :param page: Result page starting at 0 through to 49 (default is 0). + :type page: int + :param per_page: Number of results wanted per page max 100 (default is 100) + :type per_page: int + :param languages: Allows filtering to languages supplied by return types. + Supply multiple to filter to multiple languages. + :type languages: Optional[List[CODE_LANGUAGES]] + :param sources: Allows filtering to sources supplied by return types. + Supply multiple to filter to multiple sources. + :type sources: Optional[List[CODE_SOURCES]] + :param lines_of_code_gt: Filter to sources with greater lines of code than supplied int. Valid values 0 to 10000. + :type lines_of_code_gt: int + :param lines_of_code_lt: Filter to sources with fewer lines of code than supplied int. Valid values 0 to 10000. + :type lines_of_code_lt: int + :param callback: Callback function (JSONP only) + :type callback: str + :return: The search results as a SimpleNamespace object. 
+ :rtype: SimpleNamespace + """ + + results: t.List = [] + language_ids = ( + [] if not languages else get_language_ids(language_names=languages) + ) + source_ids = [] if not sources else get_source_ids(source_names=sources) + + response = self.__send_request( + endpoint=f"{BASE_API_ENDPOINT}/{'jsonp_codesearch_I' if callback else 'codesearch_I'}/", + params=[ + ("q", query), + ("p", page), + ("per_page", per_page), + ("loc", lines_of_code_gt), + ("loc2", lines_of_code_lt), + ("callback", callback), + *[("lan", language_id) for language_id in language_ids], + *[("src", source_id) for source_id in source_ids], + ], + is_callback=callback, + ) + + response["results"] = response.get("results")[:per_page] + return self.__response_to_namespace_obj(response=response) + + def code(self, __id: int) -> str: + """ + Returns the raw data from a code file given the code ID which can be found as the `id` in a code search result. + + :param __id: The unique identifier of the code result. + :type __id: int + :return: Raw code result data. + :rtype: str + """ + + response = self.__send_request(endpoint=f"{BASE_API_ENDPOINT}/result/{__id}") + return response.get("code") + + # This is deprecated (for now). + # def related(_id: int) -> SimpleNamespace: + # """ + # Returns an array of results given a searchcode unique code id which are considered to be duplicates. + # + # The matching is slightly fuzzy allowing so that small differences between files are ignored. + + # :param _id: The unique identifier of the code result. + # :type _id: int + # :return: A list of related results as a SimpleNamespace object. 
+ # :rtype: SimpleNamespace + # """ + + # response = _get_response(endpoint=f"{_BASE_API_ENDPOINT}/related_results/{_id}") + # return _response_to_namespace_obj(response=response) + + def __send_request( + self, + endpoint: str, + params: t.Optional[t.List[t.Tuple[str, str]]] = None, + is_callback: str = None, + ) -> t.Union[t.Dict, t.List, str]: + """ + (Private function) Sends a GET request to the specified endpoint with the given headers and parameters. + + :param endpoint: The API endpoint to send the request to. + :type endpoint: str + :param params: Optional list of query parameters as key-value tuples. + :type params: Optional[List[Tuple[str, str]]] + :return: The parsed JSON response, which could be a dictionary, list, or string. + :rtype: Union[Dict, List, str] + :raises Exception: If the request fails or the server returns an error. + """ + + response = requests.get( + url=endpoint, + params=params, + headers={ + "User-Agent": f"{self.user_agent.replace(' ', '-')} " + f"(Python {python_version} on {platform}; +https://pypi.org/project/searchcode)" + }, + ) + response.raise_for_status() + return response.text if is_callback else response.json() + + def __response_to_namespace_obj( + self, response: t.Union[t.List[t.Dict], t.Dict] + ) -> t.Union[t.List[SimpleNamespace], SimpleNamespace, t.List[t.Dict], t.Dict]: + """ + (Private function) Recursively converts the API response into a SimpleNamespace object(s). + + :param response: The object to convert, either a dictionary or a list of dictionaries. + :type response: Union[List[Dict], Dict] + :return: A SimpleNamespace object or list of SimpleNamespace objects. 
+ :rtype: Union[List[SimpleNamespace], SimpleNamespace, None] + """ + + if isinstance(response, t.Dict): + return SimpleNamespace( + **{ + key: self.__response_to_namespace_obj(response=value) + for key, value in response.items() + } + ) + elif isinstance(response, t.List): + return [ + self.__response_to_namespace_obj(response=item) for item in response + ] + else: + return response diff --git a/searchcode/cli.py b/searchcode/cli.py new file mode 100644 index 0000000..d581474 --- /dev/null +++ b/searchcode/cli.py @@ -0,0 +1,140 @@ +from types import SimpleNamespace +from typing import Optional, List + +import rich_click as click +from rich import print as rprint, box +from rich.syntax import Syntax +from rich.table import Table +from whats_that_code.election import guess_language_all_methods + +from .api import Searchcode + +sc = Searchcode(user_agent="searchCode-sdk/cli") + +__all__ = ["cli"] + + +@click.group() +def cli(): + """ + Searchcode: Simple comprehensive code search. + """ + ... 
+ + +@cli.command() +@click.argument("query", type=str) +@click.option("--pretty", type=bool, help="Return results in raw JSON format.") +@click.option( + "--page", + type=int, + default=0, + help="Start page number (defaults to 0).", +) +@click.option( + "--per-page", + type=int, + default=100, + help="Results per page (defaults to 100).", +) +@click.option( + "--lines-of-code-lt", + type=int, + help="Filter to sources with less lines of code than the supplied value (Valid values: 0 to 10000).", +) +@click.option( + "--lines-of-code-gt", + type=int, + help="Filter to sources with greater lines of code than the supplied value (Valid values: 0 to 10000).", +) +@click.option( + "--sources", + type=str, + help="A comma-separated list of code sources to filter results.", +) +@click.option( + "--languages", + type=str, + help="A comma-separated list of code languages to filter results.", +) +def search( + query: str, + page: int = 0, + per_page: int = 100, + pretty: bool = False, + lines_of_code_lt: Optional[int] = None, + lines_of_code_gt: Optional[int] = None, + languages: Optional[str] = None, + sources: Optional[str] = None, +): + """ + Query the code index and (returns 100 results by default). + + e.g., searchcode search "gsub ext:erb" --pretty + """ + languages = languages.split(",") if languages else None + sources = sources.split(",") if sources else None + + results = sc.search( + query=query, + page=page, + per_page=per_page, + languages=languages, + sources=sources, + lines_of_code_lt=lines_of_code_lt, + lines_of_code_gt=lines_of_code_gt, + ) + if pretty: + rprint(results) + else: + print_table(records=results.results, ignore_keys=["lines"]) + + +@cli.command() +@click.argument("id", type=int) +def code(id: int): + """ + Get the raw data from a code file. 
+ + e.g., searchcode code 4061576 + """ + code_data = sc.code(id) + if code_data: + language = guess_language_all_methods(code=code_data) + syntax = Syntax(code=code_data, lexer=language, line_numbers=True) + rprint(syntax) + + +def print_table(records: List[SimpleNamespace], ignore_keys: List[str] = None) -> None: + """ + Creates a rich table from a list of SimpleNamespace objects, + ignoring specified keys. + + :param records: List of SimpleNamespace instances. + :param ignore_keys: List of keys to exclude from the table. + :return: None. Prints the table using rich. + """ + if not records: + raise ValueError("Data must be a non-empty list of SimpleNamespace objects.") + + ignore_keys = ignore_keys or [] + + # Collect all unique keys across all records, excluding ignored ones + all_keys = set() + for record in records: + all_keys.update(key for key in record.__dict__.keys() if key not in ignore_keys) + + columns = sorted(all_keys) + + table = Table(box=box.ROUNDED, highlight=True, header_style="bold") + + for index, column in enumerate(columns): + style = "dim" if index == 0 else None + table.add_column(column.capitalize(), style=style) + + for record in records: + data = record.__dict__ + row = [str(data.get(column, "")) for column in columns] + table.add_row(*row) + + rprint(table) diff --git a/searchcode/_filters.py b/searchcode/filters.py similarity index 100% rename from searchcode/_filters.py rename to searchcode/filters.py diff --git a/tests/test_searchcode.py b/tests/test_searchcode.py index 5e53cc2..893ddf3 100644 --- a/tests/test_searchcode.py +++ b/tests/test_searchcode.py @@ -1,21 +1,22 @@ -import searchcode as sc +from searchcode import Searchcode + +sc = Searchcode(user_agent="Pytest") def test_filter_by_extension(): - search = sc.code_search(query="gsub ext:erb") + search = sc.search("gsub ext:erb") for result in search.results: assert result.filename.endswith(".erb") def test_code_result(): - code = sc.code_result(4061576) + code = 
sc.code(4061576) assert isinstance(code, str) assert "This file is part of Quake III Arena source code" in code -""" -def test_related_results(): - related = sc.related_results(4061576) - assert isinstance(related, list) - assert len(related) == 0 -""" +# deprecated (for now) +# def test_related_results(): +# related = sc.related_results(4061576) +# assert isinstance(related, list) +# assert len(related) == 0 From d9acaaccef914e5b16b831fa66320708b0596b19 Mon Sep 17 00:00:00 2001 From: Ritchie Mwewa <74001397+rly0nheart@users.noreply.github.com> Date: Thu, 8 May 2025 03:42:07 +0200 Subject: [PATCH 2/2] Added a CLI with code highlighting, and table display for the results... also, code got refactored --- pyproject.toml | 2 +- searchcode/cli.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 493b12c..c33d8f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "searchcode" version = "0.3.0" -description = "Python SDK and CLI utility for Searchcode." +description = "Simple, comprehensive code search." authors = ["Ritchie Mwewa "] license = "GPLv3+" readme = "README.md" diff --git a/searchcode/cli.py b/searchcode/cli.py index d581474..d1cac55 100644 --- a/searchcode/cli.py +++ b/searchcode/cli.py @@ -17,7 +17,7 @@ @click.group() def cli(): """ - Searchcode: Simple comprehensive code search. + Searchcode: Simple, comprehensive code search. """ ...