diff --git a/README.md b/README.md
index 83573f7..a4217ae 100644
--- a/README.md
+++ b/README.md
@@ -1,46 +1,48 @@
-

An unofficial Python SDK for SearchCode.
Search 75 billion lines of code from 40 million projects
+
Python SDK for Searchcode.
Search 75 billion lines of code from 40 million projects
-## Table Of Contents
-* [Code_search](#code_search)
- * [Example (Without Filters)](#example-without-filters)
- * [Example Language Filter (Java and Javascript)](#example-language-filter-java-and-javascript)
- * [Example Source Filter (Bitbucket and CodePlex)](#example-source-filter-bitbucket-and-codeplex)
- * [Example Lines of Code Filter (Between 500 and 1000)](#example-lines-of-code-filter-between-500-and-1000)
- * [Example (JSONP)](#example-jsonp)
- * [Response Attribute Definitions](#response-attribute-definitions)
-* [code_result](#code_result)
- * [Example](#example)
-* [related_results](#related_results)
- * [Example](#example-1)
- * [Response Attribute Definitions](#response-attribute-definitions-1)
-* [About Searchcode](#about-searchcode)
-* [Credit](#credit)
-
-***
-
-## code_search
+## Installation
+
+```bash
+
+pip install searchcode
+
+```
+
+## Documentation
+
+### Code Search
Queries the code index and returns at most 100 results.
-> [!TIP]
-All filters supported by searchcode are available. These include
-`sources`, `languages` and `lines_of_code`. These work in the same way that the main page works; See the examples below for how to use them.
+#### Params
-> [!TIP]
-To fetch all results for a given query, keep incrementing the `page` parameter until you get a page with an empty
-results list.
+- `query`: Search term (required).
+ - The following filters are textual and can be added into query directly
+ - Filter by file extension **ext:EXTENSION** e.g., _"gsub ext:erb"_
+ - Filter by language **lang:LANGUAGE** e.g., _"import lang:python"_
+ - Filter by repository **repo:REPONAME** e.g., _"float Q_rsqrt repo:quake"_
+ - Filter by user/repository **repo:USERNAME/REPONAME** e.g., _"batf repo:boyter/batf"_
+- `page`: Result page starting at 0 through to 49
+- `per_page`: Number of results wanted per page (max 100).
+- `languages`: List of programming languages to filter by.
+- `sources`: List of code sources (e.g., GitHub, BitBucket).
+- `lines_of_code_gt`: Filter to sources with more lines of code than the supplied int. Valid values 0 to 10000.
+- `lines_of_code_lt`: Filter to sources with fewer lines of code than the supplied int. Valid values 0 to 10000.
+- `callback`: Callback function (JSONP only)
-> [!IMPORTANT]
-If the results list is empty, then this indicates that you have reached the end of the available results.
+> If the results list is empty, then this indicates that you have reached the end of the available results.
+> To fetch all results for a given query, keep incrementing the `page` parameter until you get a page with an empty results
+> list.
-### Example (Without Filters):
+#### Code Search Without Filters
```python
+
import searchcode as sc
search = sc.code_search(query="test")
@@ -49,9 +51,10 @@ for result in search.results:
print(result)
```
-### Example Language Filter (Java and Javascript):
+#### Filter by Language (Java and JavaScript)
```python
+
import searchcode as sc
search = sc.code_search(query="test", languages=["Java", "JavaScript"])
@@ -60,9 +63,10 @@ for result in search.results:
print(result.language)
```
-### Example Source Filter (Bitbucket and CodePlex):
+#### Filter by Source (BitBucket and CodePlex)
```python
+
import searchcode as sc
search = sc.code_search(query="test", sources=["BitBucket", "CodePlex"])
@@ -71,29 +75,28 @@ for result in search.results:
print(result.filename)
```
-### Example Lines of Code Filter (Between 500 and 1000):
+#### Filter by Lines of Code (Between 500 and 1000)
```python
+
import searchcode as sc
-search = sc.code_search(query="test", lines_of_code=500, lines_of_code2=1000)
+search = sc.code_search(query="test", lines_of_code_gt=500, lines_of_code_lt=1000)
for result in search.results:
- print(result.linescount)
+ print(result)
```
-### Example (JSONP):
+#### With Callback Function (JSONP only)
```python
import searchcode as sc
-search = sc.code_search(query="soup", page=1, callback="myCallback")
-
-for result in search.results:
- print(result)
+search = sc.code_search(query="test", callback="myCallback")
+print(search)
```
-### Response Attribute Definitions
+#### Response Attribute Definitions
| Attribute | Description |
|----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -124,39 +127,39 @@ for result in search.results:
| **md5hash** | Calculated MD5 hash of the file's contents. |
| **lines** | Contains line numbers and lines which match the `searchterm`. Lines immediately before and after the match are included. If only the filename matches, up to the first 15 lines of the file are returned. |
-## code_result
+### Code Result
+
+Returns the raw data from a code file given the code id which can be found as the `id` in a code search result.
-Returns the raw data from a code file given the code id which can be found as the id in a code search result.
+#### Params
-### Example:
+- `_id`: Unique identifier for the code file (required).
```python
+
import searchcode as sc
-result = sc.code_result(id=4061576)
-print(result)
+code = sc.code_result(4061576)
+print(code)
```
-> Returns raw data from the code file.
-
-## related_results
+### Related Results
-Returns an array of results given a searchcode unique code id which are considered to be duplicates.
+Returns an array of results given a searchcode unique code id which are considered to be duplicates.
-> [!IMPORTANT]
-The matching is
-slightly fuzzy allowing so that small differences between files are ignored.
+#### Params
-### Example:
+- `_id`: Unique identifier for the code file (required).
```python
+
import searchcode as sc
-related = sc.related_results(id=4061576)
+related = sc.related_results(4061576)
print(related)
```
-### Response Attribute Definitions
+#### Response Attribute Definitions
| Attribute | Description |
|----------------|------------------------------------------------------------------------------------------|
@@ -172,8 +175,13 @@ print(related)
## About Searchcode
-Read more about searchcode [here](https://searchcode.com/about/)
+Searchcode is a simple, comprehensive source code search engine that indexes billions of lines of code from open-source
+projects,
+helping you find real-world examples of functions, APIs and libraries in 243 languages across 10+ public code sources.
+
+[Learn more](https://searchcode.com/about)
-## Credit
+## Acknowledgements
-Special thanks to [Ben Boyter](https://boyter.org/about/), developer of [searchcode.com](https://searchcode.com)
+This SDK is developed and maintained by [Richard Mwewa](https://gravatar.com/rly0nheart), in collaboration
+with [Ben Boyter](https://boyter.org/about/), the creator of [Searchcode.com](https://searchcode.com).
diff --git a/pyproject.toml b/pyproject.toml
index b0e977e..28fe91e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,9 +1,9 @@
[tool.poetry]
name = "searchcode"
-version = "0.1.1"
-description = "An unofficial Python SDK for Searchcode."
+version = "0.2.0"
+description = "Python SDK for Searchcode."
authors = ["Richard Mwewa "]
-license = "GPL-3.0+ License"
+license = "GPLv3+"
readme = "README.md"
homepage = "https://searchcode.com"
repository = "https://github.com/rly0nheart/searchcode-sdk"
diff --git a/searchcode/_main.py b/searchcode/_main.py
index 2215e0f..b654c36 100644
--- a/searchcode/_main.py
+++ b/searchcode/_main.py
@@ -17,17 +17,13 @@
def _get_response(
- endpoint: str,
- headers: Optional[Dict] = None,
- params: Optional[List[Tuple[str, str]]] = None,
+ endpoint: str, params: Optional[List[Tuple[str, str]]] = None, **kwargs
) -> Union[Dict, List, str]:
"""
Sends a GET request to the specified endpoint with the given headers and parameters.
:param endpoint: The API endpoint to send the request to.
:type endpoint: str
- :param headers: Optional HTTP headers to include in the request.
- :type headers: Optional[Dict]
:param params: Optional list of query parameters as key-value tuples.
:type params: Optional[List[Tuple[str, str]]]
:return: The parsed JSON response, which could be a dictionary, list, or string.
@@ -35,35 +31,34 @@ def _get_response(
:raises Exception: If the request fails or the server returns an error.
"""
- try:
- response = requests.get(url=endpoint, params=params, headers=headers)
- response.raise_for_status()
- return response.json()
+ response = requests.get(url=endpoint, params=params)
+ response.raise_for_status()
+ return response.text if kwargs.get("is_callback") else response.json()
- except Exception as e:
- raise e
-
-def _object_to_namespace(
- obj: Union[List[Dict], Dict]
+def _response_to_namespace_obj(
+ response: Union[List[Dict], Dict]
) -> Union[List[SimpleNamespace], SimpleNamespace, List[Dict], Dict]:
"""
- Recursively converts dictionaries and lists of dictionaries into SimpleNamespace objects.
+ Recursively converts the API response into a SimpleNamespace object(s).
- :param obj: The object to convert, either a dictionary or a list of dictionaries.
- :type obj: Union[List[Dict], Dict]
+ :param response: The object to convert, either a dictionary or a list of dictionaries.
+ :type response: Union[List[Dict], Dict]
:return: A SimpleNamespace object or list of SimpleNamespace objects.
:rtype: Union[List[SimpleNamespace], SimpleNamespace, None]
"""
- if isinstance(obj, Dict):
+ if isinstance(response, Dict):
return SimpleNamespace(
- **{key: _object_to_namespace(obj=value) for key, value in obj.items()}
+ **{
+ key: _response_to_namespace_obj(response=value)
+ for key, value in response.items()
+ }
)
- elif isinstance(obj, List):
- return [_object_to_namespace(obj=item) for item in obj]
+ elif isinstance(response, List):
+ return [_response_to_namespace_obj(response=item) for item in response]
else:
- return obj
+ return response
def code_search(
@@ -72,10 +67,10 @@ def code_search(
per_page: int = 100,
languages: Optional[List[CODE_LANGUAGES]] = None,
sources: Optional[List[CODE_SOURCES]] = None,
- lines_of_code: Optional[int] = None,
- lines_of_code2: Optional[int] = None,
+ lines_of_code_gt: Optional[int] = None,
+ lines_of_code_lt: Optional[int] = None,
callback: Optional[str] = None,
-) -> SimpleNamespace:
+) -> Union[SimpleNamespace, str]:
"""
Searches and returns code snippets matching the query.
@@ -98,10 +93,10 @@ def code_search(
:param sources: Allows filtering to sources supplied by return types.
Supply multiple to filter to multiple sources.
:type sources: Optional[List[CODE_SOURCES]]
- :param lines_of_code: Filter to sources with greater lines of code then supplied int. Valid values 0 to 10000.
- :type lines_of_code: int
- :param lines_of_code2: Filter to sources with fewer lines of code then supplied int. Valid values 0 to 10000.
- :type lines_of_code2: int
+ :param lines_of_code_gt: Filter to sources with greater lines of code than supplied int. Valid values 0 to 10000.
+ :type lines_of_code_gt: int
+ :param lines_of_code_lt: Filter to sources with fewer lines of code than supplied int. Valid values 0 to 10000.
+ :type lines_of_code_lt: int
:param callback: Callback function (JSONP only)
:type callback: str
:return: The search results as a SimpleNamespace object.
@@ -112,47 +107,48 @@ def code_search(
source_ids = [] if not sources else get_source_ids(source_names=sources)
response = _get_response(
- endpoint=f"{_BASE_API_ENDPOINT}/codesearch_I/",
+ endpoint=f"{_BASE_API_ENDPOINT}/{'jsonp_codesearch_I' if callback else 'codesearch_I'}/",
params=[
("q", query),
("p", page),
("per_page", per_page),
- ("loc", lines_of_code),
- ("loc2", lines_of_code2),
+ ("loc", lines_of_code_gt),
+ ("loc2", lines_of_code_lt),
("callback", callback),
*[("lan", language_id) for language_id in language_ids],
*[("src", source_id) for source_id in source_ids],
],
+ is_callback=callback,
)
- return _object_to_namespace(obj=response)
+ return _response_to_namespace_obj(response=response)
-def code_result(id: int) -> SimpleNamespace:
+def code_result(_id: int) -> SimpleNamespace:
"""
Returns the raw data from a code file given the code ID which can be found as the `id` in a code search result.
- :param id: The unique identifier of the code result.
- :type id: int
+ :param _id: The unique identifier of the code result.
+ :type _id: int
:return: The code result details as a SimpleNamespace object.
:rtype: SimpleNamespace
"""
- response = _get_response(endpoint=f"{_BASE_API_ENDPOINT}/result/{id}")
+ response = _get_response(endpoint=f"{_BASE_API_ENDPOINT}/result/{_id}")
return response.get("code")
-def related_results(id: int) -> SimpleNamespace:
+def related_results(_id: int) -> SimpleNamespace:
"""
Returns an array of results given a searchcode unique code id which are considered to be duplicates.
The matching is slightly fuzzy allowing so that small differences between files are ignored.
- :param id: The unique identifier of the code result.
- :type id: int
+ :param _id: The unique identifier of the code result.
+ :type _id: int
:return: A list of related results as a SimpleNamespace object.
:rtype: SimpleNamespace
"""
- response = _get_response(endpoint=f"{_BASE_API_ENDPOINT}/related_results/{id}")
- return _object_to_namespace(obj=response)
+ response = _get_response(endpoint=f"{_BASE_API_ENDPOINT}/related_results/{_id}")
+ return _response_to_namespace_obj(response=response)
diff --git a/tests/test_searchcode.py b/tests/test_searchcode.py
index 25bcc86..c0ee2cd 100644
--- a/tests/test_searchcode.py
+++ b/tests/test_searchcode.py
@@ -1,21 +1,26 @@
import searchcode as sc
-def test_code_search():
- search = sc.code_search(query="fn main()", per_page=10, languages=["Rust", "Go"])
- assert len(search.results) >= 10
+def test_filter_by_language():
+ search = sc.code_search(query="fn main()", languages=["Rust", "Go"])
for result in search.results:
assert result.filename.endswith(".rs") or result.filename.endswith(".go")
assert result.language in ["Rust", "Go"]
+def test_filter_by_extension():
+ search = sc.code_search(query="gsub ext:erb")
+ for result in search.results:
+ assert result.filename.endswith(".erb")
+
+
def test_code_result():
- code_result = sc.code_result(id=4061576)
- assert isinstance(code_result, str)
- assert "This file is part of Quake III Arena source code" in code_result
+ code = sc.code_result(4061576)
+ assert isinstance(code, str)
+ assert "This file is part of Quake III Arena source code" in code
def test_related_results():
- related = sc.related_results(id=4061576)
+ related = sc.related_results(4061576)
assert isinstance(related, list)
assert len(related) == 0