Skip to content

Commit

Permalink
Hallacy/11 4 release (#54)
Browse files Browse the repository at this point in the history
* Make embeddings_utils be importable (#104)

* Make embeddings_utils be importable

* Small tweaks to dicts for typing

* Remove default api_prefix and move v1 prefix to default api_base (#95)

* make construct_from key argument optional (#92)

* Split search.prepare_data into answers/classifications/search versions (#93)

* Break out prepare_data into answers, classifications, and search

* And cleaned up CLI

* Validate search files (#69)

* Add validators for search files

* Clean up fields

Co-authored-by: kennyhsu5 <1762087+kennyhsu5@users.noreply.github.com>
Co-authored-by: Madeleine Thompson <madeleine@openai.com>
  • Loading branch information
3 people committed Dec 14, 2021
1 parent 88bbe08 commit b39bddd
Show file tree
Hide file tree
Showing 19 changed files with 246 additions and 115 deletions.
2 changes: 1 addition & 1 deletion examples/embeddings/Classification.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@
}
],
"source": [
"from utils import plot_multiclass_precision_recall\n",
"from openai.embeddings_utils import plot_multiclass_precision_recall\n",
"\n",
"plot_multiclass_precision_recall(probas, y_test, [1,2,3,4,5], clf)"
]
Expand Down
4 changes: 2 additions & 2 deletions examples/embeddings/Code_search.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@
}
],
"source": [
"from utils import get_embedding\n",
"from openai.embeddings_utils import get_embedding\n",
"\n",
"df = pd.DataFrame(all_funcs)\n",
"df['code_embedding'] = df['code'].apply(lambda x: get_embedding(x, engine='babbage-code-search-code'))\n",
Expand Down Expand Up @@ -231,7 +231,7 @@
}
],
"source": [
"from utils import cosine_similarity\n",
"from openai.embeddings_utils import cosine_similarity\n",
"\n",
"def search_functions(df, code_query, n=3, pprint=True, n_lines=7):\n",
" embedding = get_embedding(code_query, engine='babbage-code-search-text')\n",
Expand Down
2 changes: 1 addition & 1 deletion examples/embeddings/Obtain_dataset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@
"metadata": {},
"outputs": [],
"source": [
"from utils import get_embedding\n",
"from openai.embeddings_utils import get_embedding\n",
"\n",
"# This will take just under 10 minutes\n",
"df['babbage_similarity'] = df.combined.apply(lambda x: get_embedding(x, engine='babbage-similarity'))\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
}
],
"source": [
"from utils import get_embedding, cosine_similarity\n",
"from openai.embeddings_utils import get_embedding, cosine_similarity\n",
"\n",
"# search through the reviews for a specific product\n",
"def search_reviews(df, product_description, n=3, pprint=True):\n",
Expand Down
2 changes: 1 addition & 1 deletion examples/embeddings/User_and_product_embeddings.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
"metadata": {},
"outputs": [],
"source": [
"from utils import cosine_similarity\n",
"from openai.embeddings_utils import cosine_similarity\n",
"\n",
"# evaluate embeddings as recommendations on X_test\n",
"def evaluate_single_match(row):\n",
Expand Down
2 changes: 1 addition & 1 deletion examples/embeddings/Zero-shot_classification.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
}
],
"source": [
"from utils import cosine_similarity, get_embedding\n",
"from openai.embeddings_utils import cosine_similarity, get_embedding\n",
"from sklearn.metrics import PrecisionRecallDisplay\n",
"\n",
"def evaluate_emeddings_approach(\n",
Expand Down
2 changes: 1 addition & 1 deletion openai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
api_key_path: Optional[str] = os.environ.get("OPENAI_API_KEY_PATH")

organization = os.environ.get("OPENAI_ORGANIZATION")
api_base = os.environ.get("OPENAI_API_BASE", "https://api.openai.com")
api_base = os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1")
api_version = None
verify_ssl_certs = True # No effect. Certificates are always verified.
proxy = None
Expand Down
6 changes: 4 additions & 2 deletions openai/api_resources/abstract/api_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


class APIResource(OpenAIObject):
api_prefix = "v1"
api_prefix = ""

@classmethod
def retrieve(cls, id, api_key=None, request_id=None, **params):
Expand All @@ -28,7 +28,9 @@ def class_url(cls):
# Namespaces are separated in object names with periods (.) and in URLs
# with forward slashes (/), so replace the former with the latter.
base = cls.OBJECT_NAME.replace(".", "/") # type: ignore
return "/%s/%ss" % (cls.api_prefix, base)
if cls.api_prefix:
return "/%s/%ss" % (cls.api_prefix, base)
return "/%ss" % (base)

def instance_url(self):
id = self.get("id")
Expand Down
4 changes: 2 additions & 2 deletions openai/api_resources/abstract/engine_api_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ def class_url(cls, engine: Optional[str] = None):
# with forward slashes (/), so replace the former with the latter.
base = cls.OBJECT_NAME.replace(".", "/") # type: ignore
if engine is None:
return "/%s/%ss" % (cls.api_prefix, base)
return "/%ss" % (base)

extn = quote_plus(engine)
return "/%s/engines/%s/%ss" % (cls.api_prefix, extn, base)
return "/engines/%s/%ss" % (extn, base)

@classmethod
def create(
Expand Down
8 changes: 3 additions & 5 deletions openai/api_resources/answer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@


class Answer(OpenAIObject):
    """API resource for the (alpha) ``/answers`` endpoint.

    NOTE(review): the scraped diff kept both the removed and the added
    lines (stale ``api_prefix``, a shadowed ``get_url``, and a dead second
    ``return``); this is the coherent post-commit version, where the
    ``/v1`` prefix lives in ``openai.api_base`` instead of on the resource.
    """

    @classmethod
    def get_url(cls):
        """Return the URL path for the answers endpoint (no version prefix)."""
        return "/answers"

    @classmethod
    def create(cls, **params):
        """POST an answers request built from ``params`` and return the response."""
        instance = cls()
        return instance.request("post", cls.get_url(), params)
8 changes: 3 additions & 5 deletions openai/api_resources/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@


class Classification(OpenAIObject):
    """API resource for the (alpha) ``/classifications`` endpoint.

    NOTE(review): the scraped diff kept both the removed and the added
    lines (stale ``api_prefix``, a shadowed ``get_url``, and a dead second
    ``return``); this is the coherent post-commit version, where the
    ``/v1`` prefix lives in ``openai.api_base`` instead of on the resource.
    """

    @classmethod
    def get_url(cls):
        """Return the URL path for the classifications endpoint (no version prefix)."""
        return "/classifications"

    @classmethod
    def create(cls, **params):
        """POST a classifications request built from ``params`` and return the response."""
        instance = cls()
        return instance.request("post", cls.get_url(), params)
7 changes: 2 additions & 5 deletions openai/api_resources/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,11 @@


class Search(APIResource):
    """API resource for the (alpha) search-indices search endpoint.

    NOTE(review): the scraped diff kept both the removed and the added
    lines (stale ``api_prefix``, two ``class_url`` returns, two request
    calls); this is the coherent post-commit version.  The ``/search``
    action is folded into ``class_url`` and the ``/v1`` prefix now lives
    in ``openai.api_base``.
    """

    OBJECT_NAME = "search_indices"

    @classmethod
    def class_url(cls):
        """Return the full URL path for the search action (no version prefix)."""
        return "/search_indices/search"

    @classmethod
    def create_alpha(cls, **params):
        """POST a search request built from ``params`` and return the response."""
        instance = cls()
        return instance.request("post", cls.class_url(), params)
139 changes: 96 additions & 43 deletions openai/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import signal
import sys
import warnings
from functools import partial
from typing import Optional

import requests
Expand All @@ -11,10 +12,12 @@
from openai.upload_progress import BufferReader
from openai.validators import (
apply_necessary_remediation,
apply_optional_remediation,
apply_validators,
get_search_validators,
get_validators,
read_any_format,
write_out_file,
write_out_search_file,
)


Expand Down Expand Up @@ -227,6 +230,40 @@ def list(cls, args):


class Search:
@classmethod
def prepare_data(cls, args, purpose):
    """Analyze and remediate a local examples file for search-style endpoints.

    Reads ``args.file`` (JSONL, JSON, CSV, TSV, TXT or XLSX), checks that
    the fields required for *purpose* are present, runs the search
    validators, and writes out a cleaned file.  ``args.quiet`` auto-accepts
    every suggested remediation without prompting.

    :param args: parsed CLI namespace with ``file`` and ``quiet``.
    :param purpose: one of "search", "classification(s)", or "answer" —
        bound via ``functools.partial`` at parser registration.
    """
    sys.stdout.write("Analyzing...\n")
    fname = args.file
    auto_accept = args.quiet

    optional_fields = ["metadata"]

    # BUG FIX: the CLI registers this handler with purpose="classification"
    # (singular, see the classifications.prepare_data subparser), but the
    # original compared against "classifications", so classification files
    # were never required to carry a "labels" field.  Accept both spellings
    # to stay backward-compatible.
    if purpose in ("classification", "classifications"):
        required_fields = ["text", "labels"]
    else:
        required_fields = ["text"]

    df, remediation = read_any_format(
        fname, fields=required_fields + optional_fields
    )

    # Guarantee the optional metadata column exists for downstream writers.
    if "metadata" not in df:
        df["metadata"] = None

    apply_necessary_remediation(None, remediation)
    validators = get_search_validators(required_fields, optional_fields)

    # Writer pre-bound with purpose and field list; apply_validators supplies
    # the remaining (df, fname, ...) arguments.
    write_out_file_func = partial(
        write_out_search_file,
        purpose=purpose,
        fields=required_fields + optional_fields,
    )

    apply_validators(
        df, fname, remediation, validators, auto_accept, write_out_file_func
    )

@classmethod
def create_alpha(cls, args):
resp = openai.Search.create_alpha(
Expand Down Expand Up @@ -489,49 +526,14 @@ def prepare_data(cls, args):

validators = get_validators()

optional_remediations = []
if remediation is not None:
optional_remediations.append(remediation)
for validator in validators:
remediation = validator(df)
if remediation is not None:
optional_remediations.append(remediation)
df = apply_necessary_remediation(df, remediation)

any_optional_or_necessary_remediations = any(
[
remediation
for remediation in optional_remediations
if remediation.optional_msg is not None
or remediation.necessary_msg is not None
]
apply_validators(
df,
fname,
remediation,
validators,
auto_accept,
write_out_file_func=write_out_file,
)
any_necessary_applied = any(
[
remediation
for remediation in optional_remediations
if remediation.necessary_msg is not None
]
)
any_optional_applied = False

if any_optional_or_necessary_remediations:
sys.stdout.write(
"\n\nBased on the analysis we will perform the following actions:\n"
)
for remediation in optional_remediations:
df, optional_applied = apply_optional_remediation(
df, remediation, auto_accept
)
any_optional_applied = any_optional_applied or optional_applied
else:
sys.stdout.write("\n\nNo remediations found.\n")

any_optional_or_necessary_applied = (
any_optional_applied or any_necessary_applied
)

write_out_file(df, fname, any_optional_or_necessary_applied, auto_accept)


def tools_register(parser):
Expand Down Expand Up @@ -561,6 +563,57 @@ def help(args):
)
sub.set_defaults(func=FineTune.prepare_data)

sub = subparsers.add_parser("search.prepare_data")
sub.add_argument(
"-f",
"--file",
required=True,
help="JSONL, JSON, CSV, TSV, TXT or XLSX file containing text examples to be analyzed."
"This should be the local file path.",
)
sub.add_argument(
"-q",
"--quiet",
required=False,
action="store_true",
help="Auto accepts all suggestions, without asking for user input. To be used within scripts.",
)
sub.set_defaults(func=partial(Search.prepare_data, purpose="search"))

sub = subparsers.add_parser("classifications.prepare_data")
sub.add_argument(
"-f",
"--file",
required=True,
help="JSONL, JSON, CSV, TSV, TXT or XLSX file containing text-label examples to be analyzed."
"This should be the local file path.",
)
sub.add_argument(
"-q",
"--quiet",
required=False,
action="store_true",
help="Auto accepts all suggestions, without asking for user input. To be used within scripts.",
)
sub.set_defaults(func=partial(Search.prepare_data, purpose="classification"))

sub = subparsers.add_parser("answers.prepare_data")
sub.add_argument(
"-f",
"--file",
required=True,
help="JSONL, JSON, CSV, TSV, TXT or XLSX file containing text examples to be analyzed."
"This should be the local file path.",
)
sub.add_argument(
"-q",
"--quiet",
required=False,
action="store_true",
help="Auto accepts all suggestions, without asking for user input. To be used within scripts.",
)
sub.set_defaults(func=partial(Search.prepare_data, purpose="answer"))


def api_register(parser):
# Engine management
Expand Down
Loading

0 comments on commit b39bddd

Please sign in to comment.