Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate and Enhance Type Checking with Mypy #264

Merged
merged 11 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
41 changes: 41 additions & 0 deletions .github/workflows/mypy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
name: Continuous Integration
jxnl marked this conversation as resolved.
Show resolved Hide resolved

on:
push:
pull_request:
branches: [ main ]

env:
WORKING_DIRECTORY: "."
MYPY_OUTPUT_FILENAME: "mypy.log"
CUSTOM_FLAGS: "--python-version=3.9 --color-output --no-pretty --follow-imports=skip"
CUSTOM_PACKAGES: "instructor/cli/cli.py instructor/cli/usage.py"

jobs:
continuous-integration:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Environment setup
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install dev dependencies
run: |
python3 -m pip install --upgrade pip setuptools wheel
python3 -m pip install -r requirements.txt
# Run the remote mypy wrapper script and mirror its output into the log file.
# `set -o pipefail` is required: without it the step's exit status is that of
# `tee`, so type errors reported by the script would never fail the job.
- name: Run Continuous Integration Action
run: |
set -o pipefail
export CUSTOM_PACKAGES="${{ env.CUSTOM_PACKAGES }}" &&
export CUSTOM_FLAGS="${{ env.CUSTOM_FLAGS }}" &&
curl -sSL https://raw.githubusercontent.com/gao-hongnan/omniverse/continuous-integration/scripts/devops/continuous-integration/type_mypy.sh |
bash | tee ${{ env.WORKING_DIRECTORY }}/${{ env.MYPY_OUTPUT_FILENAME }}
jxnl marked this conversation as resolved.
Show resolved Hide resolved
- name: Upload Artifacts
uses: actions/upload-artifact@v3
with:
name: mypy-log
path: ${{ env.WORKING_DIRECTORY }}/${{ env.MYPY_OUTPUT_FILENAME }}
29 changes: 29 additions & 0 deletions .mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Reference:
# https://github.com/openai/openai-python/blob/main/mypy.ini
# https://github.com/pytorch/pytorch/blob/main/mypy.ini
[mypy]
pretty=True
show_error_codes=True
python_version=3.9

strict_equality=True
implicit_reexport=True
check_untyped_defs=True
no_implicit_optional=True

warn_return_any=False
warn_unreachable=True
warn_unused_configs=True

# Turn these options off because they can conflict
# with the Pyright options.
warn_unused_ignores=False
warn_redundant_casts=False

disallow_any_generics=True
disallow_untyped_defs=True
disallow_untyped_calls=True
disallow_subclassing_any=True
disallow_incomplete_defs=True
disallow_untyped_decorators=True
cache_fine_grained=True
15 changes: 14 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,17 @@ repos:
- id: ruff
args: [ --fix ]
# Run the formatter.
- id: ruff-format
- id: ruff-format
- repo: local
hooks:
- id: ci_type_mypy
name: Static Type Check
entry: >
bash -c 'set -o pipefail;
export CUSTOM_PACKAGES="instructor/cli/cli.py instructor/cli/usage.py" &&
export CUSTOM_FLAGS="--python-version=3.9 --color-output --no-pretty" &&
curl -sSL https://raw.githubusercontent.com/gao-hongnan/omniverse/continuous-integration/scripts/devops/continuous-integration/type_mypy.sh |
bash'
language: system
types: [python]
pass_filenames: false
Comment on lines +11 to +23
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new ci_type_mypy hook is correctly configured to perform static type checks using mypy. The entry script is fetched from a remote repository, which could be a potential security risk if the script content changes. Consider pinning the script to a specific commit hash to mitigate this risk.

Empty file added instructor/_types/__init__.py
Empty file.
12 changes: 12 additions & 0 deletions instructor/_types/_alias.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from typing import Literal

from typing_extensions import TypeAlias

# Closed set of model-name strings used to type model identifiers.
# instructor/cli/usage.py imports this alias for the keys of its cost table
# and for the `model` / `snapshot_id` parameters of its cost helpers.
ModelNames: TypeAlias = Literal[
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-4",
"gpt-4-32k",
"text-embedding-ada-002",
"text-embedding-ada-002-v2",
]
35 changes: 25 additions & 10 deletions instructor/cli/usage.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List
from typing import List, Dict, Any, Union, DefaultDict
from datetime import datetime, timedelta
import typer
import os
Expand All @@ -9,21 +9,24 @@
from rich.table import Table
from rich.progress import Progress

from instructor._types._alias import ModelNames


app = typer.Typer()
console = Console()

api_key = os.environ.get("OPENAI_API_KEY")


async def fetch_usage(date: str) -> dict:
async def fetch_usage(date: str) -> Dict[str, Any]:
headers = {"Authorization": f"Bearer {api_key}"}
url = f"https://api.openai.com/v1/usage?date={date}"
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=headers) as resp:
return await resp.json()


async def get_usage_for_past_n_days(n_days: int) -> List[dict]:
async def get_usage_for_past_n_days(n_days: int) -> List[Dict[str, Any]]:
tasks = []
all_data = []
with Progress() as progress:
Expand All @@ -43,7 +46,13 @@ async def get_usage_for_past_n_days(n_days: int) -> List[dict]:


# Define the cost per unit for each model
MODEL_COSTS = {
# Temporary explicit annotation: the values mix per-token price dicts and
# plain floats, so the type mypy infers from the literal alone is too loose
# to be representative of the whole dict.
MODEL_COSTS: Dict[
ModelNames,
Union[Dict[str, float], float],
] = {
"gpt-3.5-turbo": {"prompt": 0.0015 / 1000, "completion": 0.002 / 1000},
"gpt-3.5-turbo-16k": {"prompt": 0.003 / 1000, "completion": 0.004 / 1000},
"gpt-4": {"prompt": 0.03 / 1000, "completion": 0.03 / 1000},
Expand All @@ -53,7 +62,9 @@ async def get_usage_for_past_n_days(n_days: int) -> List[dict]:
}


def get_model_cost(model: str):
def get_model_cost(
model: ModelNames,
) -> Union[Dict[str, float], float]:
"""Get the cost details for a given model."""
if model in MODEL_COSTS:
return MODEL_COSTS[model]
Expand All @@ -72,7 +83,9 @@ def get_model_cost(model: str):


def calculate_cost(
snapshot_id: str, n_context_tokens: int, n_generated_tokens: int
snapshot_id: ModelNames,
n_context_tokens: int,
n_generated_tokens: int,
) -> float:
"""Calculate the cost based on the snapshot ID and number of tokens."""
cost = get_model_cost(snapshot_id)
Expand All @@ -85,9 +98,11 @@ def calculate_cost(
return prompt_cost + completion_cost


def group_and_sum_by_date_and_snapshot(usage_data: List[dict]) -> Table:
def group_and_sum_by_date_and_snapshot(usage_data: List[Dict[str, Any]]) -> Table:
"""Group and sum the usage data by date and snapshot, including costs."""
summary = defaultdict(
summary: DefaultDict[
str, DefaultDict[str, Dict[str, Union[int, float]]]
] = defaultdict(
lambda: defaultdict(
lambda: {"total_requests": 0, "total_tokens": 0, "total_cost": 0.0}
)
Expand Down Expand Up @@ -131,10 +146,10 @@ def group_and_sum_by_date_and_snapshot(usage_data: List[dict]) -> Table:
return table


@app.command(help="Displays OpenAI API usage data for the past N days.")
@app.command(help="Displays OpenAI API usage data for the past N days.") # type: ignore
def list(
n: int = typer.Option(0, help="Number of days."),
):
) -> None:
all_data = asyncio.run(get_usage_for_past_n_days(n))
table = group_and_sum_by_date_and_snapshot(all_data)
console.print(table)
Expand Down
3 changes: 3 additions & 0 deletions instructor/sandbox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Scratch values: two distinct integers and the result of comparing them.
a, b = 1, 2
# True exactly when the two values differ.
c = not (a == b)