Skip to content

Commit

Permalink
Integrate and Enhance Type Checking with Mypy (#264)
Browse files Browse the repository at this point in the history
Co-authored-by: Jason Liu <jxnl@users.noreply.github.com>
  • Loading branch information
gao-hongnan and jxnl committed Dec 12, 2023
1 parent 161f0dd commit 94e3296
Show file tree
Hide file tree
Showing 9 changed files with 304 additions and 142 deletions.
41 changes: 41 additions & 0 deletions .github/workflows/mypy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Type-checks the selected CLI modules with mypy on every push and on pull
# requests targeting main, then uploads the mypy log as a build artifact.
name: Continuous Integration

on:
  push:
  pull_request:
    branches: [ main ]

env:
  WORKING_DIRECTORY: "."
  MYPY_OUTPUT_FILENAME: "mypy.log"
  CUSTOM_FLAGS: "--python-version=3.9 --color-output --no-pretty --follow-imports=skip"
  CUSTOM_PACKAGES: "instructor/cli/cli.py instructor/cli/usage.py"

jobs:
  MyPy:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Environment setup
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML keeps the version a string; an unquoted 3.10
          # would be parsed as the float 3.1.
          python-version: "3.9"
      - name: Install dev dependencies
        run: |
          python3 -m pip install --upgrade pip setuptools wheel
          python3 -m pip install -r requirements.txt
      - name: Run Continuous Integration Action
        # NOTE(review): this executes a script fetched from a mutable branch
        # of another repository; consider pinning to a commit SHA or vendoring
        # the script.
        run: |
          # The default Actions shell runs bash without pipefail, so without
          # this line the step's status would be that of `tee` and a failing
          # type check could never fail the job.
          set -o pipefail
          export CUSTOM_PACKAGES="${{ env.CUSTOM_PACKAGES }}" &&
          export CUSTOM_FLAGS="${{ env.CUSTOM_FLAGS }}" &&
          curl -sSL https://raw.githubusercontent.com/gao-hongnan/omniverse/continuous-integration/scripts/devops/continuous-integration/type_mypy.sh |
          bash | tee "${{ env.WORKING_DIRECTORY }}/${{ env.MYPY_OUTPUT_FILENAME }}"
      - name: Upload Artifacts
        # Upload the log even when the type-check step failed; that is when
        # it is most useful.
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: mypy-log
          path: ${{ env.WORKING_DIRECTORY }}/${{ env.MYPY_OUTPUT_FILENAME }}
29 changes: 29 additions & 0 deletions .mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Reference:
# https://github.com/openai/openai-python/blob/main/mypy.ini
# https://github.com/pytorch/pytorch/blob/main/mypy.ini
[mypy]
# Output formatting and the Python version to type-check against.
pretty=True
show_error_codes=True
python_version=3.9

# Core checking behavior.
strict_equality=True
implicit_reexport=True
check_untyped_defs=True
no_implicit_optional=True

# Warnings.
warn_return_any=False
warn_unreachable=True
warn_unused_configs=True

# Turn these options off, as they could cause conflicts
# with the Pyright options.
warn_unused_ignores=False
warn_redundant_casts=False

# Require complete annotations and reject untyped or implicitly-Any constructs.
disallow_any_generics=True
disallow_untyped_defs=True
disallow_untyped_calls=True
disallow_subclassing_any=True
disallow_incomplete_defs=True
disallow_untyped_decorators=True
# Store fine-grained dependency information in the cache (used by the mypy daemon).
cache_fine_grained=True
15 changes: 14 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,17 @@ repos:
- id: ruff
args: [ --fix ]
# Run the formatter.
- id: ruff-format
- id: ruff-format
- repo: local
hooks:
- id: ci_type_mypy
name: Static Type Check
entry: >
bash -c 'set -o pipefail;
export CUSTOM_PACKAGES="instructor/cli/cli.py instructor/cli/usage.py" &&
export CUSTOM_FLAGS="--python-version=3.9 --color-output --no-pretty" &&
curl -sSL https://raw.githubusercontent.com/gao-hongnan/omniverse/continuous-integration/scripts/devops/continuous-integration/type_mypy.sh |
bash'
language: system
types: [python]
pass_filenames: false
Empty file added instructor/_types/__init__.py
Empty file.
12 changes: 12 additions & 0 deletions instructor/_types/_alias.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from typing import Literal

from typing_extensions import TypeAlias

# String-literal type listing the known model identifiers. Annotating a value
# as ModelNames lets a type checker flag any string that is not listed here.
ModelNames: TypeAlias = Literal[
    "gpt-3.5-turbo",
    "gpt-3.5-turbo-16k",
    "gpt-4",
    "gpt-4-32k",
    "text-embedding-ada-002",
    "text-embedding-ada-002-v2",
]
35 changes: 25 additions & 10 deletions instructor/cli/usage.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List
from typing import List, Dict, Any, Union, DefaultDict
from datetime import datetime, timedelta
import typer
import os
Expand All @@ -9,21 +9,24 @@
from rich.table import Table
from rich.progress import Progress

from instructor._types._alias import ModelNames


app = typer.Typer()
console = Console()

api_key = os.environ.get("OPENAI_API_KEY")


async def fetch_usage(date: str) -> dict:
async def fetch_usage(date: str) -> Dict[str, Any]:
    """Fetch the OpenAI usage report for a single date.

    Sends a GET request to ``https://api.openai.com/v1/usage`` with ``date``
    as the query parameter, authenticating with the module-level ``api_key``
    as a bearer token.

    Args:
        date: Date to query; interpolated verbatim into the URL
            (presumably ``YYYY-MM-DD`` -- confirm against the caller).

    Returns:
        The response body decoded as JSON.
    """
    # NOTE(review): api_key is read with os.environ.get and may be None, in
    # which case the header value becomes the literal "Bearer None".
    headers = {"Authorization": f"Bearer {api_key}"}
    url = f"https://api.openai.com/v1/usage?date={date}"
    # A new client session is opened for each call and closed on exit.
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as resp:
            # NOTE(review): the HTTP status is not checked here, so an error
            # payload is returned to the caller like a normal result.
            return await resp.json()


async def get_usage_for_past_n_days(n_days: int) -> List[dict]:
async def get_usage_for_past_n_days(n_days: int) -> List[Dict[str, Any]]:
tasks = []
all_data = []
with Progress() as progress:
Expand All @@ -43,7 +46,13 @@ async def get_usage_for_past_n_days(n_days: int) -> List[dict]:


# Define the cost per unit for each model
MODEL_COSTS = {
# Temporary explicit type annotation: without it, mypy may infer the dict's
# type from only its first few items (unconfirmed), which may not be
# representative of the entire dict.
MODEL_COSTS: Dict[
ModelNames,
Union[Dict[str, float], float],
] = {
"gpt-3.5-turbo": {"prompt": 0.0015 / 1000, "completion": 0.002 / 1000},
"gpt-3.5-turbo-16k": {"prompt": 0.003 / 1000, "completion": 0.004 / 1000},
"gpt-4": {"prompt": 0.03 / 1000, "completion": 0.03 / 1000},
Expand All @@ -53,7 +62,9 @@ async def get_usage_for_past_n_days(n_days: int) -> List[dict]:
}


def get_model_cost(model: str):
def get_model_cost(
model: ModelNames,
) -> Union[Dict[str, float], float]:
"""Get the cost details for a given model."""
if model in MODEL_COSTS:
return MODEL_COSTS[model]
Expand All @@ -72,7 +83,9 @@ def get_model_cost(model: str):


def calculate_cost(
snapshot_id: str, n_context_tokens: int, n_generated_tokens: int
snapshot_id: ModelNames,
n_context_tokens: int,
n_generated_tokens: int,
) -> float:
"""Calculate the cost based on the snapshot ID and number of tokens."""
cost = get_model_cost(snapshot_id)
Expand All @@ -85,9 +98,11 @@ def calculate_cost(
return prompt_cost + completion_cost


def group_and_sum_by_date_and_snapshot(usage_data: List[dict]) -> Table:
def group_and_sum_by_date_and_snapshot(usage_data: List[Dict[str, Any]]) -> Table:
"""Group and sum the usage data by date and snapshot, including costs."""
summary = defaultdict(
summary: DefaultDict[
str, DefaultDict[str, Dict[str, Union[int, float]]]
] = defaultdict(
lambda: defaultdict(
lambda: {"total_requests": 0, "total_tokens": 0, "total_cost": 0.0}
)
Expand Down Expand Up @@ -131,10 +146,10 @@ def group_and_sum_by_date_and_snapshot(usage_data: List[dict]) -> Table:
return table


@app.command(help="Displays OpenAI API usage data for the past N days.")
@app.command(help="Displays OpenAI API usage data for the past N days.") # type: ignore
def list(
n: int = typer.Option(0, help="Number of days."),
):
) -> None:
all_data = asyncio.run(get_usage_for_past_n_days(n))
table = group_and_sum_by_date_and_snapshot(all_data)
console.print(table)
Expand Down

0 comments on commit 94e3296

Please sign in to comment.