lint documentation (#403)
jxnl committed Feb 5, 2024
1 parent a56ae6b commit edc22b8
Showing 44 changed files with 583 additions and 286 deletions.
15 changes: 5 additions & 10 deletions docs/blog/posts/anyscale.md
@@ -50,27 +50,22 @@ class UserDetails(BaseModel):
name: str
age: int


# enables `response_model` in create call
client = instructor.patch(
OpenAI(
base_url="https://api.endpoints.anyscale.com/v1",
api_key="<YOUR_ANYSCALE_API_KEY>"
api_key="<YOUR_ANYSCALE_API_KEY>",
),
# This uses Anyscale's json schema output mode
mode=instructor.Mode.JSON_SCHEMA
mode=instructor.Mode.JSON_SCHEMA,
)

resp = client.chat.completions.create(
model="mistralai/Mixtral-8x7B-Instruct-v0.1",
messages=[
{
"role": "system",
"content": "You are a world class extractor"
},
{
"role": "user",
"content": 'Extract the following entities: "Jason is 20"'
},
{"role": "system", "content": "You are a world class extractor"},
{"role": "user", "content": 'Extract the following entities: "Jason is 20"'},
],
response_model=UserDetails,
)
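
For context, a hedged sketch of inspecting the structured result; the printed repr below is an assumption, not output shown in the post:

```python
# Hypothetical inspection of the extracted object (assumes the call above succeeds)
print(resp)
#> UserDetails(name='Jason', age=20)
```
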
44 changes: 27 additions & 17 deletions docs/blog/posts/caching.md
@@ -30,18 +30,20 @@ from pydantic import BaseModel
# Enables `response_model`
client = instructor.patch(OpenAI())


class UserDetail(BaseModel):
name: str
age: int


def extract(data) -> UserDetail:
return client.chat.completions.create(
model="gpt-3.5-turbo",
response_model=UserDetail,
messages=[
{"role": "user", "content": data},
]
)
model="gpt-3.5-turbo",
response_model=UserDetail,
messages=[
{"role": "user", "content": data},
],
)
```

Now imagine batch processing data, running tests or experiments, or simply calling `extract` multiple times over a workflow. We'll quickly run into performance issues, as the function may be called repeatedly, and the same data will be processed over and over again, costing us time and money.
@@ -53,14 +55,15 @@ Now imagine batch processing data, running tests or experiments, or simply calli
```python
import functools


@functools.cache
def extract(data):
return client.chat.completions.create(
model="gpt-3.5-turbo",
response_model=UserDetail,
messages=[
{"role": "user", "content": data},
]
],
)
```

@@ -128,12 +131,13 @@ print(f"Time taken: {time.perf_counter() - start}")
import inspect
import diskcache

cache = diskcache.Cache('./my_cache_directory') # (1)
cache = diskcache.Cache('./my_cache_directory') # (1)


def instructor_cache(func):
"""Cache a function that returns a Pydantic model"""
return_type = inspect.signature(func).return_annotation
if not issubclass(return_type, BaseModel): # (2)
if not issubclass(return_type, BaseModel): # (2)
raise ValueError("The return type must be a Pydantic model")

@functools.wraps(func)
@@ -176,13 +180,15 @@ cache = diskcache.Cache('./my_cache_directory')

def instructor_cache(func):
"""Cache a function that returns a Pydantic model"""
return_type = inspect.signature(func).return_annotation # (4)
if not issubclass(return_type, BaseModel): # (1)
return_type = inspect.signature(func).return_annotation # (4)
if not issubclass(return_type, BaseModel): # (1)
raise ValueError("The return type must be a Pydantic model")

@functools.wraps(func)
def wrapper(*args, **kwargs):
key = f"{func.__name__}-{functools._make_key(args, kwargs, typed=False)}" # (2)
key = (
f"{func.__name__}-{functools._make_key(args, kwargs, typed=False)}" # (2)
)
# Check if the result is already cached
if (cached := cache.get(key)) is not None:
# Deserialize from JSON based on the return type (3)
@@ -197,18 +203,20 @@ def instructor_cache(func):

return wrapper


class UserDetail(BaseModel):
name: str
age: int


@instructor_cache
def extract(data) -> UserDetail:
return client.chat.completions.create(
model="gpt-3.5-turbo",
response_model=UserDetail,
messages=[
{"role": "user", "content": data},
]
],
)
```

@@ -232,6 +240,7 @@ def extract(data) -> UserDetail:

cache = redis.Redis("localhost")


def instructor_cache(func):
"""Cache a function that returns a Pydantic model"""
return_type = inspect.signature(func).return_annotation
@@ -264,7 +273,6 @@ def extract(data) -> UserDetail:
import redis
import functools
import inspect
import json
import instructor

from pydantic import BaseModel
@@ -273,15 +281,16 @@ from openai import OpenAI
client = instructor.patch(OpenAI())
cache = redis.Redis("localhost")


def instructor_cache(func):
"""Cache a function that returns a Pydantic model"""
return_type = inspect.signature(func).return_annotation
if not issubclass(return_type, BaseModel): # (1)
if not issubclass(return_type, BaseModel): # (1)
raise ValueError("The return type must be a Pydantic model")

@functools.wraps(func)
def wrapper(*args, **kwargs):
key = f"{func.__name__}-{functools._make_key(args, kwargs, typed=False)}" # (2)
key = f"{func.__name__}-{functools._make_key(args, kwargs, typed=False)}" # (2)
# Check if the result is already cached
if (cached := cache.get(key)) is not None:
# Deserialize from JSON based on the return type
@@ -301,6 +310,7 @@ class UserDetail(BaseModel):
name: str
age: int


@instructor_cache
def extract(data) -> UserDetail:
# Assuming client.chat.completions.create returns a UserDetail instance
@@ -309,7 +319,7 @@ def extract(data) -> UserDetail:
response_model=UserDetail,
messages=[
{"role": "user", "content": data},
]
],
)
```
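
As a hedged usage sketch (not part of the diff above), the decorated function could be exercised as follows; the input string, a Redis server running locally, and the cache hit on the second call are assumptions:

```python
# Sketch only: assumes redis-server is reachable on localhost with default settings
first = extract("Extract jason is 25 years old")  # cache miss: calls the API, stores JSON in Redis
second = extract("Extract jason is 25 years old")  # same key: deserialized straight from Redis
assert first == second  # Pydantic models with identical field values compare equal
```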

2 changes: 1 addition & 1 deletion docs/blog/posts/chain-of-density.md
@@ -478,7 +478,7 @@ instructor jobs create-from-file generated.jsonl
Once the job is complete, all we need to do is to then change the annotation in the function call to `distil_summarization` in our original file above to start using our new model.

```py
@instructions.distil(model='gpt-3.5-turbo:finetuned-123', mode="dispatch") #(1)!
@instructions.distil(model='gpt-3.5-turbo:finetuned-123', mode="dispatch") # (1)!
def distil_summarization(text: str) -> GeneratedSummary:
summary_chain: List[str] = summarize_article(text)
return GeneratedSummary(summary=summary_chain[-1])
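
# Hedged usage sketch (not from the post): `article_text` is an assumed input string,
# and the call relies on the redecorated function dispatching to the finetuned model.
summary = distil_summarization(article_text)
print(summary.summary)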
7 changes: 4 additions & 3 deletions docs/blog/posts/citations.md
@@ -27,13 +27,14 @@ In this example, we use the `Statements` class to verify if a given substring qu
### Code Example:

```python
from typing import List, Optional
from typing import List
from openai import OpenAI
from pydantic import BaseModel, Field, ValidationError, ValidationInfo, field_validator, model_validator
from pydantic import BaseModel, ValidationInfo, field_validator
import instructor

client = instructor.patch(OpenAI())


class Statements(BaseModel):
body: str
substring_quote: str
@@ -44,7 +45,7 @@ class Statements(BaseModel):
context = info.context.get("text_chunks", None)

for text_chunk in context.values():
if v in text_chunk: # (1)
if v in text_chunk: # (1)
return v
raise ValueError("Could not find substring_quote `{v}` in contexts")
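
# Hedged, module-level sketch (separate from the class above; all values are made up):
# Pydantic v2's `model_validate` accepts a `context` dict, which is what
# `info.context` reads inside the field validator.
statement = Statements.model_validate(
    {"body": "Jason is 20 years old", "substring_quote": "Jason is 20"},
    context={"text_chunks": {1: "Jason is 20 and works as an engineer."}},
)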

40 changes: 28 additions & 12 deletions docs/blog/posts/distilation-part1.md
@@ -32,7 +32,7 @@ Before we dig into the nitty-gritty, let's look at how easy it is to use Instruc
import logging
import random
from pydantic import BaseModel
from instructor import Instructions # pip install instructor
from instructor import Instructions # pip install instructor

# Logging setup
logging.basicConfig(level=logging.INFO)
@@ -43,14 +43,16 @@ instructions = Instructions(
# log handler is used to save the data to a file
# you can imagine saving it to a database or other storage
# based on your needs!
log_handlers=[logging.FileHandler("math_finetunes.jsonl")]
log_handlers=[logging.FileHandler("math_finetunes.jsonl")],
)


class Multiply(BaseModel):
a: int
b: int
result: int


# Define a function with distillation
# The decorator will automatically generate a dataset for fine-tuning
# They must return a pydantic model to leverage function calling
@@ -59,11 +61,22 @@ def fn(a: int, b: int) -> Multiply:
resp = a * b
return Multiply(a=a, b=b, result=resp)


# Generate some data
for _ in range(10):
a = random.randint(100, 999)
b = random.randint(100, 999)
print(fn(a, b))
#> a=873 b=234 result=204282
#> a=902 b=203 result=183106
#> a=962 b=284 result=273208
#> a=491 b=739 result=362849
#> a=193 b=400 result=77200
#> a=300 b=448 result=134400
#> a=952 b=528 result=502656
#> a=574 b=797 result=457478
#> a=482 b=204 result=98328
#> a=781 b=278 result=217118
```

## The Intricacies of Fine-tuning Language Models
@@ -90,17 +103,17 @@ Here's how the logging output would look:
"messages": [
{"role": "system", "content": 'Predict the results of this function: ...'},
{"role": "user", "content": 'Return fn(133, b=539)'},
{"role": "assistant",
"function_call":
{
"name": "Multiply",
"arguments": '{"a":133,"b":539,"result":89509}'
}
}
{
"role": "assistant",
"function_call": {
"name": "Multiply",
"arguments": '{"a":133,"b":539,"result":89509}',
},
},
],
"functions": [
{"name": "Multiply", "description": "Correctly extracted `Multiply`..."}
]
],
}
```

@@ -121,18 +134,21 @@ Here's a sneak peek of what I'm planning:
```python
from instructor import Instructions, patch

patch() #(1)!
patch() # (1)!


class Multiply(BaseModel):
a: int
b: int
result: int


instructions = Instructions(
name="three_digit_multiply",
)

@instructions.distil(model='gpt-3.5-turbo:finetuned-123', mode="dispatch") # (2)!

@instructions.distil(model='gpt-3.5-turbo:finetuned-123', mode="dispatch") # (2)!
def fn(a: int, b: int) -> Multiply:
resp = a + b
return Multiply(a=a, b=b, result=resp)