-
Notifications
You must be signed in to change notification settings - Fork 5.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
cache and reuse intermediate blobs #4330
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,13 +10,16 @@ import ( | |
"net/http" | ||
"os" | ||
"path/filepath" | ||
"sync" | ||
|
||
"github.com/ollama/ollama/api" | ||
"github.com/ollama/ollama/convert" | ||
"github.com/ollama/ollama/llm" | ||
"github.com/ollama/ollama/types/model" | ||
) | ||
|
||
var intermediateBlobs sync.Map | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. To confirm, this is a map of blob digests to ggml model layer digests? Does intermediate mean f16 but not yet quantized? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. "Intermediate" here means any blob that's not referenced in the final manifest but is used to produce something in it. Here's a concrete example:
when you want to create another quantization, this map tracks the relationship between the zip and f16 so it's able to skip uploading the zip and reconverting the f16 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this probably deserves a comment describing its operation. I think most people aren't going to know what intermediate means here when they're going through the code. I was trying to think of some other names, like maybe "blobCache", but then I think people will wonder why only some things are in the blobCache. Although why not shove everything into the blobCache? Why just put the "intermediate" stuff there? Or the inverse: why not just put the intermediate stuff on disk and just use the normal mechanism for pulling blobs? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm also a little confused about why |
||
|
||
type layerWithGGML struct { | ||
*Layer | ||
*llm.GGML | ||
|
@@ -76,7 +79,7 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe | |
return layers, nil | ||
} | ||
|
||
func parseFromZipFile(_ context.Context, file *os.File, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) { | ||
func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) { | ||
stat, err := file.Stat() | ||
if err != nil { | ||
return nil, err | ||
|
@@ -169,12 +172,7 @@ func parseFromZipFile(_ context.Context, file *os.File, fn func(api.ProgressResp | |
return nil, fmt.Errorf("aaa: %w", err) | ||
} | ||
|
||
blobpath, err := GetBlobsPath(layer.Digest) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
bin, err := os.Open(blobpath) | ||
bin, err := layer.Open() | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
@@ -185,16 +183,13 @@ func parseFromZipFile(_ context.Context, file *os.File, fn func(api.ProgressResp | |
return nil, err | ||
} | ||
|
||
layer, err = NewLayerFromLayer(layer.Digest, layer.MediaType, "") | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
layers = append(layers, &layerWithGGML{layer, ggml}) | ||
|
||
intermediateBlobs.Store(digest, layer.Digest) | ||
return layers, nil | ||
} | ||
|
||
func parseFromFile(ctx context.Context, file *os.File, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) { | ||
func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) { | ||
sr := io.NewSectionReader(file, 0, 512) | ||
contentType, err := detectContentType(sr) | ||
if err != nil { | ||
|
@@ -205,7 +200,7 @@ func parseFromFile(ctx context.Context, file *os.File, fn func(api.ProgressRespo | |
case "gguf", "ggla": | ||
// noop | ||
case "application/zip": | ||
return parseFromZipFile(ctx, file, fn) | ||
return parseFromZipFile(ctx, file, digest, fn) | ||
default: | ||
return nil, fmt.Errorf("unsupported content type: %s", contentType) | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -841,6 +841,25 @@ func (s *Server) HeadBlobHandler(c *gin.Context) { | |
} | ||
|
||
func (s *Server) CreateBlobHandler(c *gin.Context) { | ||
ib, ok := intermediateBlobs.Load(c.Param("digest")) | ||
if ok { | ||
p, err := GetBlobsPath(ib.(string)) | ||
if err != nil { | ||
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) | ||
return | ||
} | ||
|
||
if _, err := os.Stat(p); errors.Is(err, os.ErrNotExist) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You should probably include a debug statement in here for the case where the cache hit but something else removed the storage. |
||
intermediateBlobs.Delete(c.Param("digest")) | ||
} else if err != nil { | ||
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) | ||
return | ||
} else { | ||
c.Status(http.StatusOK) | ||
return | ||
} | ||
} | ||
|
||
path, err := GetBlobsPath(c.Param("digest")) | ||
if err != nil { | ||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()}) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Aren't `ib` and `digest` the same thing? Can't you just call this as `_, ok := intermediateBlobs.Load(digest); ok {` and use `digest` below?