Ollama ps command for showing currently loaded models #4327

Merged · 6 commits · May 14, 2024
9 changes: 9 additions & 0 deletions api/client.go
@@ -354,6 +354,15 @@ func (c *Client) List(ctx context.Context) (*ListResponse, error) {
return &lr, nil
}

// ListRunning lists running models via the /api/ps endpoint.
func (c *Client) ListRunning(ctx context.Context) (*ListResponse, error) {
var lr ListResponse
if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
return nil, err
}
return &lr, nil
}

// Copy copies a model - creating a model with another name from an existing
// model.
func (c *Client) Copy(ctx context.Context, req *CopyRequest) error {
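For orientation, a minimal sketch of calling the new method from Go, assuming the standard api and format packages are imported and a server is reachable; the printing loop is illustrative, not part of this PR:

	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}
	// ListRunning issues GET /api/ps and returns the currently loaded models.
	running, err := client.ListRunning(context.Background())
	if err != nil {
		log.Fatal(err)
	}
	for _, m := range running.Models {
		fmt.Printf("%s: %s in VRAM\n", m.Name, format.HumanBytes(m.SizeVRAM))
	}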
4 changes: 3 additions & 1 deletion api/types.go
@@ -289,10 +289,12 @@ type ListResponse struct {
type ModelResponse struct {
Name string `json:"name"`
Model string `json:"model"`
ModifiedAt time.Time `json:"modified_at"`
ModifiedAt time.Time `json:"modified_at,omitempty"`
Size int64 `json:"size"`
Digest string `json:"digest"`
Details ModelDetails `json:"details,omitempty"`
ExpiresAt time.Time `json:"expires_at,omitempty"`
SizeVRAM int64 `json:"size_vram,omitempty"`
}

type TokenResponse struct {
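The two new fields feed the ps output: SizeVRAM is the estimated number of bytes of the model resident in GPU memory, and ExpiresAt is when the scheduler is due to unload it; both are filled in by the new /api/ps handler further down.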
75 changes: 75 additions & 0 deletions cmd/cmd.go
@@ -12,6 +12,7 @@ import (
"fmt"
"io"
"log"
"math"
"net"
"net/http"
"os"
@@ -324,6 +325,18 @@ func RunHandler(cmd *cobra.Command, args []string) error {
}
opts.Format = format

keepAlive, err := cmd.Flags().GetString("keepalive")
@jmorganca (Member) commented on May 13, 2024:

Suggested change:
-	keepAlive, err := cmd.Flags().GetString("keepalive")
+	keepAlive, err := cmd.Flags().GetString("keep-alive")

I think this is ok as is, but would suggest keep-alive to be consistent with the API's keep_alive – although the design of ollama's CLI should be a matter of taste/usability over consistency.

The author (Contributor) replied:

I thought about this, but we have --nowordwrap as one of the other options for run, so it felt weird going to kebab case here.

if err != nil {
return err
}
if keepAlive != "" {
d, err := time.ParseDuration(keepAlive)
if err != nil {
return err
}
opts.KeepAlive = &api.Duration{Duration: d}
}

prompts := args[1:]
// prepend stdin to the prompt if provided
if !term.IsTerminal(int(os.Stdin.Fd())) {
@@ -496,6 +509,52 @@ func ListHandler(cmd *cobra.Command, args []string) error {
return nil
}

func ListRunningHandler(cmd *cobra.Command, args []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}

models, err := client.ListRunning(cmd.Context())
if err != nil {
return err
}

var data [][]string

for _, m := range models.Models {
if len(args) == 0 || strings.HasPrefix(m.Name, args[0]) {
var procStr string
switch {
case m.SizeVRAM == 0:
procStr = "100% CPU"
case m.SizeVRAM == m.Size:
procStr = "100% GPU"
case m.SizeVRAM > m.Size || m.Size == 0:
procStr = "Unknown"
default:
sizeCPU := m.Size - m.SizeVRAM
cpuPercent := math.Round(float64(sizeCPU) / float64(m.Size) * 100)
procStr = fmt.Sprintf("%d%%/%d%% CPU/GPU", int(cpuPercent), int(100-cpuPercent))
}
data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, format.HumanTime(m.ExpiresAt, "Never")})
}
}

table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"NAME", "ID", "SIZE", "PROCESSOR", "UNTIL"})
table.SetHeaderAlignment(tablewriter.ALIGN_LEFT)
table.SetAlignment(tablewriter.ALIGN_LEFT)
table.SetHeaderLine(false)
table.SetBorder(false)
table.SetNoWhiteSpace(true)
table.SetTablePadding("\t")
table.AppendBulk(data)
table.Render()

return nil
}
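A worked example of the processor split: a model with Size 10 GB of which SizeVRAM is 7.5 GB leaves sizeCPU = 2.5 GB, so cpuPercent rounds to 25 and the column reads 25%/75% CPU/GPU. Hypothetical output under that assumption (name, digest, and expiry are invented for illustration):

	NAME         	ID          	SIZE 	PROCESSOR      	UNTIL
	llama3:latest	365c0bd3c000	10 GB	25%/75% CPU/GPU	4 minutes from now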

func DeleteHandler(cmd *cobra.Command, args []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {
@@ -672,6 +731,7 @@ type runOptions struct {
Images []api.ImageData
Options map[string]interface{}
MultiModal bool
KeepAlive *api.Duration
}

type displayResponseState struct {
@@ -766,6 +826,10 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
Options: opts.Options,
}

if opts.KeepAlive != nil {
req.KeepAlive = opts.KeepAlive
}

if err := client.Chat(cancelCtx, req, fn); err != nil {
if errors.Is(err, context.Canceled) {
return nil, nil
@@ -1075,6 +1139,7 @@ func NewCLI() *cobra.Command {
RunE: RunHandler,
}

runCmd.Flags().String("keepalive", "", "Duration to keep a model loaded (e.g. 5m)")
runCmd.Flags().Bool("verbose", false, "Show timings for response")
runCmd.Flags().Bool("insecure", false, "Use an insecure registry")
runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically")
@@ -1123,6 +1188,14 @@ Environment Variables:
PreRunE: checkServerHeartbeat,
RunE: ListHandler,
}

psCmd := &cobra.Command{
Use: "ps",
Short: "List running models",
PreRunE: checkServerHeartbeat,
RunE: ListRunningHandler,
}

copyCmd := &cobra.Command{
Use: "cp SOURCE DESTINATION",
Short: "Copy a model",
@@ -1146,6 +1219,7 @@ Environment Variables:
pullCmd,
pushCmd,
listCmd,
psCmd,
copyCmd,
deleteCmd,
} {
@@ -1160,6 +1234,7 @@ Environment Variables:
pullCmd,
pushCmd,
listCmd,
psCmd,
copyCmd,
deleteCmd,
)
5 changes: 5 additions & 0 deletions cmd/interactive.go
@@ -56,6 +56,11 @@ func loadModel(cmd *cobra.Command, opts *runOptions) error {
Model: opts.Model,
Messages: []api.Message{},
}

if opts.KeepAlive != nil {
chatReq.KeepAlive = opts.KeepAlive
}

err = client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
p.StopAndClear()
if len(opts.Messages) > 0 {
4 changes: 3 additions & 1 deletion format/time.go
@@ -60,7 +60,9 @@ func humanTime(t time.Time, zeroValue string) string {
}

delta := time.Since(t)
if delta < 0 {
if int(delta.Hours())/24/365 < -20 {
return "Forever"
} else if delta < 0 {
return humanDuration(-delta) + " from now"
}

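Since delta = time.Since(t) is negative for future timestamps, the int(delta.Hours())/24/365 < -20 guard matches anything more than roughly 20 years out, so an ExpiresAt set far in the future (for example, a model configured to stay loaded indefinitely) renders as "Forever" rather than an enormous relative duration.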
10 changes: 10 additions & 0 deletions format/time_test.go
@@ -32,4 +32,14 @@ func TestHumanTime(t *testing.T) {
v := now.Add(800 * time.Millisecond)
assertEqual(t, HumanTime(v, ""), "Less than a second from now")
})

t.Run("time way in the future", func(t *testing.T) {
v := now.Add(24 * time.Hour * 365 * 200)
assertEqual(t, HumanTime(v, ""), "Forever")
})

t.Run("time way in the future lowercase", func(t *testing.T) {
v := now.Add(24 * time.Hour * 365 * 200)
assertEqual(t, HumanTimeLower(v, ""), "forever")
})
}
5 changes: 5 additions & 0 deletions llm/server.go
@@ -38,6 +38,7 @@ type LlamaServer interface {
Detokenize(ctx context.Context, tokens []int) (string, error)
Close() error
EstimatedVRAM() uint64
EstimatedTotal() uint64
}

// llmServer is an instance of the llama.cpp server
@@ -945,6 +946,10 @@ func (s *llmServer) EstimatedVRAM() uint64 {
return s.estimatedVRAM
}

func (s *llmServer) EstimatedTotal() uint64 {
return s.estimatedTotal
}

func parseDurationMs(ms float64) time.Duration {
dur, err := time.ParseDuration(fmt.Sprintf("%fms", ms))
if err != nil {
29 changes: 29 additions & 0 deletions server/routes.go
@@ -979,6 +979,7 @@ func (s *Server) GenerateRoutes() http.Handler {
r.POST("/api/show", s.ShowModelHandler)
r.POST("/api/blobs/:digest", s.CreateBlobHandler)
r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
r.GET("/api/ps", s.ProcessHandler)

// Compatibility endpoints
r.POST("/v1/chat/completions", openai.Middleware(), s.ChatHandler)
@@ -1137,6 +1138,34 @@ func streamResponse(c *gin.Context, ch chan any) {
})
}

func (s *Server) ProcessHandler(c *gin.Context) {
models := []api.ModelResponse{}

for _, v := range s.sched.loaded {
model := v.model
modelDetails := api.ModelDetails{
Format: model.Config.ModelFormat,
Family: model.Config.ModelFamily,
Families: model.Config.ModelFamilies,
ParameterSize: model.Config.ModelType,
QuantizationLevel: model.Config.FileType,
}

mr := api.ModelResponse{
Model: model.ShortName,
Name: model.ShortName,
Size: int64(v.estimatedTotal),
SizeVRAM: int64(v.estimatedVRAM),
Digest: model.Digest,
Details: modelDetails,
ExpiresAt: v.expiresAt,
}
models = append(models, mr)
}

c.JSON(http.StatusOK, api.ListResponse{Models: models})
}
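Hitting the route directly is a quick sanity check; a sketch against a default local install (11434 is Ollama's usual port; adjust if yours differs):

	curl http://localhost:11434/api/ps

The reply reuses ListResponse, so it matches the shape of /api/tags but with size_vram and expires_at populated for each loaded model.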

// chatPrompt builds up a prompt from a series of messages for the currently `loaded` model
func chatPrompt(ctx context.Context, runner *runnerRef, template string, messages []api.Message, numCtx int) (string, error) {
encode := func(s string) ([]int, error) {