43 changes: 43 additions & 0 deletions src/ai/aiModelHandler.ts
@@ -0,0 +1,43 @@
import { V1Deployment } from '@kubernetes/client-node'
import { AplAIModelResponse } from 'src/otomi-models'
import { getDeploymentsWithAIModelLabels } from './k8s'

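// Map Kubernetes deployment conditions onto the AplAIModel status condition shape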
function getConditions(deployment: V1Deployment) {
  return (deployment.status?.conditions || []).map((condition) => ({
    lastTransitionTime: condition.lastTransitionTime?.toISOString(),
    message: condition.message,
    reason: condition.reason,
    status: condition.status === 'True',
    type: condition.type,
  }))
}

export function transformK8sDeploymentToAplAIModel(deployment: V1Deployment): AplAIModelResponse {
  const labels = deployment.metadata?.labels || {}
  const modelName = deployment.metadata?.name || labels.modelName

  // Convert K8s deployment conditions to schema format
  const conditions = getConditions(deployment)

  return {
    kind: 'AplAIModel',
    metadata: {
      name: modelName,
    },
    spec: {
      displayName: modelName,
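      // Cluster-internal endpoint; assumes a Service with the Deployment's name exposes the model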
      modelEndpoint: `http://${deployment.metadata?.name}.${deployment.metadata?.namespace}.svc.cluster.local`,
      modelType: labels.modelType as 'foundation' | 'embedding',
      ...(labels.modelDimension && { modelDimension: parseInt(labels.modelDimension, 10) }),
    },
    status: {
      conditions,
      phase: deployment.status?.readyReplicas && deployment.status.readyReplicas > 0 ? 'Ready' : 'NotReady',
    },
  }
}

export async function getAIModels(): Promise<AplAIModelResponse[]> {
  const deployments = await getDeploymentsWithAIModelLabels()
  return deployments.map(transformK8sDeploymentToAplAIModel)
}
29 changes: 29 additions & 0 deletions src/ai/k8s.ts
@@ -0,0 +1,29 @@
import { AppsV1Api, KubeConfig, V1Deployment } from '@kubernetes/client-node'
import Debug from 'debug'

const debug = Debug('otomi:ai:k8s')

let appsApiClient: AppsV1Api | undefined

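// Lazily create and cache a single AppsV1Api client from the default kubeconfig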
function getAppsApiClient(): AppsV1Api {
  if (appsApiClient) return appsApiClient
  const kc = new KubeConfig()
  kc.loadFromDefault()
  appsApiClient = kc.makeApiClient(AppsV1Api)
  return appsApiClient
}

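// List deployments across all namespaces that carry both the modelType and modelName labels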
export async function getDeploymentsWithAIModelLabels(): Promise<V1Deployment[]> {
  const appsApi = getAppsApiClient()

  try {
    const labelSelector = 'modelType,modelName'
    const result = await appsApi.listDeploymentForAllNamespaces({ labelSelector })

    debug(`Found ${result.items.length} AI model deployments`)
    return result.items
  } catch (e) {
    debug('Error fetching deployments from Kubernetes:', e)
    return []
  }
}
33 changes: 33 additions & 0 deletions src/api.authz.test.ts
@@ -769,4 +769,37 @@ describe('API authz tests', () => {
        .expect('Content-Type', /json/)
    })
  })

  describe('AI Models endpoint tests', () => {
    test('platform admin can get AI models', async () => {
      jest.spyOn(otomiStack, 'getAllAIModels').mockResolvedValue([])
      await agent
        .get('/alpha/ai/models')
        .set('Authorization', `Bearer ${platformAdminToken}`)
        .expect(200)
        .expect('Content-Type', /json/)
    })

    test('team admin can get AI models', async () => {
      jest.spyOn(otomiStack, 'getAllAIModels').mockResolvedValue([])
      await agent
        .get('/alpha/ai/models')
        .set('Authorization', `Bearer ${teamAdminToken}`)
        .expect(200)
        .expect('Content-Type', /json/)
    })

    test('team member can get AI models', async () => {
      jest.spyOn(otomiStack, 'getAllAIModels').mockResolvedValue([])
      await agent
        .get('/alpha/ai/models')
        .set('Authorization', `Bearer ${teamMemberToken}`)
        .expect(200)
        .expect('Content-Type', /json/)
    })

    test('anonymous user cannot get AI models', async () => {
      await agent.get('/alpha/ai/models').expect(401)
    })
  })
})
20 changes: 20 additions & 0 deletions src/api/alpha/ai/models.ts
@@ -0,0 +1,20 @@
import Debug from 'debug'
import { Operation, OperationHandlerArray } from 'express-openapi'
import { OpenApiRequestExt } from 'src/otomi-models'

const debug = Debug('otomi:api:alpha:ai:models')

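// GET /alpha/ai/models: return the shared AI models discovered in the cluster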
export default function (): OperationHandlerArray {
  const get: Operation = [
    /* business middleware not expressible by OpenAPI documentation goes here */
    async ({ otomi }: OpenApiRequestExt, res): Promise<void> => {
      debug('getAllAIModels')
      const v = await otomi.getAllAIModels()
      res.json(v)
    },
  ]
  const api = {
    get,
  }
  return api
}
14 changes: 7 additions & 7 deletions src/openapi/aiModel.yaml
@@ -1,11 +1,10 @@
-AplAIModel:
+AIModel:
  type: object
  x-acl:
-   platformAdmin:
-     - read-any
-   teamAdmin:
-     - read
-   teamMember:
-     - read
+   platformAdmin: [read-any]
+   teamAdmin: [read-any]
+   teamMember: [read-any]
  properties: {}

AplAIModelSpec:
  x-acl:
@@ -38,4 +37,5 @@ AplAIModelSpec:
      example: 4096
  required:
    - modelEndpoint
+   - modelType
  type: object
6 changes: 4 additions & 2 deletions src/openapi/api.yaml
@@ -2475,7 +2475,7 @@ paths:
    get:
      operationId: getAIModels
      description: Get available shared AI models (foundation or embedding)
-     x-aclSchema: AplAIModel
+     x-aclSchema: AIModel
      responses:
        '200':
          description: Successfully obtained shared AI models
@@ -2827,7 +2827,7 @@ components:
      properties:
        kind:
          type: string
-         enum: [AplKnowledgeBase]
+         enum: [AplAIModel]
        spec:
          $ref: 'aiModel.yaml#/AplAIModelSpec'
      required:
@@ -3127,6 +3127,8 @@ components:
      $ref: 'testrepoconnect.yaml#/TestRepoConnect'
    InternalRepoUrls:
      $ref: 'internalRepoUrls.yaml#/InternalRepoUrls'
+   AIModel:
+     $ref: 'aiModel.yaml#/AIModel'
    Team:
      $ref: 'team.yaml#/Team'
    TeamAuthz:
1 change: 1 addition & 0 deletions src/otomi-models.ts
@@ -8,6 +8,7 @@ export type AppList = components['schemas']['AppList']
export type Backup = components['schemas']['Backup']
export type AplBackupRequest = components['schemas']['AplBackupRequest']
export type AplBackupResponse = components['schemas']['AplBackupResponse']
+export type AplAIModelResponse = components['schemas']['AplAIModelResponse']
export type Kubecfg = components['schemas']['Kubecfg']
export type K8sService = components['schemas']['K8sService']
export type Netpol = components['schemas']['Netpol']
8 changes: 7 additions & 1 deletion src/otomi-stack.ts
@@ -1,4 +1,4 @@
-import { CoreV1Api, User as k8sUser, KubeConfig, V1ObjectReference } from '@kubernetes/client-node'
+import { CoreV1Api, KubeConfig, User as k8sUser, V1ObjectReference } from '@kubernetes/client-node'
import Debug from 'debug'

import { getRegions, ObjectStorageKeyRegions } from '@linode/api-v4'
@@ -12,6 +12,7 @@ import { AlreadyExists, ForbiddenError, HttpError, OtomiError, PublicUrlExists,
import getRepo, { getWorktreeRepo, Git } from 'src/git'
import { cleanSession, getSessionStack } from 'src/middleware'
import {
+  AplAIModelResponse,
  AplBackupRequest,
  AplBackupResponse,
  AplBuildRequest,
@@ -114,6 +115,7 @@ import { getSealedSecretsPEM, sealedSecretManifest, SealedSecretManifestType } f
import { getKeycloakUsers, isValidUsername } from './utils/userUtils'
import { ObjectStorageClient } from './utils/wizardUtils'
import { fetchChartYaml, fetchWorkloadCatalog, NewHelmChartValues, sparseCloneChart } from './utils/workloadUtils'
+import { getAIModels } from './ai/aiModelHandler'

interface ExcludedApp extends App {
managed: boolean
@@ -2112,6 +2114,10 @@ export default class OtomiStack {
    return names
  }

+  async getAllAIModels(): Promise<AplAIModelResponse[]> {
+    return getAIModels()
+  }

  async getK8sServices(teamId: string): Promise<Array<K8sService>> {
    if (env.isDev) return []
    // const teams = user.teams.map((name) => {