/
pjrt_c_api.h
2104 lines (1803 loc) · 77.8 KB
/
pjrt_c_api.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef XLA_PJRT_C_PJRT_C_API_H_
#define XLA_PJRT_C_PJRT_C_API_H_
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
// Evaluates to the number of bytes of `struct_type` from its start through the
// end of `last_field` (offset of `last_field` plus its size). Padding that may
// follow `last_field` is not included.
//
// The expansion is wrapped in parentheses so that it behaves as a single
// operand when used inside a larger expression (e.g. `2 * PJRT_STRUCT_SIZE(..)`
// or a comparison).
#define PJRT_STRUCT_SIZE(struct_type, last_field) \
  (offsetof(struct_type, last_field) +            \
   sizeof(((struct_type*)0)->last_field))
// For a struct tag `sname`:
//   * declares `sname` as a typedef for `struct sname`, and
//   * defines the constant `sname_STRUCT_SIZE`, the size of the struct through
//     `last_field` (see PJRT_STRUCT_SIZE).
//
// The constant is `static` so that every translation unit that includes this
// header gets its own private copy. Without it, a file-scope `const` object has
// external linkage in C, and including the header from two C source files
// produces duplicate-symbol link errors (C++ already gives such objects
// internal linkage, so the behavior there is unchanged).
#define PJRT_DEFINE_STRUCT_TRAITS(sname, last_field) \
  typedef struct sname sname;                        \
  static const size_t sname##_STRUCT_SIZE =          \
      PJRT_STRUCT_SIZE(sname, last_field);
#ifdef __cplusplus
extern "C" {
#endif
// --------------------------------- Version -----------------------------------
// Incremented when an ABI-incompatible change is made to the interface.
// Changes include:
// * Deleting a method or argument
// * Changing the type of an argument
// * Rearranging fields in the PJRT_Api or argument structs
#define PJRT_API_MAJOR 0
// Incremented when the interface is updated in a way that is potentially
// ABI-compatible with older versions, if supported by the caller and/or
// implementation.
//
// Callers can implement forwards compatibility by using PJRT_Api_Version to
// check if the implementation is aware of newer interface additions.
//
// Implementations can implement backwards compatibility by using the
// `struct_size` fields to detect how many struct fields the caller is aware of.
//
// Changes include:
// * Adding a new field to the PJRT_Api or argument structs
// * Renaming a method or argument (doesn't affect ABI)
#define PJRT_API_MINOR 34
// Version record reported by a plugin.
//
// The plugin should set the major_version and minor_version of
// PJRT_Api.pjrt_api_version to be the `PJRT_API_MAJOR` and `PJRT_API_MINOR` in
// this header that the implementation was compiled with.
struct PJRT_Api_Version {
// Used for the struct-size based compatibility scheme described above
// PJRT_API_MINOR.
size_t struct_size;
void* priv;
int major_version; // out
int minor_version; // out
};
// Defines PJRT_Api_Version_STRUCT_SIZE, covering the fields through
// `minor_version`.
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Api_Version, minor_version);
// ---------------------------------- Errors -----------------------------------
// PJRT C API methods generally return a PJRT_Error*, which is nullptr if there
// is no error and set if there is. The implementation allocates any returned
// PJRT_Errors, but the caller is always responsible for freeing them via
// PJRT_Error_Destroy.
//
// Only a forward declaration is given here, so callers handle PJRT_Error
// exclusively through pointers.
typedef struct PJRT_Error PJRT_Error;
// Arguments for PJRT_Error_Destroy.
struct PJRT_Error_Destroy_Args {
size_t struct_size;
void* priv;
// The error to free. May be nullptr.
PJRT_Error* error;
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Error_Destroy_Args, error);
// Frees `error`. `error` can be nullptr.
// Note that this function returns void rather than a PJRT_Error*.
typedef void PJRT_Error_Destroy(PJRT_Error_Destroy_Args* args);
// Arguments for PJRT_Error_Message.
struct PJRT_Error_Message_Args {
size_t struct_size;
void* priv;
// The error whose message is requested. It is not modified (const).
const PJRT_Error* error;
// Has the lifetime of `error`.
const char* message; // out
// Presumably the length of `message` in bytes -- TODO confirm whether a
// terminating NUL is guaranteed or counted.
size_t message_size; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Error_Message_Args, message_size);
// Gets the human-readable reason for `error`. `message` has the lifetime of
// `error`.
// Note that this function returns void rather than a PJRT_Error*.
typedef void PJRT_Error_Message(PJRT_Error_Message_Args* args);
// Error categories attached to a PJRT_Error.
// Codes are based on https://abseil.io/docs/cpp/guides/status-codes
// Values are assigned explicitly and start at 1; no enumerator is defined for
// the value 0.
typedef enum {
PJRT_Error_Code_CANCELLED = 1,
PJRT_Error_Code_UNKNOWN = 2,
PJRT_Error_Code_INVALID_ARGUMENT = 3,
PJRT_Error_Code_DEADLINE_EXCEEDED = 4,
PJRT_Error_Code_NOT_FOUND = 5,
PJRT_Error_Code_ALREADY_EXISTS = 6,
PJRT_Error_Code_PERMISSION_DENIED = 7,
PJRT_Error_Code_RESOURCE_EXHAUSTED = 8,
PJRT_Error_Code_FAILED_PRECONDITION = 9,
PJRT_Error_Code_ABORTED = 10,
PJRT_Error_Code_OUT_OF_RANGE = 11,
PJRT_Error_Code_UNIMPLEMENTED = 12,
PJRT_Error_Code_INTERNAL = 13,
PJRT_Error_Code_UNAVAILABLE = 14,
PJRT_Error_Code_DATA_LOSS = 15,
PJRT_Error_Code_UNAUTHENTICATED = 16
} PJRT_Error_Code;
// Arguments for PJRT_Error_GetCode.
struct PJRT_Error_GetCode_Args {
size_t struct_size;
void* priv;
// The error being queried. It is not modified (const).
const PJRT_Error* error;
// Receives the PJRT_Error_Code associated with `error`.
PJRT_Error_Code code; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Error_GetCode_Args, code);
// Reports the code of `args->error` in `args->code`. Unlike
// PJRT_Error_Destroy and PJRT_Error_Message, this function itself returns a
// PJRT_Error*.
typedef PJRT_Error* PJRT_Error_GetCode(PJRT_Error_GetCode_Args* args);
// Function for PJRT implementation to pass to callback functions provided by
// caller so the callback can create a PJRT_Error* on error (to return to the
// implementation). `message` is only required to live for the
// PJRT_CallbackError call, i.e. the PJRT_CallbackError implementation must copy
// `message` into the PJRT_Error.
//
// Note: unlike the API method typedefs in this header (which name function
// types), PJRT_CallbackError names a *pointer to function* type.
//
// Parameters:
//   code         - error category for the new PJRT_Error.
//   message      - text describing the error; copied by the implementation.
//   message_size - size of `message` (presumably in bytes -- TODO confirm).
// Returns the newly created PJRT_Error*.
typedef PJRT_Error* (*PJRT_CallbackError)(PJRT_Error_Code code,
const char* message,
size_t message_size);
// ---------------------------- Named Values -----------------------------------
// Kinds of value that a PJRT_NamedValue can carry. The numeric values are
// spelled out so that they are visible at a glance.
typedef enum {
  PJRT_NamedValue_kString = 0,
  PJRT_NamedValue_kInt64 = 1,
  PJRT_NamedValue_kInt64List = 2,
  PJRT_NamedValue_kFloat = 3,
  PJRT_NamedValue_kBool = 4
} PJRT_NamedValue_Type;
// Named value for key-value pairs.
// The value is stored in an anonymous union; `type` records which kind of
// value is held.
struct PJRT_NamedValue {
size_t struct_size;
void* priv;
// The key. `name_size` is the size of `name` (presumably in bytes -- TODO
// confirm whether `name` is also NUL-terminated).
const char* name;
size_t name_size;
// Presumably selects which union member below is meaningful -- the mapping
// follows the enumerator names (kString -> string_value, etc.).
PJRT_NamedValue_Type type;
// Anonymous union: all members share the same storage.
union {
const char* string_value;
int64_t int64_value;
const int64_t* int64_array_value;
float float_value;
bool bool_value;
};
// `value_size` is the number of elements for array/string and 1 for scalar
// values.
size_t value_size;
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_NamedValue, value_size);
// ---------------------------------- Plugin -----------------------------------
// Arguments for PJRT_Plugin_Initialize. The struct carries no fields beyond
// the common `struct_size` / `priv` header.
struct PJRT_Plugin_Initialize_Args {
size_t struct_size;
void* priv;
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Plugin_Initialize_Args, priv);
// One-time plugin setup. Must be called before any other functions are called.
typedef PJRT_Error* PJRT_Plugin_Initialize(PJRT_Plugin_Initialize_Args* args);
// Arguments for PJRT_Plugin_Attributes.
struct PJRT_Plugin_Attributes_Args {
  size_t struct_size;
  void* priv;
  // Returned attributes have the lifetime of the process.
  PJRT_NamedValue* attributes;  // out
  // Number of entries in `attributes`.
  size_t num_attributes;  // out
};
// The struct size must extend through the final field, `num_attributes`.
// Naming `attributes` here would leave `num_attributes` outside
// PJRT_Plugin_Attributes_Args_STRUCT_SIZE, so a size check against that
// constant could accept a struct too small to hold the count.
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Plugin_Attributes_Args, num_attributes);
// Returns an array of plugin attributes which are key-value pairs. One example
// attribute is the minimum supported StableHLO version.
// TODO(b/280349977): standardize the list of attributes.
typedef PJRT_Error* PJRT_Plugin_Attributes(PJRT_Plugin_Attributes_Args* args);
// ---------------------------------- Events -----------------------------------
// Represents a notifying event that is returned by PJRT APIs that enqueue
// asynchronous work, informing callers when the work is complete and reporting
// a value of type `PJRT_Error*` or `nullptr` as error status.
//
// Callers are always responsible for freeing `PJRT_Event`s by calling
// `PJRT_Event_Destroy`.
//
// Only a forward declaration is given here, so callers handle PJRT_Event
// exclusively through pointers.
typedef struct PJRT_Event PJRT_Event;
// Arguments for PJRT_Event_Destroy.
struct PJRT_Event_Destroy_Args {
size_t struct_size;
void* priv;
// The event to free. May be `nullptr`.
PJRT_Event* event;
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Event_Destroy_Args, event);
// Frees `event`. `event` can be `nullptr`.
typedef PJRT_Error* PJRT_Event_Destroy(PJRT_Event_Destroy_Args* args);
// Arguments for PJRT_Event_IsReady.
struct PJRT_Event_IsReady_Args {
size_t struct_size;
void* priv;
// The event being polled.
PJRT_Event* event;
// Set to true once the event has completed (successfully or with an error).
bool is_ready; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Event_IsReady_Args, is_ready);
// Returns true if this PJRT_Event has completed, including if an error has
// occurred.
// The result is delivered through `args->is_ready`; the function's own return
// value is a PJRT_Error*.
typedef PJRT_Error* PJRT_Event_IsReady(PJRT_Event_IsReady_Args* args);
// Arguments for PJRT_Event_Error.
struct PJRT_Event_Error_Args {
size_t struct_size;
void* priv;
// The event whose error status is requested.
PJRT_Event* event;
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Event_Error_Args, event);
// Should only be called if PJRT_Event_IsReady returns true.
// Returns `nullptr` if there is no error.
// The returned error should be freed with `PJRT_Error_Destroy`.
//
// If `PJRT_Event_Await` has been called, this will return a pointer to an
// identical error status as that call, as will subsequent calls to
// `PJRT_Event_Error`. However, each of these `PJRT_Error *` pointers is
// independent of `PJRT_Error *`s returned by other function calls, so they must
// each be freed separately using `PJRT_Error_Destroy`.
typedef PJRT_Error* PJRT_Event_Error(PJRT_Event_Error_Args* args);
// Arguments for PJRT_Event_Await.
struct PJRT_Event_Await_Args {
size_t struct_size;
void* priv;
// The event to wait on.
PJRT_Event* event;
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Event_Await_Args, event);
// Blocks the calling thread until `event` is ready, then returns the error
// status (with `nullptr` indicating no error). The returned status should be
// freed with `PJRT_Error_Destroy`.
typedef PJRT_Error* PJRT_Event_Await(PJRT_Event_Await_Args* args);
// A callback to be performed once an event is ready. It will be called on the
// event's error state and a pointer to an object of the caller's choice.
// Ownership of `error` is passed to the callback. The callback must destroy
// `error` via `PJRT_Error_Destroy`. The caller retains ownership of `user_arg`.
// This typedef names a pointer-to-function type.
typedef void (*PJRT_Event_OnReadyCallback)(PJRT_Error* error, void* user_arg);
// Arguments for PJRT_Event_OnReady.
struct PJRT_Event_OnReady_Args {
size_t struct_size;
void* priv;
// The event to attach `callback` to.
PJRT_Event* event;
// Invoked once `event` is ready.
PJRT_Event_OnReadyCallback callback;
// `user_arg` allows `callback` to be called with arbitrary arguments (e.g.
// via pointers in a struct cast to void*).
void* user_arg;
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Event_OnReady_Args, user_arg);
// Registers `callback` to be called once `event` is ready, with `event`'s
// error status and a pointer to an object of the caller's choice as arguments.
typedef PJRT_Error* PJRT_Event_OnReady(PJRT_Event_OnReady_Args* args);
// ---------------------------------- Client -----------------------------------
// Forward declarations of the handle types used by the API structs that
// follow. No struct bodies are given here, so these types are only ever used
// through pointers.
typedef struct PJRT_Client PJRT_Client;
typedef struct PJRT_Device PJRT_Device;
typedef struct PJRT_Memory PJRT_Memory;
typedef struct PJRT_DeviceDescription PJRT_DeviceDescription;
typedef struct PJRT_Executable PJRT_Executable;
typedef struct PJRT_LoadedExecutable PJRT_LoadedExecutable;
typedef struct PJRT_Buffer PJRT_Buffer;
// The caller of PJRT_Client_Create can optionally provide a key-value store
// accessible across nodes and/or processes. KV store access may be necessary to
// create some multi-node/multi-process clients. The caller can provide the two
// callbacks below to access the key-value store.
// A callback to delete the value returned by PJRT_KeyValueGetCallback.
typedef void (*PJRT_KeyValueGetCallback_ValueDeleter)(char* value);
// Arguments passed to a PJRT_KeyValueGetCallback.
struct PJRT_KeyValueGetCallback_Args {
size_t struct_size;
void* priv;
// Key to look up, with its size.
const char* key;
size_t key_size;
// Timeout for the lookup, in milliseconds (per the field name).
int timeout_in_ms;
// NOTE: PJRT_CallbackError is itself a pointer-to-function typedef, so this
// field is a pointer to a function pointer.
PJRT_CallbackError* callback_error;
// Presumably the `kv_get_user_arg` supplied to PJRT_Client_Create -- see that
// struct's field comments.
void* user_arg;
char* value; // out
size_t value_size; // out
// The caller needs to set a PJRT_KeyValueGetCallback_ValueDeleter to delete
// the value returned by PJRT_KeyValueGetCallback. The implementation is
// responsible for copying `value` and then calling value_deleter_callback.
PJRT_KeyValueGetCallback_ValueDeleter value_deleter_callback; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_KeyValueGetCallback_Args,
value_deleter_callback);
// Requirements for PJRT_KeyValueGetCallback implementation: (1) Thread-safe.
// (2) The caller that provides the two callbacks is responsible for avoiding
// key collisions between different users of key-value store (i.e. between
// different plugins, but not between different nodes in one plugin). (3)
// Blocking.
typedef PJRT_Error* (*PJRT_KeyValueGetCallback)(
PJRT_KeyValueGetCallback_Args* args);
// Arguments passed to a PJRT_KeyValuePutCallback.
struct PJRT_KeyValuePutCallback_Args {
size_t struct_size;
void* priv;
// Key to store under, with its size.
const char* key;
size_t key_size;
// Only needs to stay alive for the duration of the PJRT_KeyValuePutCallback
// call.
const char* value;
size_t value_size;
// NOTE: PJRT_CallbackError is itself a pointer-to-function typedef, so this
// field is a pointer to a function pointer.
PJRT_CallbackError* callback_error;
// Presumably the `kv_put_user_arg` supplied to PJRT_Client_Create -- see that
// struct's field comments.
void* user_arg;
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_KeyValuePutCallback_Args, user_arg);
// Requirements for PJRT_KeyValuePutCallback implementation: (1) Thread-safe.
// (2) The caller that provides the two callbacks is responsible for avoiding
// key collisions between different users of key-value store (i.e. between
// different plugins, but not between different nodes in one plugin).
typedef PJRT_Error* (*PJRT_KeyValuePutCallback)(
PJRT_KeyValuePutCallback_Args* args);
// Arguments for PJRT_Client_Create.
struct PJRT_Client_Create_Args {
size_t struct_size;
void* priv;
// Extra platform-specific options to create a client.
// `num_options` is the number of entries in `create_options`.
PJRT_NamedValue* create_options;
size_t num_options;
// Key-value get/put callback provided by the caller of PJRT_Client_Create.
// PJRT client can use these callbacks to share information between
// processes/nodes.
PJRT_KeyValueGetCallback kv_get_callback;
// Will be passed to `kv_get_callback` as `user_arg` argument.
void* kv_get_user_arg;
PJRT_KeyValuePutCallback kv_put_callback;
// Will be passed to `kv_put_callback` as `user_arg` argument.
void* kv_put_user_arg;
// The newly created client.
PJRT_Client* client; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Client_Create_Args, client);
// Creates and initializes a new PJRT_Client and returns in `client`.
typedef PJRT_Error* PJRT_Client_Create(PJRT_Client_Create_Args* args);
// Arguments for PJRT_Client_Destroy.
struct PJRT_Client_Destroy_Args {
size_t struct_size;
void* priv;
// The client to shut down and free. May be nullptr.
PJRT_Client* client;
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Client_Destroy_Args, client);
// Shuts down and frees `client`. `client` can be nullptr.
typedef PJRT_Error* PJRT_Client_Destroy(PJRT_Client_Destroy_Args* args);
// Arguments for PJRT_Client_PlatformName.
struct PJRT_Client_PlatformName_Args {
size_t struct_size;
void* priv;
// The client being queried.
PJRT_Client* client;
// `platform_name` has the same lifetime as `client`. It is owned by `client`.
const char* platform_name; // out
// Size of `platform_name`.
size_t platform_name_size; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Client_PlatformName_Args, platform_name_size);
// Returns a string that identifies the platform (e.g. "cpu", "gpu", "tpu").
// The string is delivered through `args->platform_name`.
typedef PJRT_Error* PJRT_Client_PlatformName(
PJRT_Client_PlatformName_Args* args);
// Arguments for PJRT_Client_ProcessIndex.
struct PJRT_Client_ProcessIndex_Args {
size_t struct_size;
void* priv;
// The client being queried.
PJRT_Client* client;
// Receives the client's process index.
int process_index; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Client_ProcessIndex_Args, process_index);
// Return the process index of this client. Always 0 in single-process
// settings.
typedef PJRT_Error* PJRT_Client_ProcessIndex(
PJRT_Client_ProcessIndex_Args* args);
// Arguments for PJRT_Client_PlatformVersion.
struct PJRT_Client_PlatformVersion_Args {
size_t struct_size;
void* priv;
// The client being queried.
PJRT_Client* client;
// `platform_version` has the same lifetime as `client`. It's owned by
// `client`.
const char* platform_version; // out
// Size of `platform_version`.
size_t platform_version_size; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Client_PlatformVersion_Args,
platform_version_size);
// Returns a string containing human-readable, platform-specific version info
// (e.g. the CUDA version on GPU or libtpu version on Cloud TPU).
// The string is delivered through `args->platform_version`.
typedef PJRT_Error* PJRT_Client_PlatformVersion(
PJRT_Client_PlatformVersion_Args* args);
// Arguments for PJRT_Client_Devices.
struct PJRT_Client_Devices_Args {
size_t struct_size;
void* priv;
// The client being queried.
PJRT_Client* client;
// Array of device pointers with `num_devices` entries.
// NOTE(review): ownership/lifetime of the array is not stated here --
// presumably owned by `client`; confirm.
PJRT_Device** devices; // out
size_t num_devices; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Client_Devices_Args, num_devices);
// Returns a list of all devices visible to the runtime, including addressable
// and non-addressable devices.
typedef PJRT_Error* PJRT_Client_Devices(PJRT_Client_Devices_Args* args);
// Arguments for PJRT_Client_AddressableDevices.
struct PJRT_Client_AddressableDevices_Args {
size_t struct_size;
void* priv;
// The client being queried.
PJRT_Client* client;
// Array of device pointers with `num_addressable_devices` entries.
// NOTE(review): ownership/lifetime of the array is not stated here --
// presumably owned by `client`; confirm.
PJRT_Device** addressable_devices; // out
size_t num_addressable_devices; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Client_AddressableDevices_Args,
num_addressable_devices);
// Returns a list of devices that are addressable from the client.
// Addressable devices are those that the client can issue commands to.
// All devices are addressable in a single-process environment.
typedef PJRT_Error* PJRT_Client_AddressableDevices(
PJRT_Client_AddressableDevices_Args* args);
// Arguments for PJRT_Client_LookupDevice.
struct PJRT_Client_LookupDevice_Args {
size_t struct_size;
void* priv;
// The client to search.
PJRT_Client* client;
// Device ID to look up (as returned by PJRT_DeviceDescription_Id).
int id;
// `device` has the same lifetime as `client`. It is owned by `client`.
PJRT_Device* device; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Client_LookupDevice_Args, device);
// Returns a PJRT_Device* with the specified ID as returned by
// PJRT_DeviceDescription_Id.
typedef PJRT_Error* PJRT_Client_LookupDevice(
PJRT_Client_LookupDevice_Args* args);
// Arguments for PJRT_Client_LookupAddressableDevice.
struct PJRT_Client_LookupAddressableDevice_Args {
size_t struct_size;
void* priv;
// The client to search.
PJRT_Client* client;
// Local hardware ID to look up (as returned by
// PJRT_DeviceDescription_LocalHardwareId).
int local_hardware_id;
// `addressable_device` has the same lifetime as `client`. It is owned by
// `client`.
PJRT_Device* addressable_device; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Client_LookupAddressableDevice_Args,
addressable_device);
// Returns an addressable PJRT_Device* with the specified ID as returned by
// PJRT_DeviceDescription_LocalHardwareId.
typedef PJRT_Error* PJRT_Client_LookupAddressableDevice(
PJRT_Client_LookupAddressableDevice_Args* args);
// Arguments for PJRT_Client_AddressableMemories.
struct PJRT_Client_AddressableMemories_Args {
size_t struct_size;
void* priv;
// The client being queried.
PJRT_Client* client;
// Array of memory pointers with `num_addressable_memories` entries.
// NOTE(review): ownership/lifetime of the array is not stated here --
// presumably owned by `client`; confirm.
PJRT_Memory** addressable_memories; // out
size_t num_addressable_memories; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Client_AddressableMemories_Args,
num_addressable_memories);
// Returns a list of memories that are addressable from the client. Addressable
// memories are those that the client can directly transfer data to and from.
// All memories are addressable in a single-process environment.
typedef PJRT_Error* PJRT_Client_AddressableMemories(
PJRT_Client_AddressableMemories_Args* args);
// A program in serialized form together with a tag naming its format.
struct PJRT_Program {
size_t struct_size;
void* priv;
// Serialized code in the specified format below.
// String is owned by the caller.
char* code; // in/out depending on usage
// Size of `code`.
size_t code_size;
// Supported formats are:
// "hlo": code string takes serialized HloModuleProto.
// "hlo_with_config": code string takes serialized HloModuleProtoWithConfig.
// "mlir": code string takes MLIR module bytecode (or string).
// Ownership of `format` varies across API functions.
const char* format;
// Size of `format`.
size_t format_size;
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Program, format_size);
// Arguments for PJRT_Client_Compile.
struct PJRT_Client_Compile_Args {
size_t struct_size;
void* priv;
// The client performing the compilation.
PJRT_Client* client;
// Only needs to stay alive for the duration of the Compile call.
// `program->format` and `program->format_size` are owned by the caller.
PJRT_Program* program;
// TODO(b/240560013): consider putting some of option fields in priv.
// Serialized CompileOptionsProto
// (https://github.com/tensorflow/tensorflow/blob/master/tensorflow/compiler/xla/pjrt/compile_options.proto)
const char* compile_options;
// Size of the serialized `compile_options`.
size_t compile_options_size;
// The resulting loaded executable.
// NOTE(review): the struct does not say who frees it -- presumably the
// caller, via the corresponding destroy function; confirm.
PJRT_LoadedExecutable* executable; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Client_Compile_Args, executable);
// Compiles a program in specified format (such as MLIR or HLO) with given
// `compile_options`.
typedef PJRT_Error* PJRT_Client_Compile(PJRT_Client_Compile_Args* args);
// Arguments for PJRT_Client_DefaultDeviceAssignment.
struct PJRT_Client_DefaultDeviceAssignment_Args {
size_t struct_size;
void* priv;
// The client being queried.
PJRT_Client* client;
// Replica and partition counts; together they determine how many ints are
// written to `default_assignment` (`num_replicas * num_partitions`).
int num_replicas;
int num_partitions;
// Must be greater than or equal to `num_replicas * num_partitions`
size_t default_assignment_size;
// Points to an array of size `default_assignment_size`.
// This API writes `num_replicas * num_partitions` ints within that buffer.
// The caller retains ownership of this memory.
// NOTE(review): the ordering of the written values (e.g. replica-major) is
// not specified here -- confirm against the implementation.
int* default_assignment; // pointer to array in; values written as out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Client_DefaultDeviceAssignment_Args,
default_assignment);
// Fills the caller-provided `default_assignment` buffer for the given
// `num_replicas` and `num_partitions`.
typedef PJRT_Error* PJRT_Client_DefaultDeviceAssignment(
PJRT_Client_DefaultDeviceAssignment_Args* args);
// Element types of a buffer. Each enumerator's numeric value is written out
// explicitly so the mapping is visible without counting.
typedef enum {
  // Default value; does not denote a valid primitive type.
  PJRT_Buffer_Type_INVALID = 0,

  // Two-state boolean (predicate).
  PJRT_Buffer_Type_PRED = 1,

  // Fixed-width signed integers.
  PJRT_Buffer_Type_S8 = 2,
  PJRT_Buffer_Type_S16 = 3,
  PJRT_Buffer_Type_S32 = 4,
  PJRT_Buffer_Type_S64 = 5,

  // Fixed-width unsigned integers.
  PJRT_Buffer_Type_U8 = 6,
  PJRT_Buffer_Type_U16 = 7,
  PJRT_Buffer_Type_U32 = 8,
  PJRT_Buffer_Type_U64 = 9,

  // Fixed-width floating point.
  PJRT_Buffer_Type_F16 = 10,
  PJRT_Buffer_Type_F32 = 11,
  PJRT_Buffer_Type_F64 = 12,

  // Truncated 16-bit floating point: 1 sign bit, 8 exponent bits and 7
  // mantissa bits. Similar to, but not the same as, IEEE's 16-bit format.
  PJRT_Buffer_Type_BF16 = 13,

  // Fixed-width complex values.
  // C64: a (real, imag) pair of F32, as in std::complex<float>.
  PJRT_Buffer_Type_C64 = 14,
  // C128: a (real, imag) pair of F64, as in std::complex<double>.
  PJRT_Buffer_Type_C128 = 15,

  // Truncated 8-bit floating-point formats.
  PJRT_Buffer_Type_F8E5M2 = 16,
  PJRT_Buffer_Type_F8E4M3FN = 17,
  PJRT_Buffer_Type_F8E4M3B11FNUZ = 18,
  PJRT_Buffer_Type_F8E5M2FNUZ = 19,
  PJRT_Buffer_Type_F8E4M3FNUZ = 20,

  // 4-bit integers.
  PJRT_Buffer_Type_S4 = 21,
  PJRT_Buffer_Type_U4 = 22
} PJRT_Buffer_Type;
// How long the runtime may keep using the host `data` pointer handed to
// `PJRT_Client_BufferFromHostBuffer`, and what the caller promises in return.
typedef enum {
  // `data` is only guaranteed to be immutable and alive while the
  // `PJRT_Client_BufferFromHostBuffer` call itself is running; once the call
  // completes the runtime may not hold references to `data`.
  PJRT_HostBufferSemantics_kImmutableOnlyDuringCall = 0,

  // The runtime may keep using `data` after
  // `PJRT_Client_BufferFromHostBuffer` returns, while it completes a transfer
  // to the device. The caller promises not to mutate or free `data` until the
  // transfer completes, at which point `done_with_host_buffer` will be
  // triggered.
  PJRT_HostBufferSemantics_kImmutableUntilTransferCompletes = 1,

  // The PjRtBuffer may alias `data` internally, and the runtime may use the
  // `data` contents as long as the buffer is alive. The caller promises to
  // keep `data` alive and unmodified for that whole time. When the PjRtBuffer
  // is freed the runtime calls `done_with_host_buffer`, notifying the caller
  // that the buffer may be freed.
  PJRT_HostBufferSemantics_kZeroCopy = 2
} PJRT_HostBufferSemantics;
// The two ways a buffer memory layout can be described: as a tiled layout or
// as a list of strides.
typedef enum {
  PJRT_Buffer_MemoryLayout_Type_Tiled = 0,
  PJRT_Buffer_MemoryLayout_Type_Strides = 1
} PJRT_Buffer_MemoryLayout_Type;
// Layout described by a minor-to-major dimension order plus a list of tiles.
struct PJRT_Buffer_MemoryLayout_Tiled {
size_t struct_size;
void* priv;
// A map from physical dimension numbers to logical dimension numbers.
// The first element is the most minor physical dimension (fastest varying
// index) and the last the most major (slowest varying index). The contents of
// the vector are the indices of the *logical* dimensions in the shape. Must
// be the same size as the number of dimensions of the buffer.
const int64_t* minor_to_major;
// Number of entries in `minor_to_major`.
size_t minor_to_major_size;
// A concatenated list of tile dimensions.
// Presumably tile i occupies `tile_dim_sizes[i]` consecutive entries of this
// array, in order -- TODO confirm.
const int64_t* tile_dims;
// The list of tile dimension sizes. The size of this list is `num_tiles`.
const size_t* tile_dim_sizes;
size_t num_tiles;
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Buffer_MemoryLayout_Tiled, num_tiles);
// Layout described by a per-dimension byte stride.
struct PJRT_Buffer_MemoryLayout_Strides {
size_t struct_size;
void* priv;
// Number of bytes to traverse per dimension. Must be the same size as
// the number of dimensions of the data. Caution: `byte_strides` are allowed
// to be negative, in which case data may need to point to the interior of
// the buffer, not necessarily its start.
const int64_t* byte_strides;
// Number of entries in `byte_strides`.
size_t num_byte_strides;
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Buffer_MemoryLayout_Strides, num_byte_strides);
// Describe the memory layout. It can be (1) a list of minor-to-major order and
// optional tilings (each tile is a list of dimensions), or (2) a list of
// strides.
struct PJRT_Buffer_MemoryLayout {
size_t struct_size;
void* priv;
// Anonymous union: `tiled` and `strides` share the same storage, so only one
// of them holds a meaningful value at a time.
union {
PJRT_Buffer_MemoryLayout_Tiled tiled;
PJRT_Buffer_MemoryLayout_Strides strides;
};
// Presumably indicates which union member is populated -- the enumerator
// names (Tiled / Strides) mirror the member names.
// Note that this discriminator is placed after the union.
PJRT_Buffer_MemoryLayout_Type type;
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Buffer_MemoryLayout, type);
// Arguments for PJRT_Client_BufferFromHostBuffer.
struct PJRT_Client_BufferFromHostBuffer_Args {
size_t struct_size;
void* priv;
// The client performing the transfer.
PJRT_Client* client;
// Pointer to the host buffer
const void* data;
// The type of the `data`, and the type of the resulting output `buffer`
PJRT_Buffer_Type type;
// The array dimensions of `data`.
const int64_t* dims;
// Number of entries in `dims`.
size_t num_dims;
// Number of bytes to traverse per dimension of the input data. Must be the
// same size as `dims`, or empty. If empty, the array is assumed to have a
// dense layout with dimensions in major-to-minor order
// Caution: `byte_strides` are allowed to be negative, in which case `data`
// may need to point to the interior of the buffer, not necessarily its start.
const int64_t* byte_strides;
// Number of entries in `byte_strides`.
size_t num_byte_strides;
// How long the runtime may reference `data`; see PJRT_HostBufferSemantics.
PJRT_HostBufferSemantics host_buffer_semantics;
// Device to copy host data to.
PJRT_Device* device;
// If nullptr, host data will be copied to `device`, otherwise we copy data to
// `memory`.
PJRT_Memory* memory;
// The caller is responsible to keep the data (tiled or strides) in the
// device_layout alive during the call. If nullptr, the device layout is
// assumed to be a dense layout with dimensions in major-to-minor order.
PJRT_Buffer_MemoryLayout* device_layout;
// Event indicating when it's safe to free `data`. The caller is responsible
// for calling PJRT_Event_Destroy.
PJRT_Event* done_with_host_buffer; // out
// Output device buffer. The caller is responsible for calling
// PJRT_Buffer_Destroy.
PJRT_Buffer* buffer; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Client_BufferFromHostBuffer_Args, buffer);
// Asynchronously copies a buffer stored on host to device memory.
typedef PJRT_Error* PJRT_Client_BufferFromHostBuffer(
PJRT_Client_BufferFromHostBuffer_Args* args);
// Arguments for PJRT_Client_CreateViewOfDeviceBuffer.
struct PJRT_Client_CreateViewOfDeviceBuffer_Args {
size_t struct_size;
void* priv;
// The client creating the view.
PJRT_Client* client;
// A pointer to a non-owned device buffer. A PJRT_Buffer that is a non-owned
// view of this device buffer will be created.
void* device_buffer_ptr;
// Dimensions of the device buffer, with `num_dims` entries.
const int64_t* dims;
size_t num_dims;
// Element type of the device buffer.
PJRT_Buffer_Type element_type;
// Memory layout of the device buffer.
// NOTE(review): whether nullptr is accepted here (and what it would mean) is
// not stated -- confirm.
PJRT_Buffer_MemoryLayout* layout;
// The device that `device_buffer_ptr` is on.
PJRT_Device* device;
// A callback to be performed when the PJRT_Buffer is done with the on-device
// buffer. This callback is optional and can be a nullptr.
void (*on_delete_callback)(void* device_buffer_ptr, void* user_arg);
// `on_delete_callback_arg` will be passed to `on_delete_callback` as
// `user_arg` argument.
void* on_delete_callback_arg;
// A platform-specific stream handle that should contain the work or events
// needed to materialize the on-device buffer. It is optional and can be
// cast from a nullptr. PJRT_Client_CreateViewOfDeviceBuffer will
// append an event to `stream` that indicates when the returned buffer is
// ready to use. This is intended to support dlpack on GPU and is not expected
// to be supported on all hardware platforms.
intptr_t stream;
// The created non-owned view.
PJRT_Buffer* buffer; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Client_CreateViewOfDeviceBuffer_Args, buffer);
// Creates a PJRT buffer that is a non-owned view of an on-device buffer
// (typically allocated by another library). The buffer may be mutated,
// for example, if the buffer is donated to an Execute operation. This method is
// not required on all hardware platforms.
typedef PJRT_Error* PJRT_Client_CreateViewOfDeviceBuffer(
PJRT_Client_CreateViewOfDeviceBuffer_Args* args);
// -------------------------- Device Descriptions ------------------------------
// Device descriptions may be associated with an actual device
// (via PJRT_Device_GetDescription), but they can also be used to describe a
// device that isn't currently available to the plugin. This is useful for
// compiling executables without hardware available, which can then be
// serialized and written somewhere durable, and then loaded and run on actual
// hardware later.
// Arguments for PJRT_DeviceDescription_Id.
struct PJRT_DeviceDescription_Id_Args {
size_t struct_size;
void* priv;
// The device description being queried.
PJRT_DeviceDescription* device_description;
// Receives the device ID.
int id; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_DeviceDescription_Id_Args, id);
// The ID of this device. IDs are unique among devices of this type
// (e.g. CPUs, GPUs). On multi-host platforms, this will be unique across all
// hosts' devices.
typedef PJRT_Error* PJRT_DeviceDescription_Id(
PJRT_DeviceDescription_Id_Args* args);
// Arguments for PJRT_DeviceDescription_ProcessIndex.
struct PJRT_DeviceDescription_ProcessIndex_Args {
size_t struct_size;
void* priv;
// The device description being queried.
PJRT_DeviceDescription* device_description;
// Receives the index of the process the device belongs to.
int process_index; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_DeviceDescription_ProcessIndex_Args,
process_index);
// The index of the process that this device belongs to, i.e. is addressable
// from. This is not always identical to PJRT_Client_ProcessIndex in a
// multi-process setting, where each client can see devices from all
// processes, but only a subset of them are addressable and have the same
// process_index as the client.
typedef PJRT_Error* PJRT_DeviceDescription_ProcessIndex(
PJRT_DeviceDescription_ProcessIndex_Args* args);
// Arguments for PJRT_DeviceDescription_Attributes. The attributes of
// `device_description` are reported as an array: `attributes` is the array and
// `num_attributes` its element count.
// NOTE(review): the ownership/lifetime of `attributes` is not stated here --
// confirm before holding on to the pointer.
struct PJRT_DeviceDescription_Attributes_Args {
size_t struct_size;
void* priv;
PJRT_DeviceDescription* device_description;
size_t num_attributes; // out
PJRT_NamedValue* attributes; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_DeviceDescription_Attributes_Args, attributes);
// Returns an array of device specific attributes with attribute name, value
// and value type.
typedef PJRT_Error* PJRT_DeviceDescription_Attributes(
PJRT_DeviceDescription_Attributes_Args* args);
// Arguments for PJRT_DeviceDescription_Kind. The kind string of
// `device_description` is reported through `device_kind` together with
// `device_kind_size`.
struct PJRT_DeviceDescription_Kind_Args {
size_t struct_size;
void* priv;
PJRT_DeviceDescription* device_description;
// `device_kind` string is owned by `device` and has same lifetime as
// `device`.
// NOTE(review): this struct has no `device` field; `device` presumably means
// the object behind `device_description` -- confirm.
const char* device_kind; // out
// Presumably the length of `device_kind` in bytes; TODO confirm whether the
// string is NUL-terminated.
size_t device_kind_size; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_DeviceDescription_Kind_Args, device_kind_size);
// A vendor-dependent string that uniquely identifies the kind of device,
// e.g., "Tesla V100-SXM2-16GB".
typedef PJRT_Error* PJRT_DeviceDescription_Kind(
PJRT_DeviceDescription_Kind_Args* args);
// Arguments for PJRT_DeviceDescription_DebugString. The debug string of
// `device_description` is reported through `debug_string` together with
// `debug_string_size`.
// NOTE(review): ownership/lifetime of `debug_string` is not documented here --
// presumably tied to `device_description`; confirm.
struct PJRT_DeviceDescription_DebugString_Args {
size_t struct_size;
void* priv;
PJRT_DeviceDescription* device_description;
const char* debug_string; // out
size_t debug_string_size; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_DeviceDescription_DebugString_Args,
debug_string_size);
// Debug string suitable for logging when errors occur. Should be verbose
// enough to describe the current device unambiguously.
typedef PJRT_Error* PJRT_DeviceDescription_DebugString(
PJRT_DeviceDescription_DebugString_Args* args);
// Arguments for PJRT_DeviceDescription_ToString. The user-facing string for
// `device_description` is reported through `to_string` together with
// `to_string_size`.
// NOTE(review): ownership/lifetime of `to_string` is not documented here --
// presumably tied to `device_description`; confirm.
struct PJRT_DeviceDescription_ToString_Args {
size_t struct_size;
void* priv;
PJRT_DeviceDescription* device_description;
const char* to_string; // out
size_t to_string_size; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_DeviceDescription_ToString_Args, to_string_size);
// Debug string suitable for reading by end users, should be reasonably terse,
// for example: "CpuDevice(id=0)".
typedef PJRT_Error* PJRT_DeviceDescription_ToString(
PJRT_DeviceDescription_ToString_Args* args);
// --------------------------------- Devices -----------------------------------
// Arguments for PJRT_Device_GetDescription. The description associated with
// `device` is reported through the `device_description` out-field.
// NOTE(review): the lifetime of the returned description is not stated here --
// presumably that of `device`; confirm.
struct PJRT_Device_GetDescription_Args {
size_t struct_size;
void* priv;
PJRT_Device* device;
PJRT_DeviceDescription* device_description; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Device_GetDescription_Args, device_description);
// Fetch the DeviceDescription associated with this device.
typedef PJRT_Error* PJRT_Device_GetDescription(
PJRT_Device_GetDescription_Args* args);
// Arguments for PJRT_Device_IsAddressable. The answer for `device` is reported
// through the `is_addressable` out-field.
struct PJRT_Device_IsAddressable_Args {
size_t struct_size;
void* priv;
PJRT_Device* device;
bool is_addressable; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Device_IsAddressable_Args, is_addressable);
// Whether the client can issue commands to this device.
typedef PJRT_Error* PJRT_Device_IsAddressable(
PJRT_Device_IsAddressable_Args* args);
// Arguments for PJRT_Device_LocalHardwareId. The hardware ID of `device` is
// reported through the `local_hardware_id` out-field.
struct PJRT_Device_LocalHardwareId_Args {
size_t struct_size;
void* priv;
PJRT_Device* device;
int local_hardware_id; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Device_LocalHardwareId_Args, local_hardware_id);
// Opaque hardware ID, e.g., the CUDA device number. In general, not guaranteed
// to be dense, and -1 if undefined.
typedef PJRT_Error* PJRT_Device_LocalHardwareId(
PJRT_Device_LocalHardwareId_Args* args);
// Arguments for PJRT_Device_AddressableMemories. The memories addressable by
// `device` are reported as an array: `memories` is the array of PJRT_Memory
// pointers and `num_memories` its element count.
struct PJRT_Device_AddressableMemories_Args {
size_t struct_size;
void* priv;
PJRT_Device* device;
// Has the lifetime of `device`.
PJRT_Memory** memories; // out
size_t num_memories; // out
};
// Name the last declared field (`num_memories`), matching how the neighbouring
// *_Args structs invoke PJRT_DEFINE_STRUCT_TRAITS. Naming `memories` would
// leave `num_memories` outside whatever extent the traits derive from that
// field.
// NOTE(review): this changes the value the traits generate for this struct;
// confirm the compatibility impact on already-built plugins/clients.
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Device_AddressableMemories_Args, num_memories);
// Returns the memories that a device can address.
typedef PJRT_Error* PJRT_Device_AddressableMemories(
PJRT_Device_AddressableMemories_Args* args);
// Arguments for PJRT_Device_DefaultMemory. The default memory of `device` is
// reported through the `memory` out-field.
struct PJRT_Device_DefaultMemory_Args {
size_t struct_size;
void* priv;
PJRT_Device* device;
// `memory` has the same lifetime as `device`.
PJRT_Memory* memory; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Device_DefaultMemory_Args, memory);
// Returns the default memory of a device, i.e. which memory data processed by
// this device should be stored in by default.
typedef PJRT_Error* PJRT_Device_DefaultMemory(
PJRT_Device_DefaultMemory_Args* args);
// Arguments for PJRT_Device_MemoryStats. All statistics for `device` are
// out-fields. Every statistic except `bytes_in_use` is paired with a
// `<name>_is_set` flag; presumably the flag tells whether the platform
// populated that optional value -- confirm against an implementation.
struct PJRT_Device_MemoryStats_Args {
size_t struct_size;
void* priv;
PJRT_Device* device;
// Number of bytes in use.
int64_t bytes_in_use; // out
// The peak bytes in use.
int64_t peak_bytes_in_use; // out
bool peak_bytes_in_use_is_set; // out
// Number of allocations.
int64_t num_allocs; // out
bool num_allocs_is_set; // out
// The largest single allocation seen.
int64_t largest_alloc_size; // out
bool largest_alloc_size_is_set; // out
// The upper limit of user-allocatable device memory in bytes.
int64_t bytes_limit; // out
bool bytes_limit_is_set; // out
// Number of bytes reserved.
int64_t bytes_reserved; // out
bool bytes_reserved_is_set; // out
// The peak number of bytes reserved.
int64_t peak_bytes_reserved; // out
bool peak_bytes_reserved_is_set; // out
// The upper limit on the number of bytes of reservable memory.
int64_t bytes_reservable_limit; // out
bool bytes_reservable_limit_is_set; // out
// Largest free block size in bytes.
int64_t largest_free_block_bytes; // out
bool largest_free_block_bytes_is_set; // out
// Number of bytes of memory held by the allocator. This may be higher than
// bytes_in_use if the allocator holds a pool of memory (e.g. BFCAllocator).
int64_t pool_bytes; // out
bool pool_bytes_is_set; // out
// Presumably the peak value of `pool_bytes` -- TODO confirm.
int64_t peak_pool_bytes; // out
bool peak_pool_bytes_is_set; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Device_MemoryStats_Args, peak_pool_bytes_is_set);
// Device memory/allocator statistics. All returned stats except `bytes_in_use`
// are optional and may not be returned by all platforms. Implementations may
// also return PJRT_Error_Code_UNIMPLEMENTED. Intended for diagnostic purposes.
typedef PJRT_Error* PJRT_Device_MemoryStats(PJRT_Device_MemoryStats_Args* args);
//-------------------------------- Memory --------------------------------------
// Arguments for PJRT_Memory_Id. The ID of `memory` is reported through the
// `id` out-field.
struct PJRT_Memory_Id_Args {
size_t struct_size;
void* priv;
PJRT_Memory* memory;
int id; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Memory_Id_Args, id);
// The ID of this memory. IDs are unique among memories of this type.
typedef PJRT_Error* PJRT_Memory_Id(PJRT_Memory_Id_Args* args);
// Arguments for PJRT_Memory_Kind. The kind string of `memory` is reported
// through `memory_kind` together with `memory_kind_size`.
struct PJRT_Memory_Kind_Args {
size_t struct_size;
void* priv;
PJRT_Memory* memory;
// `memory_kind` has same lifetime as `memory`.
const char* memory_kind; // out
// Presumably the length of `memory_kind` in bytes; TODO confirm whether the
// string is NUL-terminated.
size_t memory_kind_size; // out
};
PJRT_DEFINE_STRUCT_TRAITS(PJRT_Memory_Kind_Args, memory_kind_size);
// A platform-dependent string that uniquely identifies the kind of the memory.
typedef PJRT_Error* PJRT_Memory_Kind(PJRT_Memory_Kind_Args* args);
struct PJRT_Memory_DebugString_Args {
size_t struct_size;