-
Couldn't load subscription status.
- Fork 45.4k
Add data schema for the benchmark run in Bigquery. #3585
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| [ | ||
| { | ||
| "description": "The ID of the benchmark run, where this metric should tie to.", | ||
| "mode": "REQUIRED", | ||
| "name": "run_id", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "The name of the metric, which should be descriptive. E.g. training_loss, accuracy.", | ||
| "mode": "REQUIRED", | ||
| "name": "name", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "The unit of the metric. E.g. MB per sec.", | ||
| "mode": "NULLABLE", | ||
| "name": "unit", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "The value of the metric.", | ||
| "mode": "NULLABLE", | ||
| "name": "value", | ||
| "type": "FLOAT" | ||
| }, | ||
| { | ||
| "description": "The timestamp when the metric is recorded.", | ||
| "mode": "REQUIRED", | ||
| "name": "timestamp", | ||
| "type": "TIMESTAMP" | ||
| }, | ||
| { | ||
| "description": "The global step when this metric is recorded.", | ||
| "mode": "NULLABLE", | ||
| "name": "global_step", | ||
| "type": "INTEGER" | ||
| }, | ||
| { | ||
| "description": "Free format metadata for the extra information about the metric.", | ||
| "mode": "REPEATED", | ||
| "name": "extras", | ||
| "type": "RECORD", | ||
| "fields": [ | ||
| { | ||
| "mode": "NULLABLE", | ||
| "name": "name", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "mode": "NULLABLE", | ||
| "name": "value", | ||
| "type": "STRING" | ||
| } | ||
| ] | ||
| } | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,324 @@ | ||
| [ | ||
| { | ||
| "description": "The UUID of the run for the benchmark.", | ||
| "mode": "REQUIRED", | ||
| "name": "model_id", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "The name of the model, e.g. ResNet50, LeNet-5, etc.", | ||
| "mode": "REQUIRED", | ||
| "name": "model_name", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "The date when the test of the model is started", | ||
| "mode": "REQUIRED", | ||
| "name": "run_date", | ||
| "type": "DATETIME" | ||
| }, | ||
| { | ||
| "description": "The tensorflow version information.", | ||
| "fields": [ | ||
| { | ||
| "description": "Version of the tensorflow. E.g. 1.7.0-rc0", | ||
| "mode": "REQUIRED", | ||
| "name": "version", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "Git Hash of the tensorflow", | ||
| "mode": "NULLABLE", | ||
| "name": "git_hash", | ||
| "type": "STRING" | ||
| } | ||
| ], | ||
| "mode": "REQUIRED", | ||
| "name": "tensorflow_version", | ||
| "type": "RECORD" | ||
| }, | ||
| { | ||
| "description": "The arbitrary attribute of the model.", | ||
| "fields": [ | ||
| { | ||
| "description": "The name of the attribute.", | ||
| "mode": "REQUIRED", | ||
| "name": "name", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "The value of the attribute.", | ||
| "mode": "NULLABLE", | ||
| "name": "value", | ||
| "type": "STRING" | ||
| } | ||
| ], | ||
| "mode": "REPEATED", | ||
| "name": "attribute", | ||
| "type": "RECORD" | ||
| }, | ||
| { | ||
| "description": "Environment variables when the benchmark run is executed.", | ||
| "fields": [ | ||
| { | ||
| "description": "The name of the variable.", | ||
| "mode": "REQUIRED", | ||
| "name": "name", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "The value of the variable.", | ||
| "mode": "NULLABLE", | ||
| "name": "value", | ||
| "type": "STRING" | ||
| } | ||
| ], | ||
| "mode": "REPEATED", | ||
| "name": "enviornment_variable", | ||
| "type": "RECORD" | ||
| }, | ||
| { | ||
| "description": "The list of hyperparameters of the model.", | ||
| "fields": [ | ||
| { | ||
| "description": "The name of the hyperparameter.", | ||
| "mode": "REQUIRED", | ||
| "name": "name", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "The string value of the hyperparameter.", | ||
| "mode": "NULLABLE", | ||
| "name": "string_value", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "The bool value of the hyperparameter.", | ||
| "mode": "NULLABLE", | ||
| "name": "bool_value", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "The int/long value of the hyperparameter.", | ||
| "mode": "NULLABLE", | ||
| "name": "long_value", | ||
| "type": "INTEGER" | ||
| }, | ||
| { | ||
| "description": "The double/float value of hyperparameter.", | ||
| "mode": "NULLABLE", | ||
| "name": "float_value", | ||
| "type": "FLOAT" | ||
| } | ||
| ], | ||
| "mode": "REPEATED", | ||
| "name": "hyperparameter", | ||
| "type": "RECORD" | ||
| }, | ||
| { | ||
| "description": "The dataset that the benchmark is run with.", | ||
| "mode": "NULLABLE", | ||
| "name": "dataset", | ||
| "type": "RECORD", | ||
| "fields": [ | ||
| { | ||
| "description": "The name of the dataset that the model is trained/validated with. E.g. ImageNet, MNIST.", | ||
| "mode": "REQUIRED", | ||
| "name": "name", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "The arbitrary attribute of the dataset.", | ||
| "fields": [ | ||
| { | ||
| "description": "The name of the attribute.", | ||
| "mode": "REQUIRED", | ||
| "name": "name", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "The value of the attribute.", | ||
| "mode": "NULLABLE", | ||
| "name": "value", | ||
| "type": "STRING" | ||
| } | ||
| ], | ||
| "mode": "REPEATED", | ||
| "name": "attribute", | ||
| "type": "RECORD" | ||
| } | ||
| ] | ||
| }, | ||
| { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Where should information about parameter server configuration live? I guess that's mostly about how the model itself is run. Maybe we don't need to explicitly store that as long as we capture the command line and code commit that was run. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ack. I am not worrying about that for the moment; we can update the data schema in the future if we want. |
||
| "description": "The machine configuration of the benchmark run.", | ||
| "mode": "NULLABLE", | ||
| "name": "machine_config", | ||
| "type": "RECORD", | ||
| "fields": [ | ||
| { | ||
| "description": "The platform information of the benchmark run.", | ||
| "mode": "NULLABLE", | ||
| "name": "platform_info", | ||
| "type": "RECORD", | ||
| "fields": [ | ||
| { | ||
| "description": "Eg: 64bit.", | ||
| "mode": "NULLABLE", | ||
| "name": "bits", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "Eg: ELF.", | ||
| "mode": "NULLABLE", | ||
| "name": "linkage", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "Eg: i386.", | ||
| "mode": "NULLABLE", | ||
| "name": "machine", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "Eg: 3.13.0-76-generic.", | ||
| "mode": "NULLABLE", | ||
| "name": "release", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "Eg: Linux.", | ||
| "mode": "NULLABLE", | ||
| "name": "system", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "Eg: #120-Ubuntu SMP Mon Jan 18 15:59:10 UTC 2016.", | ||
| "mode": "NULLABLE", | ||
| "name": "version", | ||
| "type": "STRING" | ||
| } | ||
| ] | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we want to try to capture cloud info here? ie, running on k8s versus a VM versus metal? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. Added a section with minimal cloud info, and a free format key-value pair for the moment. |
||
| }, | ||
| { | ||
| "description": "The CPU information of the benchmark run.", | ||
| "mode": "NULLABLE", | ||
| "name": "cpu_info", | ||
| "type": "RECORD", | ||
| "fields": [ | ||
| { | ||
| "mode": "NULLABLE", | ||
| "name": "num_cores", | ||
| "type": "INTEGER" | ||
| }, | ||
| { | ||
| "mode": "NULLABLE", | ||
| "name": "num_cores_allowed", | ||
| "type": "INTEGER" | ||
| }, | ||
| { | ||
| "description" : "How fast are those CPUs.", | ||
| "mode": "NULLABLE", | ||
| "name": "mhz_per_cpu", | ||
| "type": "FLOAT" | ||
| }, | ||
| { | ||
| "description" : "Additional CPU info, Eg: Intel Ivybridge with HyperThreading (24 cores).", | ||
| "mode": "NULLABLE", | ||
| "name": "cpu_info", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description" : "What kind of cpu scaling is enabled on the host. Eg performance, ondemand, conservative, mixed.", | ||
| "mode": "NULLABLE", | ||
| "name": "cpu_governor", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "Cache size of the CPUs.", | ||
| "mode": "NULLABLE", | ||
| "name": "cache_size", | ||
| "type": "RECORD", | ||
| "fields": [ | ||
| { | ||
| "mode": "NULLABLE", | ||
| "name": "level", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "mode": "NULLABLE", | ||
| "name": "size", | ||
| "type": "INTEGER" | ||
| } | ||
| ] | ||
| } | ||
| ] | ||
| }, | ||
| { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I see you went with JSON instead of Proto, which WFM if you find it preferable. But, to make the question more complicated -- what about YAML? We will have a bunch of those for k8s anyhow, and it's much more human-readable without all these brackets. Thoughts? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The schema file is used to create the BigQuery table, and BigQuery only accepts JSON as the schema format, so I don't have another option here. |
||
| "mode": "NULLABLE", | ||
| "name": "gpu_info", | ||
| "type": "RECORD", | ||
| "fields": [ | ||
| { | ||
| "mode": "NULLABLE", | ||
| "name": "count", | ||
| "type": "INTEGER" | ||
| }, | ||
| { | ||
| "mode": "NULLABLE", | ||
| "name": "model", | ||
| "type": "STRING" | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We probably want to indicate in some way how the GPUs are configured:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added cuda_version which is standard, not sure we could capture other info easily or not. |
||
| }, | ||
| { | ||
| "mode": "NULLABLE", | ||
| "name": "cuda_version", | ||
| "type": "STRING" | ||
| } | ||
| ] | ||
| }, | ||
| { | ||
| "description": "The cloud instance information if the benchmark run is executed on cloud", | ||
| "mode": "NULLABLE", | ||
| "name": "cloud_info", | ||
| "type": "RECORD", | ||
| "fields": [ | ||
| { | ||
| "description": "The instance type, E.g. n1-standard-4.", | ||
| "mode": "NULLABLE", | ||
| "name": "instance_type", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "The arbitrary attribute of the cloud info.", | ||
| "fields": [ | ||
| { | ||
| "description": "The name of the attribute.", | ||
| "mode": "REQUIRED", | ||
| "name": "name", | ||
| "type": "STRING" | ||
| }, | ||
| { | ||
| "description": "The value of the attribute.", | ||
| "mode": "NULLABLE", | ||
| "name": "value", | ||
| "type": "STRING" | ||
| } | ||
| ], | ||
| "mode": "REPEATED", | ||
| "name": "attribute", | ||
| "type": "RECORD" | ||
| } | ||
| ] | ||
| }, | ||
| { | ||
| "mode": "NULLABLE", | ||
| "name": "memory_total", | ||
| "type": "INTEGER" | ||
| }, | ||
| { | ||
| "mode": "NULLABLE", | ||
| "name": "memory_available", | ||
| "type": "STRING" | ||
| } | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Capturing env variables relevant to the compute environment would be good-- ie, CUDA_VISIBLE_DEVICES, whether to share GPU memory (sorry, forgetting what that one is right now, but it should suffice to say, there are many). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ack, for the moment, we will just dump them into env variables. |
||
| ] | ||
| } | ||
| ] | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some other things that would be nice:
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point. Adding TF version info and environment variables. The command line info should be captured by the attributes.