Skip to content

Commit

Permalink
Merge e99a656 into a1ce945
Browse files Browse the repository at this point in the history
  • Loading branch information
kayush2O6 committed Feb 23, 2018
2 parents a1ce945 + e99a656 commit 232d7d6
Show file tree
Hide file tree
Showing 30 changed files with 96 additions and 51 deletions.
1 change: 1 addition & 0 deletions cmd/tf-operator/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ var (
retryPeriod = 3 * time.Second
)

// Run :
func Run(opt *options.ServerOption) error {
namespace := os.Getenv(util.EnvKubeflowNamespace)
if len(namespace) == 0 {
Expand Down
2 changes: 1 addition & 1 deletion dashboard/backend/client/manager.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Client is a package handling authentication/communication with kubernetes API
// client is a package handling authentication/communication with kubernetes API
package client

import (
Expand Down
2 changes: 1 addition & 1 deletion dashboard/backend/handler/api_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ func (apiHandler *APIHandler) handleGetNamespaces(request *restful.Request, resp
log.Warningf("failed to list namespaces.")
response.WriteError(http.StatusInternalServerError, err)
} else {
log.Infof("sucessfully listed namespaces")
log.Infof("successfully listed namespaces")
response.WriteHeaderAndEntity(http.StatusOK, l)
}
}
4 changes: 3 additions & 1 deletion pkg/apis/tensorflow/helper/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func ConfigureAcceleratorsForTFJobSpec(c *tfv1.TFJobSpec, accelerators map[strin

lists := []v1.ResourceList{c.Resources.Limits, c.Resources.Requests}
for _, resources := range lists {
for name, _ := range resources {
for name := range resources {

if _, ok := accelerators[string(name)]; !ok {
continue
Expand Down Expand Up @@ -110,10 +110,12 @@ func Cleanup(c *tfv1.TFJobSpec) {
// We should have default container images so user doesn't have to provide these.
}

// CRDName : method returns custom resource definition kind and group.
func CRDName() string {
return fmt.Sprintf("%s.%s", tfv1.CRDKindPlural, tfv1.CRDGroup)
}

// scalingReason : method returns the reason for scaling the cluster size
func scalingReason(from, to int) string {
return fmt.Sprintf("Current cluster size: %d, desired cluster size: %d", from, to)
}
4 changes: 2 additions & 2 deletions pkg/apis/tensorflow/v1alpha1/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ func addDefaultingFuncs(scheme *runtime.Scheme) error {
return RegisterDefaults(scheme)
}

// SetDefaults_TFJob sets any unspecified values to defaults
func SetDefaults_TFJob(obj *TFJob) {
// SetDefaultsTFJob sets any unspecified values to defaults
func SetDefaultsTFJob(obj *TFJob) {
c := &obj.Spec

if c.TFImage == "" {
Expand Down
2 changes: 1 addition & 1 deletion pkg/apis/tensorflow/v1alpha1/defaults_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ func TestSetDefaults_TFJob(t *testing.T) {
}

for _, c := range testCases {
SetDefaults_TFJob(c.in)
SetDefaultsTFJob(c.in)
if !reflect.DeepEqual(c.in, c.expected) {
t.Errorf("Want\n%v; Got\n %v", util.Pformat(c.expected), util.Pformat(c.in))
}
Expand Down
1 change: 1 addition & 0 deletions pkg/apis/tensorflow/v1alpha1/register.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
)

var (
// SchemeBuilder : an object of runtime.SchemeBuilder
SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes)
AddToScheme = SchemeBuilder.AddToScheme
)
Expand Down
15 changes: 13 additions & 2 deletions pkg/apis/tensorflow/v1alpha1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ type TFJob struct {
Status TFJobStatus `json:"status"`
}

// TFJobSpec : structure for storing the TFJob specifications
type TFJobSpec struct {
// TODO(jlewi): Can we we get rid of this and use some value from Kubernetes or a random ide.
RuntimeId string
Expand All @@ -62,11 +63,13 @@ type TFJobSpec struct {
TerminationPolicy *TerminationPolicySpec `json:"terminationPolicy,omitempty"`
}

// TerminationPolicySpec : structure for storing spec for process termination
type TerminationPolicySpec struct {
// Chief policy waits for a particular process (which is the chief) to exit.
Chief *ChiefSpec `json:"chief,omitempty"`
}

// ChiefSpec : structure storing the replica name and replica index
type ChiefSpec struct {
ReplicaName string `json:"replicaName"`
ReplicaIndex int `json:"replicaIndex"`
Expand All @@ -90,7 +93,7 @@ const (
)

// TODO(jlewi): We probably want to add a name field. This would allow us to have more than 1 type of each worker.
// This might be useful if you wanted to have a separate set of workers to do eval.
// TFReplicaSpec : This might be useful if you wanted to have a separate set of workers to do eval.
type TFReplicaSpec struct {
// Replicas is the number of desired replicas.
// This is a pointer to distinguish between explicit zero and unspecified.
Expand All @@ -104,6 +107,7 @@ type TFReplicaSpec struct {
TFReplicaType `json:"tfReplicaType"`
}

// TensorBoardSpec : structure to store the specification of tensorboard
type TensorBoardSpec struct {
//Location of TensorFlow event files
LogDir string `json:"logDir"`
Expand All @@ -112,6 +116,7 @@ type TensorBoardSpec struct {
ServiceType v1.ServiceType `json:"serviceType"`
}

// TFJobPhase : enum to store the phase of tf job
type TFJobPhase string

const (
Expand All @@ -122,7 +127,7 @@ const (
TFJobPhaseFailed TFJobPhase = "Failed"
TFJobPhaseDone TFJobPhase = "Done"
)

// State : enum to store the state of tf job
type State string

const (
Expand All @@ -132,6 +137,7 @@ const (
StateFailed State = "Failed"
)

// TFJobStatus : structure for storing the status of tf jobs
type TFJobStatus struct {
// Phase is the TFJob running phase
Phase TFJobPhase `json:"phase"`
Expand All @@ -144,6 +150,7 @@ type TFJobStatus struct {
ReplicaStatuses []*TFReplicaStatus `json:"replicaStatuses"`
}

// ReplicaState : enum to store the status of replica
type ReplicaState string

const (
Expand All @@ -153,6 +160,7 @@ const (
ReplicaStateSucceeded ReplicaState = "Succeeded"
)

// TFReplicaStatus : structure for storing the status of tf replica
type TFReplicaStatus struct {
TFReplicaType `json:"tf_replica_type"`

Expand All @@ -176,6 +184,7 @@ type TFJobList struct {
Items []TFJob `json:"items"`
}

// ControllerConfig : structure for storing the controller configuration
type ControllerConfig struct {
// Accelerators is a map from the name of the accelerator to the config for that accelerator.
// This should match the value specified as a container limit.
Expand All @@ -194,11 +203,13 @@ type AcceleratorVolume struct {
MountPath string
}

// AcceleratorConfig : structure for storing the accelerator configuration
type AcceleratorConfig struct {
Volumes []AcceleratorVolume
EnvVars []EnvironmentVariableConfig
}

// EnvironmentVariableConfig : structure for storing the environment variables configuration
type EnvironmentVariableConfig struct {
Name string
Value string
Expand Down
8 changes: 4 additions & 4 deletions pkg/apis/tensorflow/v1alpha1/zz_generated.deepcopy.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,9 @@ func (in *TFJob) DeepCopy() *TFJob {
func (in *TFJob) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
} else {
return nil
}
return nil

}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
Expand Down Expand Up @@ -249,9 +249,9 @@ func (in *TFJobList) DeepCopy() *TFJobList {
func (in *TFJobList) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
} else {
return nil
}
return nil

}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
Expand Down
17 changes: 9 additions & 8 deletions pkg/apis/tensorflow/v1alpha1/zz_generated.defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

// This file was autogenerated by defaulter-gen. Do not edit it manually!
// This file was auto generated by defaulter-gen. Do not edit it manually!

package v1alpha1

Expand All @@ -28,18 +28,19 @@ import (
// Public to allow building arbitrary schemes.
// All generated defaulters are covering - they call all nested defaulters.
func RegisterDefaults(scheme *runtime.Scheme) error {
scheme.AddTypeDefaultingFunc(&TFJob{}, func(obj interface{}) { SetObjectDefaults_TFJob(obj.(*TFJob)) })
scheme.AddTypeDefaultingFunc(&TFJobList{}, func(obj interface{}) { SetObjectDefaults_TFJobList(obj.(*TFJobList)) })
scheme.AddTypeDefaultingFunc(&TFJob{}, func(obj interface{}) { SetObjectDefaultsTFJob(obj.(*TFJob)) })
scheme.AddTypeDefaultingFunc(&TFJobList{}, func(obj interface{}) { SetObjectDefaultsTFJobList(obj.(*TFJobList)) })
return nil
}

func SetObjectDefaults_TFJob(in *TFJob) {
SetDefaults_TFJob(in)
// SetObjectDefaultsTFJob : set the input tfjob as default tfjob
func SetObjectDefaultsTFJob(in *TFJob) {
SetDefaultsTFJob(in)
}

func SetObjectDefaults_TFJobList(in *TFJobList) {
// SetObjectDefaultsTFJobList : set the TFJobs list with default objects
func SetObjectDefaultsTFJobList(in *TFJobList) {
for i := range in.Items {
a := &in.Items[i]
SetObjectDefaults_TFJob(a)
SetObjectDefaultsTFJob(a)
}
}
2 changes: 1 addition & 1 deletion pkg/apis/tensorflow/validation/validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func TestValidate(t *testing.T) {
job := &tfv1.TFJob{
Spec: *c.in,
}
tfv1.SetObjectDefaults_TFJob(job)
tfv1.SetObjectDefaultsTFJob(job)
if err := ValidateTFJobSpec(&job.Spec); (err != nil) != c.expectingError {
t.Errorf("unexpected validation result: %v", err)
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/client/clientset/versioned/clientset.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
flowcontrol "k8s.io/client-go/util/flowcontrol"
)

// Interface : contains interface for Discovery and Kubeflow
type Interface interface {
Discovery() discovery.DiscoveryInterface
KubeflowV1alpha1() kubeflowv1alpha1.KubeflowV1alpha1Interface
Expand All @@ -43,7 +44,7 @@ func (c *Clientset) KubeflowV1alpha1() kubeflowv1alpha1.KubeflowV1alpha1Interfac
return c.kubeflowV1alpha1
}

// Deprecated: Kubeflow retrieves the default version of KubeflowClient.
// Kubeflow : [DEPRECATED] Kubeflow retrieves the default version of KubeflowClient.
// Please explicitly pick a version.
func (c *Clientset) Kubeflow() kubeflowv1alpha1.KubeflowV1alpha1Interface {
return c.kubeflowV1alpha1
Expand Down
2 changes: 1 addition & 1 deletion pkg/client/clientset/versioned/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ limitations under the License.

// This package is generated by client-gen with custom arguments.

// This package has the automatically generated clientset.
// Package versioned has the automatically generated clientset.
package versioned
1 change: 1 addition & 0 deletions pkg/client/clientset/versioned/fake/clientset_generated.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ type Clientset struct {
discovery *fakediscovery.FakeDiscovery
}

//Discovery retrieves the DiscoveryClient
func (c *Clientset) Discovery() discovery.DiscoveryInterface {
return c.discovery
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/client/clientset/versioned/fake/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ limitations under the License.

// This package is generated by client-gen with custom arguments.

// This package has the automatically generated fake clientset.
// Package fake has the automatically generated fake clientset.
package fake
2 changes: 1 addition & 1 deletion pkg/client/clientset/versioned/scheme/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ limitations under the License.

// This package is generated by client-gen with custom arguments.

// This package contains the scheme of the automatically generated clientset.
// Package scheme contains the scheme of the automatically generated clientset.
package scheme
5 changes: 5 additions & 0 deletions pkg/client/clientset/versioned/scheme/register.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,13 @@ import (
serializer "k8s.io/apimachinery/pkg/runtime/serializer"
)

// Scheme : exported var
var Scheme = runtime.NewScheme()

// Codecs : exported var
var Codecs = serializer.NewCodecFactory(Scheme)

// ParameterCodec : exported var
var ParameterCodec = runtime.NewParameterCodec(Scheme)

func init() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ limitations under the License.

// This package is generated by client-gen with custom arguments.

// This package has the automatically generated typed clients.
// package : it has the automatically generated typed clients.
package v1alpha1
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ import (
testing "k8s.io/client-go/testing"
)

// FakeKubeflowV1alpha1 : creates structure for fake testing
type FakeKubeflowV1alpha1 struct {
*testing.Fake
}

// TFJobs : returns a fake tf-job instance
func (c *FakeKubeflowV1alpha1) TFJobs(namespace string) v1alpha1.TFJobInterface {
return &FakeTFJobs{c, namespace}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ limitations under the License.

package v1alpha1

// TFJobExpansion : interface for supporting TFJob expansion
type TFJobExpansion interface{}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
rest "k8s.io/client-go/rest"
)

// KubeflowV1alpha1Interface : interface for setting the REST client and TFJobsGetter
type KubeflowV1alpha1Interface interface {
RESTClient() rest.Interface
TFJobsGetter
Expand All @@ -32,7 +33,7 @@ type KubeflowV1alpha1Interface interface {
type KubeflowV1alpha1Client struct {
restClient rest.Interface
}

// TFJobs : return TFJobs for given namespace
func (c *KubeflowV1alpha1Client) TFJobs(namespace string) TFJobInterface {
return newTFJobs(c, namespace)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
time "time"
)

// NewInformerFunc a helping method for SharedInformerFactory interface
type NewInformerFunc func(versioned.Interface, time.Duration) cache.SharedIndexInformer

// SharedInformerFactory a small interface to allow for adding an informer without an import cycle
Expand Down
7 changes: 4 additions & 3 deletions pkg/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ const (
)

var (
// ErrVersionOutdated : exported var to capture the error in apiserver
ErrVersionOutdated = errors.New("requested version is outdated in apiserver")

// IndexerInformer uses a delta queue, therefore for deletes we have to use this
Expand All @@ -60,6 +61,7 @@ var (
MaxJobBackOff = 360 * time.Second
)

// Controller is structure to handle various clients
type Controller struct {
KubeClient kubernetes.Interface
APIExtclient apiextensionsclient.Interface
Expand All @@ -85,6 +87,7 @@ type Controller struct {
syncHandler func(jobKey string) (bool, error)
}

// New : a method to setting up client handles and returns controller
func New(kubeClient kubernetes.Interface, APIExtclient apiextensionsclient.Interface, tfJobClient tfjobclient.Interface,
config tfv1alpha1.ControllerConfig, tfJobInformerFactory informers.SharedInformerFactory) (*Controller, error) {
tfJobInformer := tfJobInformerFactory.Kubeflow().V1alpha1().TFJobs()
Expand Down Expand Up @@ -253,10 +256,8 @@ func (c *Controller) syncTFJob(key string) (bool, error) {
// case we should forget about a job when the appropriate condition is reached.
if tfJob.Status.Phase == tfv1alpha1.TFJobPhaseCleanUp {
return true, nil
} else {
return false, nil
}

return false, nil
}

// obj could be an *batch.Job, or a DeletionFinalStateUnknown marker item.
Expand Down

0 comments on commit 232d7d6

Please sign in to comment.