##### Copyright 2019 The TensorFlow Authors. [Licensed under the Apache License, Version 2.0](#scrollTo=y_UVSRtBBsJk).

In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License"); { display-mode: "form" }
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://www.tensorflow.org/swift/tutorials/model_training_walkthrough"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" />View on TensorFlow.org</a>
  </td>
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/swift/blob/master/docs/site/tutorials/model_training_walkthrough.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/swift/blob/master/docs/site/tutorials/model_training_walkthrough.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
</table>

# Device placement APIs tutorial

This tutorial demonstrates how to use the device placement APIs in Swift for TensorFlow. Device placement APIs allow you to run operations on a specific device, for example, on a CPU or a GPU.

### Configure imports

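Import `TensorFlow` for the tensor and device placement APIs, and `Dispatch` for the timestamps used by the timing utilities below.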

In [9]:
import TensorFlow
import Dispatch

### Set up some timing utility functions

We'll use this function to run our code multiple times and return the median execution time.

In [10]:
/// Runs `function` repeatedly and returns the median duration of a single run.
///
/// - Parameters:
///   - iterationCount: How many times `function` is executed and timed. Defaults to 10.
///   - verbose: When `true`, every individual timing is printed before returning.
///     Defaults to `false`.
///   - function: The work to measure.
/// - Returns: The median duration in nanoseconds, or `0.0` when `iterationCount` is
///   zero and there is nothing to measure.
func getMedianExecutionTime(iterationCount: UInt = 10, _ verbose: Bool = false, _ function: () -> ()) -> Double {

    // An array to keep track of each execution timing, in nanoseconds.
    var timings: [Double] = []

    // Run the test iterationCount times, storing each timing in our array.
    for _ in 0..<iterationCount {
        let start = DispatchTime.now()
        function()
        let end = DispatchTime.now()
        timings.append(Double(end.uptimeNanoseconds - start.uptimeNanoseconds))
    }

    // If verbose mode is enabled, print every single timing.
    if verbose {
        print("Timings: \(timings.map { timeDescription($0) })")
    }

    // With no samples there is no median; return zero instead of indexing an empty array.
    guard !timings.isEmpty else {
        return 0.0
    }

    // Compute and return the median time.
    let sortedTimings = timings.sorted()
    let middle = sortedTimings.count / 2

    if sortedTimings.count % 2 == 0 {
        // Even count: average the two values surrounding the midpoint.
        return (sortedTimings[middle - 1] + sortedTimings[middle]) / 2.0
    }

    // Odd count: the median is the single middle element.
    return sortedTimings[middle]
}

/// Formats a duration given in nanoseconds as a human-readable string.
///
/// The value is scaled to the largest unit it reaches: nanoseconds below 1e3,
/// microseconds below 1e6, milliseconds below 1e9, and seconds otherwise.
///
/// - Parameter nanoseconds: The duration to describe, in nanoseconds.
/// - Returns: The scaled value followed by its unit.
func timeDescription(_ nanoseconds: Double) -> String {
    if nanoseconds < 1e3 {
        return "\(nanoseconds) ns"
    }
    if nanoseconds < 1e6 {
        return "\(nanoseconds/1e3) µs"
    }
    if nanoseconds < 1e9 {
        return "\(nanoseconds/1e6) ms"
    }
    return "\(nanoseconds/1e9) seconds"
}

### Define a computation operation

We'll reuse the same operation for each test for consistency. The operation multiplies two 1000 by 1000 tensors of random values using the `*` operator.

In [11]:
/// The shared workload used by every timing test in this tutorial.
///
/// - Returns: The product, computed with `*`, of two freshly sampled
///   1000x1000 `Float` tensors.
func runTestComputation() -> Tensor<Float> {
    // Sample both operands from a random normal distribution, each 1000x1000.
    let rhs = Tensor<Float>(randomNormal: [1000, 1000])
    let lhs = Tensor<Float>(randomNormal: [1000, 1000])

    // Hand the product back to the caller.
    return lhs * rhs
}

### Use the `withDevice(_:_:perform:)` API to run on the CPU

In [12]:
// Let's hold on to the result for later use to simulate a realistic scenario.
// It is optional because it is only assigned once the timed closure has run.
var cpuResult: Tensor<Float>?

// Wrap everything in a timing block to get the median execution time.
// The timing is never reassigned, so it is a constant.
let cpuTiming = getMedianExecutionTime {

    // This will execute the operation on the CPU (the `0` is presumably the
    // index of the CPU device to use).
    cpuResult = withDevice(.cpu, 0) { () -> Tensor<Float> in
        return runTestComputation()
    }
}

// Print the median time it took on the CPU.
print("Median CPU Timing: \(timeDescription(cpuTiming))")

// Print the scalar-valued mean of the elements within the resulting Tensor.
// The force unwrap relies on the timed closure having run at least once,
// which holds for the default iteration count of 10.
print("Mean of Result Tensor: \(cpuResult!.mean())")

Median CPU Timing: 4.1012375 ms
Mean of Result Tensor: -0.0005761595


### Now try running on the GPU

***Note:*** In order to use a GPU in Colaboratory, you need to:
1. Go to ***Runtime > Change runtime type***
2. Click on the Dropdown Menu titled ***'Hardware accelerator'***.
3. Select ***GPU*** from the list.
4. Click the ***Save*** button at the bottom of the dialog.
5. Ensure that your notebook has the 'Connected' status in the upper right-hand corner of the notebook.

In [13]:
// As in the CPU example, keep the output around after the timing loop finishes.
var gpuResult: Tensor<Float>?

// Time the same computation, this time placed on the GPU.
let gpuTiming = getMedianExecutionTime {
    gpuResult = withDevice(.gpu, 0) { () -> Tensor<Float> in runTestComputation() }
}

// Report the median duration measured on the GPU.
print("Median GPU Timing: \(timeDescription(gpuTiming))")

// Report the scalar-valued mean of the elements of the resulting Tensor.
print("Mean of Result Tensor: \(gpuResult!.mean())")

Median GPU Timing: 168.635 µs
Mean of Result Tensor: -0.0007909669


### Use `withDefaultDevice(perform:)` to force the default device placement behavior

You can use `withDefaultDevice(perform:)` to tell Swift for TensorFlow to use the default device placement behavior. For example, if you're in the middle of a `withDevice(_:_:perform:)` call on the CPU, you can nest a `withDefaultDevice(perform:)` call to ensure an operation is performed with the default device placement behavior.

In [14]:
// Everything inside this closure starts out placed on the CPU.
withDevice(.cpu, 0) {

    // Outputs of the two timed computations, kept so their means can be printed.
    var cpuOutput: Tensor<Float>?
    var defaultOutput: Tensor<Float>?

    // Time the computation in the current context (CPU).
    let cpuElapsed = getMedianExecutionTime {
        cpuOutput = runTestComputation()
    }

    // Time the same computation again, but nested in a block that restores
    // the default device placement behavior.
    let defaultElapsed = getMedianExecutionTime {
        withDefaultDevice {
            defaultOutput = runTestComputation()
        }
    }

    // Compute both means before printing anything.
    let cpuMean = cpuOutput!.mean()
    let defaultMean = defaultOutput!.mean()

    print("CPU: \(timeDescription(cpuElapsed)) Result Mean: \(cpuMean)")

    // NOTE(review): the default placement presumably selects an accelerator
    // such as a GPU when one is available — confirm for your runtime.
    print("Default: \(timeDescription(defaultElapsed)) Result Mean: \(defaultMean)")
}

CPU: 4.029717 ms Result Mean: -0.0013383538
Default: 94.1 µs Result Mean: -0.0015097325
