##### Copyright 2018 The TensorFlow Authors. [Licensed under the Apache License, Version 2.0](#scrollTo=y_UVSRtBBsJk).

In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License"); { display-mode: "form" }
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://www.tensorflow.org/swift/tutorials/model_training_walkthrough"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" />View on TensorFlow.org</a>
  </td>
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/swift/blob/master/docs/site/tutorials/model_training_walkthrough.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/swift/blob/master/docs/site/tutorials/model_training_walkthrough.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
</table>

# Device Placement APIs Tutorial

This tutorial demonstrates how to use the device placement APIs in Swift for TensorFlow. Device placement APIs allow you to run operations on a specific device, for example, on a CPU or a GPU.

### Configure imports

Import TensorFlow, along with Dispatch, which provides the clock used by the timing utilities below.

In [3]:
import TensorFlow
import Dispatch

### Set Up Some Timing Utility Functions

We'll use this function to run our code multiple times and return the median execution time.

In [4]:
/// Runs `function` repeatedly and returns the median duration of a single run.
///
/// - Parameters:
///   - iterationCount: How many times `function` is executed. Defaults to 10.
///   - verbose: When `true`, every individual timing is printed before returning.
///   - function: The work to be measured.
/// - Returns: The median duration in nanoseconds, or `0` when `iterationCount` is `0`
///   (there is nothing to measure in that case).
func getMedianExecutionTime(iterationCount: UInt = 10, _ verbose: Bool = false, _ function: () -> ()) -> Double {

    // One entry per run, in nanoseconds.
    var timings: [Double] = []

    // Run the test iterationCount times, storing each timing in our array.
    for _ in 0..<iterationCount {
        let start = DispatchTime.now()
        function()
        let end = DispatchTime.now()
        timings.append(Double(end.uptimeNanoseconds - start.uptimeNanoseconds))
    }

    // If verbose mode is enabled, print every single timing.
    if verbose {
        print("Timings: \(timings.map { getTimeString($0) })")
    }

    // Without any samples there is no median; avoid indexing an empty array.
    guard !timings.isEmpty else { return 0.0 }

    // Compute and return the median time.
    let sortedTimings = timings.sorted()
    let middle = sortedTimings.count / 2

    if sortedTimings.count % 2 == 0 {
        // Even count: average the two central samples.
        return (sortedTimings[middle - 1] + sortedTimings[middle]) / 2.0
    }

    // Odd count: the central sample sits exactly at count / 2.
    return sortedTimings[middle]
}

/// Formats a duration for display, choosing the largest unit that keeps the value at or above 1.
///
/// - Parameter nanoseconds: The duration, expressed in nanoseconds.
/// - Returns: The value followed by its unit: "ns", "µs", "ms" or "seconds".
func getTimeString(_ nanoseconds: Double) -> String {
    if nanoseconds < 1e3 {
        return "\(nanoseconds) ns"
    } else if nanoseconds < 1e6 {
        return "\(nanoseconds / 1e3) µs"
    } else if nanoseconds < 1e9 {
        return "\(nanoseconds / 1e6) ms"
    } else {
        return "\(nanoseconds / 1e9) seconds"
    }
}

### Define a Computation Operation

We'll reuse the same operation for each test for consistency: an element-wise multiplication of two 1000 by 1000 tensors.

In [5]:
/// The shared workload used by every timing test in this tutorial.
///
/// - Returns: The product of two freshly generated 1000x1000 random-normal tensors.
func runTestComputation() -> Tensor<Float> {
    // Draw both 1000x1000 operands from a random normal distribution.
    let input = Tensor<Float>(randomNormal: [1000, 1000])
    let weights = Tensor<Float>(randomNormal: [1000, 1000])

    // `*` on tensors multiplies element by element (it is not a matrix product).
    let product = weights * input
    return product
}

### Use the `withDevice(_:_:perform:)` API to Run on the CPU

In [4]:
// Keep the output around after timing, as a real program would use the result.
var cpu_result: Tensor<Float>?

// Measure the median execution time of the computation when it is pinned to
// CPU device 0. The timing is never reassigned, so it is a constant.
let cpu_timing = getMedianExecutionTime {

    // Everything inside this closure is placed on the CPU.
    cpu_result = withDevice(.cpu, 0) { () -> Tensor<Float> in
        return runTestComputation()
    }
}

// Print the median time it took on the CPU.
print("Median CPU Timing: \(getTimeString(cpu_timing))")

// Print the scalar-valued mean of the elements within the resulting Tensor.
// The result is only set if the timed closure ran at least once.
if let result = cpu_result {
    print("Mean of Result Tensor: \(result.mean())")
}

Median CPU Timing: 3.9170575 ms
Mean of Result Tensor: 0.0005039215


### Now Try Running on the GPU

***Note:*** In order to use a GPU in Colaboratory, you need to:
1. Go to ***Runtime > Change Runtime Type***
2. Click on the Dropdown Menu titled ***'Hardware accelerator'***.
3. Select ***GPU*** from the list.
4. Click the ***Save*** button at the bottom of the dialog.
5. Ensure that your notebook shows the 'Connected' status in the upper right-hand corner.

In [5]:
// As with the CPU run, keep the output so it can be inspected after timing.
var gpu_result: Tensor<Float>?

// Time the same computation, this time pinned to GPU device 0.
let gpu_timing = getMedianExecutionTime {
    gpu_result = withDevice(.gpu, 0) { runTestComputation() }
}

// Report the median duration of the GPU runs.
print("Median GPU Timing: \(getTimeString(gpu_timing))")

// Report the scalar mean over all elements of the last GPU result.
print("Mean of Result Tensor: \(gpu_result!.mean())")

Median GPU Timing: 179.83 µs
Mean of Result Tensor: 0.000121611134


### Using `withDefaultDevice(perform:)`

You can use `withDefaultDevice(perform:)` to tell Swift for TensorFlow to use the default device placement behavior. For example, if you're in the middle of a `withDevice(_:_:perform:)` call on the CPU, you can nest a `withDefaultDevice(perform:)` call to ensure an operation is performed with the default device placement behavior.

In [8]:
// Pin the enclosing scope to CPU device 0.
withDevice(.cpu, 0) {

    // Captured outputs, kept so their means can be reported after timing.
    var pinnedResult: Tensor<Float>?
    var defaultResult: Tensor<Float>?

    // Time the computation under the enclosing placement (CPU).
    let pinnedTiming = getMedianExecutionTime {
        pinnedResult = runTestComputation()
    }

    // Time it again, nested inside withDefaultDevice so that the default
    // device placement behavior applies instead of the CPU pinning.
    let defaultTiming = getMedianExecutionTime {
        withDefaultDevice {
            defaultResult = runTestComputation()
        }
    }

    // Reduce each result to its scalar mean.
    let pinnedMean = pinnedResult!.mean()
    let defaultMean = defaultResult!.mean()

    print("CPU: \(getTimeString(pinnedTiming)) Result Mean: \(pinnedMean)")

    // NOTE(review): which device "default" resolves to depends on the runtime;
    // with a GPU attached the timing is expected to resemble the GPU run — confirm.
    print("Default: \(getTimeString(defaultTiming)) Result Mean: \(defaultMean)")
}


CPU: 3.658211 ms Result Mean: -0.0007472881
Default: 95.761 µs Result Mean: -0.0014350368
