# Interoperability

## Import from Python

In [0]:
import Python

In [0]:
// Bind the Python modules used by the cells below through Swift's Python interop.
let np = Python.import("numpy")
let plt = Python.import("matplotlib.pyplot")

// Also enable Jupyter's display capabilities, asking matplotlib for the
// "inline" backend.
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

In [0]:
// Sample points with `np.linspace(0, 10, 100)`, then draw sine and cosine
// over them on the same figure.
let x = np.linspace(0, 10, 100)

for curve in [np.sin, np.cos] {
    plt.plot(x, curve(x))
}

plt.show()

## Play an AI game from Python

In [0]:
// Python `gym` module; `gym.make` is used later to create the environment.
let gym = Python.import("gym")

In [0]:
import TensorFlow

/// Model parameters and hyperparameters.
/// Size passed to `Net` as its `hiddenSize`.
let hiddenSize = 128
/// Number of episodes collected by each `nextBatch` call in the training loop.
let batchSize = 16
/// Controls the amount of good/long episodes to retain for training.
/// Passed to `np.percentile` over the episode rewards; episodes whose reward is
/// below the resulting bound are dropped.
let percentile = 70

/// An episode is a list of steps, where each step records the observation from
/// env and the action taken. They will serve respectively as the input and
/// target (label) of the neural net training.
struct Episode {
    /// A single recorded interaction with the environment.
    struct Step {
        /// The observation the action was chosen from.
        let observation: Tensor<Float>
        /// The action that was taken for `observation`.
        let action: Int32
    }

    /// The recorded steps, in the order they were taken.
    let steps: [Step]
    /// The episode's reward; `nextBatch` stores the sum of the per-step rewards here.
    let reward: Float
}

/// Filters out bad/short episodes before they are fed to the neural net as
/// training data.
///
/// The reward bound is `np.percentile(rewards, percentile)`, using the global
/// `percentile` setting. Every episode whose reward is not below that bound is
/// retained.
///
/// - Parameters:
///   - episodes: The episodes to filter.
///   - actionCount: Depth of the one-hot encoding used for the action labels.
/// - Returns:
///   - `input`: one tensor built from the observations of all retained steps.
///   - `target`: the one-hot encoded actions of those same steps.
///   - `episodeCount`: the number of retained episodes.
///   - `meanReward`: the mean reward of the retained episodes (`0 / 0`, i.e.
///     NaN, when nothing is retained).
///   When no step is retained, `input` and `target` are scalar zero
///   placeholders.
func filteringBatch(
  episodes: [Episode],
  actionCount: Int
) -> (input: Tensor<Float>, target: Tensor<Float>, episodeCount: Int, meanReward: Float) {
    let rewards = episodes.map { $0.reward }
    let rewardBound = Float(np.percentile(rewards, percentile))!
    print("rewardBound = \(rewardBound)")

    // Written as "not below the bound" rather than ">=" so that a NaN bound
    // keeps every episode, exactly as a `reward < rewardBound` skip test does.
    let retained = episodes.filter { !($0.reward < rewardBound) }
    let totalReward = retained.reduce(Float(0.0)) { $0 + $1.reward }
    let meanReward = totalReward / Float(retained.count)

    // Flatten the retained episodes into one list of steps so each tensor is
    // built exactly once instead of being re-concatenated per episode.
    let steps = retained.flatMap { $0.steps }
    guard !steps.isEmpty else {
        return (Tensor<Float>(0.0), Tensor<Float>(0.0), retained.count, meanReward)
    }

    let input = Tensor<Float>(steps.map { $0.observation })
    let actions = Tensor<Int32>(steps.map { $0.action })
    let target = Tensor<Float>(oneHotAtIndices: actions, depth: actionCount)

    return (input, target, retained.count, meanReward)
}

/// Thin Swift wrapper around a Python Gym environment object that converts
/// its results into Swift/TensorFlow values.
struct CartPoleEnvironment {
    /// The wrapped Python environment.
    let env: PythonObject

    /// Resets the environment and returns the observation it reports.
    ///
    /// Traps if the Python result cannot be converted to a `Tensor<Double>`.
    func reset() -> Tensor<Float> {
        let initial = Tensor<Double>(numpy: env.reset())!
        return Tensor<Float>(initial)
    }

    /// Applies `action` to the environment.
    ///
    /// - Returns: The next observation, the reward, and whether the episode is
    ///   done. The fourth element of the Python result tuple is ignored.
    ///   Traps if any of the three values cannot be converted.
    func step(_ action: Int32) -> (Tensor<Float>, Float, Bool) {
        let outcome = env.step(Int(action)).tuple4
        let observation = Tensor<Float>(Tensor<Double>(numpy: outcome.0)!)
        let reward = Float(outcome.1)!
        let isDone = Bool(outcome.2)!
        return (observation, reward, isDone)
    }
}

extension Tensor where Scalar: TensorFlowFloatingPoint {
    /// Draws `samples` samples via `Raw.multinomial`, treating this tensor as
    /// the logits.
    ///
    /// A rank-1 tensor is first reshaped to `[1, shape[0]]`; any other rank is
    /// passed through unchanged.
    func categorical(samples: Int) -> Tensor<Int32> {
        let logits: Tensor
        if rank == 1 {
            logits = reshaped(to: [1, shape[0]])
        } else {
            logits = self
        }
        let sampleCount = Tensor<Int32>(Int32(samples))
        return Raw.multinomial(logits: logits, numSamples: sampleCount)
    }
}

### Neural network

In [0]:
/// A simple two layer dense neural net: a ReLU-activated dense layer followed
/// by a dense layer that produces one output per action.
struct Net: Layer {
    typealias Input = Tensor<Float>
    typealias Output = Tensor<Float>

    /// First dense layer (`observationSize` -> `hiddenSize`, ReLU).
    var l1: Dense<Float>
    /// Second dense layer (`hiddenSize` -> `actionCount`).
    var l2: Dense<Float>

    /// Creates the two layers with the given sizes.
    init(observationSize: Int, hiddenSize: Int, actionCount: Int) {
        self.l1 = Dense<Float>(
            inputSize: observationSize,
            outputSize: hiddenSize,
            activation: relu)
        self.l2 = Dense<Float>(
            inputSize: hiddenSize,
            outputSize: actionCount)
    }

    /// Applies `l1` and then `l2` to `input`.
    @differentiable
    func call(_ input: Input) -> Output {
        let hidden = l1(input)
        return l2(hidden)
    }
}

### *Observe* and *act*

In [0]:
/// Plays `batchSize` complete episodes in `env`, choosing every action by
/// sampling from the output of `net`, and returns the recorded episodes.
///
/// - Parameters:
///   - env: The environment to play in. It is reset before the first episode
///     and again after every finished episode.
///   - net: The network whose output is sampled to pick each action.
///   - batchSize: The number of episodes to play.
///   - actionCount: Currently unused; kept so existing call sites keep working.
/// - Returns: One `Episode` per played episode, each holding its steps and the
///   sum of the rewards received.
func nextBatch(env: CartPoleEnvironment, net: Net, batchSize: Int, actionCount: Int) -> [Episode] {
    var observation = env.reset()

    var episodes: [Episode] = []

    // Build up a batch of observations and actions.
    for _ in 0..<batchSize {
        var steps: [Episode.Step] = []
        var episodeReward: Float = 0.0

        // This loop runs one episode.
        while true {
            // Add a leading batch dimension of 1. The width is taken from the
            // observation itself rather than hard-coded to 4.
            let netInput = observation.reshaped(to: [1, observation.scalarCount])
            let action = net(netInput).categorical(samples: 1).scalarized()
            let (nextObservation, reward, isDone) = env.step(action)
            // Record the observation the action was chosen from, not the one
            // that resulted from it.
            steps.append(Episode.Step(observation: observation, action: action))

            episodeReward += reward

            if isDone {
                episodes.append(Episode(steps: steps, reward: episodeReward))
                observation = env.reset()
                break
            }
            observation = nextObservation
        }
    }

    return episodes
}

### Load environment

In [0]:
// Create the environment and read the model dimensions from its observation
// and action spaces.
let env = CartPoleEnvironment(env: gym.make("CartPole-v0"))
let observationSize = Int(env.env.observation_space.shape[0])!
let actionCount = Int(env.env.action_space.n)!

// Model and optimizer.
var net = Net(
    observationSize: observationSize,
    hiddenSize: hiddenSize,
    actionCount: actionCount)
let optimizer = Adam(for: net, learningRate: 0.01)

// Training progress: mean reward per processed batch and the batch counter.
var meanRewards: [Float] = []
var batchIndex = 0

### Loop to learn

In [0]:
// Repeat: play a batch, keep the episodes that pass the reward filter, take
// one optimizer step on them, and stop once the batch's mean reward exceeds 199.
while true {
    print("Processing mini batch \(batchIndex)")
    batchIndex += 1

    let batch = nextBatch(env: env, net: net, batchSize: batchSize, actionCount: actionCount)
    let (features, labels, retainedCount, batchMeanReward) = filteringBatch(
        episodes: batch,
        actionCount: actionCount)

    // Gradient of the softmax cross-entropy between the network's logits and
    // the one-hot actions of the retained episodes.
    let gradients = withLearningPhase(.training) {
        net.gradient { model -> Tensor<Float> in
            let loss = softmaxCrossEntropy(logits: model(features), probabilities: labels)
            print("loss is \(loss)")
            return loss
        }
    }
    optimizer.update(&net.allDifferentiableVariables, along: gradients)

    print("It has episode count \(retainedCount) and mean reward \(batchMeanReward)")
    meanRewards.append(batchMeanReward)

    let solvedThreshold: Float = 199
    if batchMeanReward > solvedThreshold {
        print("Solved")
        break
    }
}

### Plot rewards

In [0]:
// Plot the mean reward recorded for each processed batch.
plt.plot(meanRewards)
plt.show()

## C Interoperability

In [0]:
import Glibc

In [0]:
// Copy a Swift string into a C buffer obtained from malloc, print it with
// puts, and release the buffer again.
let message = "Plain old C at Google I/O 2019!"
// Size the buffer from the message itself: its UTF-8 bytes plus one byte for
// the terminating NUL that strcpy writes, so the copy always fits.
let capacity = message.utf8.count + 1

let address = malloc(capacity)!
let string = address.bindMemory(to: CChar.self, capacity: capacity)

strcpy(string, message)
puts(string)

free(address)