diff --git a/changelogs/client_server/newsfragments/1735.feature b/changelogs/client_server/newsfragments/1735.feature new file mode 100644 index 000000000..1d7641421 --- /dev/null +++ b/changelogs/client_server/newsfragments/1735.feature @@ -0,0 +1 @@ +Add support for multi-stream VoIP, as per [MSC3077](https://github.com/matrix-org/matrix-spec-proposals/pull/3077). \ No newline at end of file diff --git a/content/client-server-api/modules/voip_events.md b/content/client-server-api/modules/voip_events.md index 102e3dcd7..4df0ba13f 100644 --- a/content/client-server-api/modules/voip_events.md +++ b/content/client-server-api/modules/voip_events.md @@ -171,18 +171,31 @@ In response to an incoming invite, a client may do one of several things: ##### Streams -Clients are expected to send one stream with one track of kind `audio` (creating a -voice call). They can optionally send a second track in the same stream of kind -`video` (creating a video call). - -Clients implementing this specification use the first stream and will ignore -any streamless tracks. Note that in the JavaScript WebRTC API, this means -`addTrack()` must be passed two parameters: a track and a stream, not just a -track, and in a video call the stream must be the same for both audio and video -track. - -A client may send other streams and tracks but the behaviour of the other party -with respect to presenting such streams and tracks is undefined. +Clients may send more than one stream in a VoIP call. The streams should be +differentiated by including metadata in the [`m.call.invite`](/client-server-api/#mcallinvite), +[`m.call.answer`](/client-server-api/#mcallanswer) and [`m.call.negotiate`](/client-server-api/#mcallnegotiate) +events, using the `sdp_stream_metadata` property. + +`sdp_stream_metadata` maps from the `id` of a stream in the session description, +to metadata about that stream. Currently only one property is defined for the +metadata. 
This is `purpose`, which should be a string indicating the purpose of +the stream. The following `purpose`s are defined: + +* `m.usermedia` - stream that contains the webcam and/or microphone tracks +* `m.screenshare` - stream with the screen-sharing tracks + +If `sdp_stream_metadata` is present and an incoming stream is not listed in it, +the stream should be ignored. If a stream has a `purpose` of an unknown type, it +should also be ignored. + +For backwards compatibility, if `sdp_stream_metadata` is not present in the +initial [`m.call.invite`](/client-server-api/#mcallinvite) or [`m.call.answer`](/client-server-api/#mcallanswer) +event sent by the other party, the client should assume that this property is +not supported by the other party. This means that multiple streams cannot be +differentiated: the client should only use the first incoming stream and +should not send more than one stream. + +Clients implementing this specification should ignore any streamless tracks. ##### Invitees The `invitee` field should be added whenever the call is intended for one diff --git a/data/event-schemas/examples/m.call.answer.yaml b/data/event-schemas/examples/m.call.answer.yaml index 78b488783..8a6273603 100644 --- a/data/event-schemas/examples/m.call.answer.yaml +++ b/data/event-schemas/examples/m.call.answer.yaml @@ -8,6 +8,14 @@ "answer": { "type" : "answer", "sdp" : "v=0\r\no=- 6584580628695956864 2 IN IP4 127.0.0.1[...]" + }, + "sdp_stream_metadata": { + "271828182845": { + "purpose": "m.screenshare" + }, + "314159265358": { + "purpose": "m.usermedia" + } } } } diff --git a/data/event-schemas/examples/m.call.invite.yaml b/data/event-schemas/examples/m.call.invite.yaml index 45600001e..9547854b5 100644 --- a/data/event-schemas/examples/m.call.invite.yaml +++ b/data/event-schemas/examples/m.call.invite.yaml @@ -9,6 +9,14 @@ "offer": { "type" : "offer", "sdp" : "v=0\r\no=- 6584580628695956864 2 IN IP4 127.0.0.1[...]" + }, + "sdp_stream_metadata": { + "271828182845": { +
"purpose": "m.screenshare" + }, + "314159265358": { + "purpose": "m.usermedia" + } } } } diff --git a/data/event-schemas/examples/m.call.negotiate.yaml b/data/event-schemas/examples/m.call.negotiate.yaml index fabb6add4..aaf9daf24 100644 --- a/data/event-schemas/examples/m.call.negotiate.yaml +++ b/data/event-schemas/examples/m.call.negotiate.yaml @@ -9,6 +9,14 @@ "description": { "type" : "offer", "sdp" : "v=0\r\no=- 6584580628695956864 2 IN IP4 127.0.0.1[...]" + }, + "sdp_stream_metadata": { + "271828182845": { + "purpose": "m.screenshare" + }, + "314159265358": { + "purpose": "m.usermedia" + } } } } diff --git a/data/event-schemas/schema/components/sdp_stream_metadata.yaml b/data/event-schemas/schema/components/sdp_stream_metadata.yaml new file mode 100644 index 000000000..f16b4cbdc --- /dev/null +++ b/data/event-schemas/schema/components/sdp_stream_metadata.yaml @@ -0,0 +1,27 @@ +type: object +x-addedInMatrixVersion: "1.10" +description: |- + Metadata describing the [streams](/client-server-api/#streams) that will be + sent. + + This is a map of stream ID to metadata about the stream. +additionalProperties: + type: object + title: StreamMetadata + description: Metadata describing a stream. + properties: + purpose: + type: string + enum: + - m.usermedia + - m.screenshare + description: |- + The purpose of the stream. + + The possible values are: + + * `m.usermedia`: Stream that contains the webcam and/or microphone + tracks. + * `m.screenshare`: Stream with the screen-sharing tracks. 
+ required: + - purpose diff --git a/data/event-schemas/schema/m.call.answer.yaml b/data/event-schemas/schema/m.call.answer.yaml index 163690be2..15e072020 100644 --- a/data/event-schemas/schema/m.call.answer.yaml +++ b/data/event-schemas/schema/m.call.answer.yaml @@ -27,6 +27,9 @@ } }, "required": ["type", "sdp"] + }, + "sdp_stream_metadata": { + "$ref": "components/sdp_stream_metadata.yaml" } }, "required": ["answer"] diff --git a/data/event-schemas/schema/m.call.invite.yaml b/data/event-schemas/schema/m.call.invite.yaml index 72020b266..c688d7b31 100644 --- a/data/event-schemas/schema/m.call.invite.yaml +++ b/data/event-schemas/schema/m.call.invite.yaml @@ -35,7 +35,10 @@ "invitee": { "type": "string", "description": "The ID of the user being called. If omitted, any user in the room can answer.", - "x-addedInMatrixVersion": "1.7", + "x-addedInMatrixVersion": "1.7" + }, + "sdp_stream_metadata": { + "$ref": "components/sdp_stream_metadata.yaml" } }, "required": ["offer", "lifetime"] diff --git a/data/event-schemas/schema/m.call.negotiate.yaml b/data/event-schemas/schema/m.call.negotiate.yaml index b2b47c1d7..e1a14f6f3 100644 --- a/data/event-schemas/schema/m.call.negotiate.yaml +++ b/data/event-schemas/schema/m.call.negotiate.yaml @@ -63,6 +63,8 @@ properties: type: integer description: The time in milliseconds that the negotiation is valid for. Once the negotiation age exceeds this value, clients should discard it. + sdp_stream_metadata: + $ref: components/sdp_stream_metadata.yaml required: - description - lifetime