This repository has been archived by the owner on Jan 14, 2021. It is now read-only.
/
voice.cr
304 lines (247 loc) · 9.61 KB
/
voice.cr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
require "uri"
require "./mappings/gateway"
require "./mappings/vws"
require "./websocket"
require "./sodium"
module Discord
class VoiceClient
# The protocol name passed to `send_select_protocol` by `udp_connect`
UDP_PROTOCOL = "udp"
# The mode that tells Discord we want to send encrypted audio
ENCRYPTED_MODE = "xsalsa20_poly1305"
# Opcodes of voice websocket (VWS) packets. `on_message` dispatches on
# `OP_READY`, `OP_SESSION_DESCRIPTION` and `OP_HELLO`; `OP_HEARTBEAT` is used
# to build `HEARTBEAT_JSON`.
OP_IDENTIFY = 0
OP_SELECT_PROTOCOL = 1
OP_READY = 2
OP_HEARTBEAT = 3
OP_SESSION_DESCRIPTION = 4
OP_SPEAKING = 5
OP_HELLO = 8
# The heartbeat is the same every time, so it can be a constant
HEARTBEAT_JSON = {op: OP_HEARTBEAT, d: nil}.to_json
# UDP client used for IP discovery and for sending the audio packets
@udp : VoiceUDP
# Sequence number and timestamp passed along with every audio packet; both
# are advanced by `increment_packet_metadata` before a packet is sent
@sequence : UInt16 = 0_u16
@time : UInt32 = 0_u32
# Host the VWS and the UDP socket connect to
@endpoint : String
# Values sent to Discord in the identify packet (see `handle_hello`)
@server_id : UInt64
@user_id : UInt64
@session_id : String
@token : String
# Heartbeat interval in milliseconds; nil until a HELLO or READY payload has
# been handled
@heartbeat_interval : Int32?
# Whether `heartbeat_loop` should keep running; set by `run`, cleared by
# `close`
@send_heartbeats = false
# Creates a new voice client.
#
# *payload* is the payload of a VOICE_SERVER_UPDATE dispatch, which is
# received from Discord after sending a voice state update (gateway op 4)
# packet. *session* is the session currently in use by the gateway client on
# which that dispatch was received, and *user_id* is the user ID of the
# account on which the voice client is created. (It is received as part of
# the gateway READY dispatch, for example.) *logger* receives the log output
# of the voice client; note that its progname is overwritten with
# "discordcr".
def initialize(payload : Discord::Gateway::VoiceServerUpdatePayload,
               session : Discord::Gateway::Session,
               user_id : UInt64 | Snowflake, @logger = Logger.new(STDOUT))
  @logger.progname = "discordcr"

  @user_id = user_id.to_u64

  # Only the host name is wanted: the websocket below always uses port 443
  # and the UDP port is taken from the READY payload. Strip the port at the
  # very end of the endpoint only, so that a match elsewhere in the string
  # (e.g. the ":80" inside ":8080") can never mangle the host.
  @endpoint = payload.endpoint.sub(/:\d+\z/, "")

  @server_id = payload.guild_id.to_u64
  @session_id = session.session_id
  @token = payload.token

  @websocket = Discord::WebSocket.new(
    host: @endpoint,
    path: "/",
    port: 443,
    tls: true,
    logger: @logger
  )

  @websocket.on_message(&->on_message(Discord::WebSocket::Packet))
  @websocket.on_close(&->on_close(String))

  @udp = VoiceUDP.new
end
# Starts the heartbeat loop in its own fiber and then runs the websocket.
# This call blocks forever once the connection process has been initiated.
def run
  @send_heartbeats = true

  spawn do
    heartbeat_loop
  end

  @websocket.run
end
# Closes the VWS connection, in effect disconnecting from voice.
def close
# Clear the flag first so that `heartbeat_loop` stops at its next iteration
@send_heartbeats = false
@websocket.close
end
# Sets the handler that should be run once the voice client has connected
# successfully. The block is stored in `@ready_handler` and is called in its
# own fiber by `handle_session_description`, i.e. once the secret key has
# been received.
def on_ready(&@ready_handler : ->)
end
# Tells Discord over the VWS whether or not we are speaking right now.
# *speaking* and *delay* are passed on unchanged to the speaking packet.
def send_speaking(speaking : Bool, delay : Int32 = 0)
  @websocket.send(VWS::SpeakingPacket.new(speaking, delay).to_json)
end
# Plays a single opus packet. The sequence number and timestamp are advanced
# first, then *buf* is handed to the UDP client together with the new values.
def play_opus(buf : Bytes)
  increment_packet_metadata

  sequence, timestamp = @sequence, @time
  @udp.send_audio(buf, sequence, timestamp)
end
# Advances the sequence number by 1 and the timestamp by 960 for the next
# audio packet.
#
# Both counters are fixed-width unsigned integers (`UInt16` and `UInt32`), so
# in a long-running session they eventually reach their maximum value. The
# wrapping operator `&+` makes them roll over to zero at that point, whereas
# plain `+` raises an `OverflowError` and would abort playback.
private def increment_packet_metadata
  @sequence &+= 1
  @time &+= 960
end
# Sends heartbeats over the VWS for as long as `@send_heartbeats` is set.
#
# As long as no heartbeat interval is known (it is set by `handle_hello` and
# `handle_ready`) nothing is sent and the loop checks again after one second.
private def heartbeat_loop
  while @send_heartbeats
    # Bind the interval to a local so the compiler knows it is not nil; this
    # avoids a `not_nil!` on the instance variable.
    if interval = @heartbeat_interval
      @websocket.send(HEARTBEAT_JSON)
      sleep interval.milliseconds
    else
      sleep 1.second
    end
  end
end
# Called for every packet received on the VWS. Logs the packet and passes
# READY, SESSION_DESCRIPTION and HELLO payloads on to their handlers; packets
# with any other opcode are only logged.
private def on_message(packet : Discord::WebSocket::Packet)
  @logger.debug("VWS packet received: #{packet} #{packet.data.to_s}")

  case packet.opcode
  when OP_READY
    handle_ready(VWS::ReadyPayload.from_json(packet.data))
  when OP_SESSION_DESCRIPTION
    handle_session_description(VWS::SessionDescriptionPayload.from_json(packet.data))
  when OP_HELLO
    handle_hello(VWS::HelloPayload.from_json(packet.data))
  end
end
# Called when the VWS is closed. Logs a warning with the close code (the
# first two bytes of *message*, big-endian) and the reason (the remaining
# bytes). If *message* is shorter than two bytes its raw bytes are logged
# instead. Always returns nil.
private def on_close(message : String)
  if message.bytesize >= 2
    raw = message.to_slice
    code = IO::ByteFormat::BigEndian.decode(UInt16, raw[0, 2])
    reason = message.byte_slice(2, message.bytesize - 2)
    @logger.warn("VWS closed with code #{code}, reason: #{reason}")
  else
    @logger.warn("VWS closed with data: #{message.bytes}")
  end

  nil
end
# Handles the READY payload: stores the heartbeat interval it contains and
# starts the UDP connection with the port and SSRC it contains.
private def handle_ready(payload : VWS::ReadyPayload)
  # This interval supersedes the one that was set before
  @heartbeat_interval = payload.heartbeat_interval

  udp_port = payload.port.to_u32
  ssrc = payload.ssrc.to_u32
  udp_connect(udp_port, ssrc)
end
# Connects the UDP client to the endpoint on *port*, runs the discovery
# exchange and sends the IP and port from the discovery reply to Discord in a
# select-protocol packet.
private def udp_connect(port, ssrc)
  @udp.connect(@endpoint, port, ssrc)
  @udp.send_discovery

  our_ip, our_port = @udp.receive_discovery_reply
  send_select_protocol(UDP_PROTOCOL, our_ip, our_port, ENCRYPTED_MODE)
end
# Builds an identify packet from the given values and sends it over the VWS
# as JSON.
private def send_identify(server_id, user_id, session_id, token)
  identify_json = VWS::IdentifyPacket.new(server_id, user_id, session_id, token).to_json
  @websocket.send(identify_json)
end
# Builds a select-protocol packet for *protocol*, carrying *address*, *port*
# and *mode* as its protocol data, and sends it over the VWS as JSON.
private def send_select_protocol(protocol, address, port, mode)
  packet = VWS::SelectProtocolPacket.new(
    protocol,
    VWS::ProtocolData.new(address, port, mode)
  )
  @websocket.send(packet.to_json)
end
# Handles the SESSION_DESCRIPTION payload: hands the secret key it contains
# to the UDP client and then runs the ready handler (if one was set) in a new
# fiber.
private def handle_session_description(payload : VWS::SessionDescriptionPayload)
  key = payload.secret_key
  @udp.secret_key = Bytes.new(key.to_unsafe, key.size)

  # With the secret key in place audio data can be sent, so let the user know
  spawn do
    @ready_handler.try(&.call)
  end
end
# Handles the HELLO payload: stores the heartbeat interval it contains and
# identifies with the server ID, user ID, session ID and token of this client.
private def handle_hello(payload : VWS::HelloPayload)
  interval = payload.heartbeat_interval
  @heartbeat_interval = interval

  send_identify(@server_id, @user_id, @session_id, @token)
end
end
# Client for Discord's voice UDP protocol, on which the actual audio data is
# sent. There should be no reason to manually use this class: use
# `VoiceClient` instead which uses this class internally.
class VoiceUDP
# The key used to encrypt outgoing audio. It is nil until it has been
# assigned; `send_audio` raises as long as no key is set.
@secret_key : Bytes?
property secret_key
# The UDP socket created in `initialize`
getter socket
# Creates the UDP socket. It is not connected to anything until `connect` is
# called.
def initialize
@socket = UDPSocket.new
end
# Stores *ssrc* for use in the discovery packet and in audio packet headers,
# then connects the socket to *endpoint* on *port*.
def connect(endpoint : String, port : UInt32, ssrc : UInt32)
@ssrc = ssrc
@socket.connect(endpoint, port)
end
# Sends a discovery packet to Discord, which asks them to tell us our IP so
# that the protocol can be selected on the VWS. The packet is 70 bytes long:
# the SSRC (big-endian) in the first four bytes, the rest zeroes.
#
# Raises if no SSRC has been set, i.e. if `connect` was not called before.
def send_discovery
  ssrc = @ssrc.not_nil!

  packet = Bytes.new(70)
  IO::ByteFormat::BigEndian.encode(ssrc, packet[0, 4])
  @socket.write(packet)
end
# Waits for the response to the discovery request and returns our local IP
# and port as read from it.
#
# NOTE(review): the port is decoded as big-endian here; confirm the byte
# order against Discord's IP discovery documentation.
def receive_discovery_reply : {String, UInt16}
  reply = Bytes.new(70)
  @socket.receive(reply)

  # Layout of the reply: bytes 0-3 repeat the SSRC (not needed), bytes 4-67
  # hold the null-padded IP, bytes 68-69 hold the port
  ip = String.new(reply[4, 64]).delete("\0")
  port = IO::ByteFormat::BigEndian.decode(UInt16, reply[68, 2])

  {ip, port}
end
# Sends 20 ms of opus audio data to Discord. *sequence* and *time* go into
# the packet header and are used on the receiving client to synchronise
# packets. The packet written to the socket is the header followed by the
# encrypted audio.
def send_audio(buf, sequence, time)
  header = create_header(sequence, time)
  encrypted = encrypt_audio(header, buf)

  packet = Bytes.new(header.size + encrypted.size)
  header.copy_to(packet)
  encrypted.copy_to(packet + header.size)

  @socket.write(packet)
end
# :nodoc:
#
# Builds the 12 byte header of an audio packet: two magic bytes followed by
# *sequence* (2 bytes), *time* (4 bytes) and the SSRC (4 bytes), all
# big-endian. Raises if no SSRC has been set.
def create_header(sequence : UInt16, time : UInt32) : Bytes
  ssrc = @ssrc.not_nil!
  header = Bytes.new(12)

  # The magic bytes required by Discord
  header[0] = 0x80_u8
  header[1] = 0x78_u8

  big_endian = IO::ByteFormat::BigEndian
  big_endian.encode(sequence, header[2, 2])
  big_endian.encode(time, header[4, 4])
  big_endian.encode(ssrc, header[8, 4])

  header
end
# Encrypts *buf* with the secret key, using *header* as the first 12 bytes of
# the 24 byte nonce (the remaining bytes are zero). Returns the ciphertext
# without the leading zero bytes that Sodium puts in front of it.
#
# Raises if no secret key has been set.
private def encrypt_audio(header : Bytes, buf : Bytes) : Bytes
  key = @secret_key
  raise "No secret key was set!" unless key

  # Sizes of the zero padding Sodium expects in front of the message and
  # leaves in front of the ciphertext
  padding = Sodium.crypto_secretbox_xsalsa20poly1305_zerobytes
  box_padding = Sodium.crypto_secretbox_xsalsa20poly1305_boxzerobytes

  nonce = Bytes.new(24, 0_u8)
  header.copy_to(nonce)

  # The message is the audio data with `padding` zero bytes in front of it
  plaintext = Bytes.new(buf.size + padding, 0_u8)
  buf.copy_to(plaintext + padding)

  ciphertext = Bytes.new(plaintext.size)
  Sodium.crypto_secretbox_xsalsa20poly1305(ciphertext, plaintext, plaintext.bytesize, nonce, key)

  # Skip the zero bytes at the start of the ciphertext buffer
  ciphertext + box_padding
end
end
# Runs the given block, measures how long it took and then sleeps for
# *total_time* minus that duration (or not at all if the block took longer
# than *total_time*).
#
# This is useful for voice code because (in most cases) voice data should be
# sent to Discord at a rate of one frame every 20 ms; if processing and
# sending takes a certain amount of time and that time is not accounted for,
# noticeable choppiness can be heard.
def self.timed_run(total_time : Time::Span)
  elapsed = Time.measure { yield }

  remaining = total_time - elapsed
  nothing = Time::Span.zero
  sleep(remaining > nothing ? remaining : nothing)
end
# Runs the given block once every *time_span*, forever. The execution time of
# the block is taken into account (see `timed_run`) to keep the intervals
# accurate.
#
# If the block takes longer to execute than *time_span*, there is no delay at
# all: the next iteration follows immediately and no attempt is made to get
# back in sync.
def self.every(time_span : Time::Span)
  loop { timed_run(time_span) { yield } }
end
end