-
-
Notifications
You must be signed in to change notification settings - Fork 26
/
replay.py
237 lines (199 loc) · 9.33 KB
/
replay.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
# Copyright (c) 2017 https://github.com/ping
#
# This software is released under the MIT License.
# https://opensource.org/licenses/MIT
import argparse
import logging
import os
import re
import xml.etree.ElementTree
import subprocess
from contextlib import closing
import requests
try:
from .compat import compat_urllib_parse_urlparse
except ValueError:
# pragma: no cover
# To allow running in terminal
from compat import compat_urllib_parse_urlparse
# Module-level logger shared by everything in this file.
# NOTE(review): the logger is named after the file path (__file__) instead of the
# conventional module name (__name__) - confirm nothing keys on that name before changing it.
logger = logging.getLogger(__file__)
# Prefix -> namespace URI map handed to ElementTree find()/findall() so that
# MPD elements can be addressed as 'mpd:<Tag>'.
MPD_NAMESPACE = {'mpd': 'urn:mpeg:dash:schema:mpd:2011'}
class Downloader(object):
    """Downloads and assembles a given IG live replay stream.

    The MPD manifest is parsed when the object is created. :meth:`download`
    then fetches the best audio and video representation of every period and
    (unless told otherwise) joins them into a single file with ffmpeg.
    """

    USER_AGENT = 'Instagram 10.26.0 (iPhone8,1; iOS 10_2; en_US; en-US; ' \
                 'scale=2.00; gamut=normal; 750x1334) AppleWebKit/420+'
    DOWNLOAD_TIMEOUT = 15

    # Duration with optional day / hour / minute / second components.
    # The trailing "S" is optional and trailing text is ignored so that every
    # value accepted by the previous (stricter) pattern is still accepted.
    _DURATION_RE = re.compile(
        r'P(?:(?P<days>\d+)D)?'
        r'(?:T(?:(?P<hrs>\d+)H)?(?:(?P<mins>\d+)M)?(?:(?P<secs>\d+(?:\.\d+)?)S?)?)?')

    def __init__(self, mpd, output_dir, user_agent=None, **kwargs):
        """
        :param mpd: the MPD manifest contents (XML text), not a URL
        :param output_dir: folder to store the downloaded files, created if missing
        :param user_agent: User-Agent header for downloads, defaults to ``USER_AGENT``
        :param kwargs: optional ``download_timeout`` (seconds, defaults to
            ``DOWNLOAD_TIMEOUT``) and ``ffmpeg_binary`` (path to ffmpeg)
        :return:
        """
        self.mpd = mpd
        self.output_dir = output_dir
        if not os.path.isdir(self.output_dir):
            try:
                os.makedirs(self.output_dir)
            except OSError:
                # Another process may have created the folder in the meantime
                if not os.path.isdir(self.output_dir):
                    raise
        self.user_agent = user_agent or self.USER_AGENT
        self.download_timeout = kwargs.pop('download_timeout', None) or self.DOWNLOAD_TIMEOUT

        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(max_retries=2)
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        self.session = session

        # custom ffmpeg binary path, fallback to ffmpeg_binary path in env if available
        self.ffmpeg_binary = kwargs.pop('ffmpeg_binary', None) or os.getenv('FFMPEG_BINARY', 'ffmpeg')

        xml.etree.ElementTree.register_namespace('', MPD_NAMESPACE['mpd'])
        self.mpd_document = xml.etree.ElementTree.fromstring(self.mpd)

        duration_attribute = self.mpd_document.attrib.get('mediaPresentationDuration', '')
        duration = self._parse_duration(duration_attribute)
        if duration is None:
            logger.warning('Unable to parse duration: {}'.format(duration_attribute))
            duration = 0
        self.duration = duration

    @classmethod
    def _parse_duration(cls, value):
        """Return the duration string *value* in whole seconds, or None if unparseable."""
        mobj = cls._DURATION_RE.match((value or '').strip())
        if not mobj:
            return None
        parts = mobj.groupdict()
        # A bare "P" / "PT" matches the pattern but carries no information
        if not any(parts.values()):
            return None
        return int(round(
            int(parts['days'] or 0) * 24 * 60 * 60 +
            int(parts['hrs'] or 0) * 60 * 60 +
            int(parts['mins'] or 0) * 60 +
            float(parts['secs'] or 0)
        ))

    @staticmethod
    def _quality_key(attrib):
        """Return an integer sort key for a representation's attribute mapping.

        The first non-zero value of: pixel area, bandwidth, the leading number
        of FBQualityLabel, audio sampling rate. The result is always an int so
        that keys stay comparable with each other; malformed values count as 0.
        """
        def as_int(name):
            mobj = re.match(r'\s*(\d+)', attrib.get(name) or '')
            return int(mobj.group(1)) if mobj else 0

        return (
            (as_int('width') * as_int('height')) or
            as_int('bandwidth') or
            as_int('FBQualityLabel') or
            as_int('audioSamplingRate'))

    @staticmethod
    def _numbered_filename(output_filename, number):
        """Insert ``-<number>`` before the extension, e.g. output.mp4 -> output-1.mp4."""
        dir_name, file_name = os.path.split(output_filename)
        filename_no_ext, ext = os.path.splitext(file_name)
        return os.path.join(
            dir_name, '{0!s}-{1:d}{2!s}'.format(filename_no_ext, number, ext))

    def _select_streams(self, period):
        """Return ``(audio_url, video_url)`` for *period*; either may be None if not found."""
        adaptation_sets = period.findall('mpd:AdaptationSet', MPD_NAMESPACE)
        if len(adaptation_sets) != 2:
            logger.warning('Unexpected number of adaptation sets: {}'.format(len(adaptation_sets)))

        audio_stream = None
        video_stream = None
        for adaptation_set in adaptation_sets:
            # sort representations by quality and pick best one
            representations = sorted(
                adaptation_set.findall('mpd:Representation', MPD_NAMESPACE),
                key=lambda rep: self._quality_key(rep.attrib),
                reverse=True)
            if not representations:
                logger.warning('Adaptation set without representations, skipping')
                continue
            representation = representations[0]
            representation_id = representation.attrib.get('id', '')
            # mimeType may be declared on the enclosing adaptation set instead
            mime_type = (
                representation.attrib.get('mimeType') or
                adaptation_set.attrib.get('mimeType', ''))
            logger.debug(
                'Selected representation with mimeType {0!s} id {1!s} out of {2!s}'.format(
                    mime_type,
                    representation_id,
                    ' / '.join([r.attrib.get('id', '') for r in representations])
                ))

            base_url_element = representation.find('mpd:BaseURL', MPD_NAMESPACE)
            if base_url_element is None or not base_url_element.text:
                logger.warning(
                    'Representation {0!s} has no BaseURL, skipping'.format(representation_id))
                continue
            representation_base_url = base_url_element.text
            logger.debug(representation_base_url)

            if 'video' in mime_type and not video_stream:
                video_stream = representation_base_url
            elif 'audio' in mime_type and not audio_stream:
                audio_stream = representation_base_url

            if audio_stream and video_stream:
                break
        return audio_stream, video_stream

    def _local_path(self, stream_url):
        """Return the path inside ``output_dir`` that *stream_url* is downloaded to."""
        return os.path.join(
            self.output_dir,
            os.path.basename(compat_urllib_parse_urlparse(stream_url).path)
        )

    def _fetch(self, url, destination):
        """Stream *url* into the file *destination*; raises on HTTP errors."""
        logger.debug('Downloading {} as {}'.format(url, destination))
        with closing(self.session.get(
                url,
                headers={'User-Agent': self.user_agent, 'Accept': '*/*'},
                timeout=self.download_timeout, stream=True)) as res:
            res.raise_for_status()
            with open(destination, 'wb') as f:
                for chunk in res.iter_content(chunk_size=1024 * 100):
                    f.write(chunk)

    def _mux(self, audio_file, video_file, generated_filename):
        """Join audio and video into *generated_filename* with ffmpeg; return True on success."""
        # isEnabledFor also honours a DEBUG level inherited from a parent logger
        ffmpeg_loglevel = 'warning' if logger.isEnabledFor(logging.DEBUG) else 'error'
        cmd = [
            self.ffmpeg_binary, '-y',
            '-loglevel', ffmpeg_loglevel,
            '-i', audio_file,
            '-i', video_file,
            '-c:v', 'copy',
            '-c:a', 'copy',
            generated_filename]
        try:
            exit_code = subprocess.call(cmd)
        except Exception as call_err:
            # Best effort: a missing/broken ffmpeg must not abort the remaining periods
            logger.error('ffmpeg exited with the error: {0!s}'.format(call_err))
            logger.error('Command: {0!s}'.format(' '.join(cmd)))
            return False
        if exit_code:
            logger.error('ffmpeg exited with the code: {0!s}'.format(exit_code))
            logger.error('Command: {0!s}'.format(' '.join(cmd)))
            return False
        return True

    def download(self, output_filename,
                 skipffmpeg=False,
                 cleartempfiles=True):
        """
        Download and saves the generated file with the file name specified.

        :param output_filename: Output file path
        :param skipffmpeg: bool flag to not use ffmpeg to join audio and video file into final mp4
            (the downloaded audio/video files are then left in the output folder)
        :param cleartempfiles: bool flag to remove downloaded and temp files
            once the final file has been generated
        :return: list of the file paths that were successfully generated
        """
        periods = self.mpd_document.findall('mpd:Period', MPD_NAMESPACE)
        logger.debug('Found {0:d} period(s)'.format(len(periods)))

        generated_files = []
        # According to the specs, multiple periods are allowed but IG usually only sends one
        for period_idx, period in enumerate(periods):
            audio_stream, video_stream = self._select_streams(period)
            if not (audio_stream and video_stream):
                logger.error(
                    'Period {0:d} has no usable audio and video stream, skipping'.format(
                        period_idx + 1))
                continue

            audio_file = self._local_path(audio_stream)
            video_file = self._local_path(video_stream)
            for target in ((audio_stream, audio_file), (video_stream, video_file)):
                self._fetch(*target)

            if skipffmpeg:
                continue

            if len(periods) > 1:
                # Generate a new filename by appending n+1
                # to the original specified output filename
                # so that it looks like output-1.mp4, output-2.mp4, etc
                generated_filename = self._numbered_filename(output_filename, period_idx + 1)
            else:
                generated_filename = output_filename

            if not self._mux(audio_file, video_file, generated_filename):
                continue

            generated_files.append(generated_filename)
            logger.debug('Generated {}'.format(generated_filename))

            if cleartempfiles:
                for f in (audio_file, video_file):
                    try:
                        os.remove(f)
                    except (IOError, OSError) as ioe:
                        logger.warning('Error removing {0!s}: {1!s}'.format(f, str(ioe)))

        return generated_files
if __name__ == '__main__':      # pragma: no cover
    # pylint: disable-all
    # Minimal command-line driver showing how to set up and run the Downloader:
    # reads a manifest from a local file, downloads it and prints what was produced.
    cli = argparse.ArgumentParser()
    cli.add_argument('mpd')
    cli.add_argument('-v', action='store_true', help='Verbose')
    cli.add_argument(
        '-s', metavar='OUTPUT_FILENAME', required=True, help='Output filename')
    cli.add_argument(
        '-o', metavar='DOWLOAD_DIR', default='output/', help='Download folder')
    cli.add_argument('-c', action='store_true', help='Clear temp files')
    options = cli.parse_args()

    # -v switches this module's logger (and the root handler) to debug output
    logger.setLevel(logging.DEBUG if options.v else logging.INFO)
    logging.basicConfig(level=logger.level)

    # The positional argument is a path to a file holding the manifest text
    with open(options.mpd, 'r') as manifest:
        manifest_xml = manifest.read()

    replay = Downloader(mpd=manifest_xml, output_dir=options.o)
    try:
        outputs = replay.download(options.s, cleartempfiles=options.c)
        print('Video Duration: %s' % replay.duration)
        print('Generated files: \n%s' % '\n'.join(outputs))
    except KeyboardInterrupt:
        logger.info('Interrupted')