Skip to content
Permalink
Browse files

fixes for sometimes-missing 'info'

  • Loading branch information...
martymcguire committed May 10, 2019
1 parent b3688ff commit 054bfba36344d0e40b3de4a8fdf9c4de96bc8f25
Showing with 24 additions and 20 deletions.
  1. +20 −15 download_images.py
  2. +3 −3 export_messages.py
  3. +1 −2 schema.py
@@ -1,4 +1,4 @@

import sys
from pathlib import Path
from urllib.parse import urlparse

@@ -19,20 +19,25 @@ def download_stem(message, prefer_thumbnails):
def _image_subtype(content_type):
    """Return the subtype of an ``image/*`` Content-Type value, else ``None``.

    Parameters after ``;`` (for example ``; charset=binary``) are ignored so
    they never leak into the file suffix built from the subtype.
    """
    media_type = content_type.partition(';')[0].strip()
    mtype, _, subtype = media_type.partition('/')
    if mtype.lower() != 'image' or not subtype:
        return None
    return subtype


def run_downloads(messages, download_dir, prefer_thumbnails):
    """Download the image referenced by each message into ``download_dir``.

    For every message the thumbnail URL is used when ``prefer_thumbnails`` is
    true and the message has one; otherwise the full image URL is used. A HEAD
    request checks the content type first and anything that is not ``image/*``
    is skipped. A failure for one message is reported on stdout and does not
    stop the remaining downloads.

    Args:
        messages: Iterable of message objects exposing ``thumbnail_url`` and
            ``image_url``.
        download_dir: Target directory. It is joined to the file stem with
            ``/`` and must support ``with_suffix`` (presumably a
            ``pathlib.Path`` -- confirm against the caller).
        prefer_thumbnails: Prefer ``thumbnail_url`` over ``image_url``.
    """
    for msg in messages:
        image_url = (msg.thumbnail_url if prefer_thumbnails else None) or msg.image_url
        # The bare host is rewritten to the host:port the media is fetched from.
        download_url = get_download_url(image_url).replace(
            '//maktro.net', '//matrix.maktro.net:8448')
        try:
            # NOTE(review): verify=False turns off TLS certificate checking.
            # It is kept because the original passes it deliberately; prefer
            # pointing `verify` at the server's CA bundle if one is available.
            res = requests.head(download_url, verify=False)
            # Explicit check instead of `assert`, which is stripped under -O.
            if res.status_code != 200:
                raise RuntimeError(f"HEAD returned HTTP {res.status_code}")

            content_type = res.headers.get('content-type', '')
            subtype = _image_subtype(content_type)
            if subtype is None:
                print(f"Skipping {download_url}: {content_type}")
                continue

            res = requests.get(download_url, verify=False)
            if res.status_code != 200:
                raise RuntimeError(f"GET returned HTTP {res.status_code}")

            filename = (download_dir / download_stem(msg, prefer_thumbnails)
                        ).with_suffix('.' + subtype)
            print('Downloading', download_url, '->', filename)
            with open(filename, 'wb') as fp:
                fp.write(res.content)
        except Exception as e:
            # Best effort: report and move on to the next message. Catching
            # Exception (not a bare except) lets Ctrl-C still abort the run.
            print("<p>Error downloading '%s' : %s</p>" % (download_url, repr(e)))


@click.command()
@@ -32,12 +32,12 @@ def replace_by_local_image(data):
data = data.copy()
content = data['content']
if content.get('msgtype') == 'm.image':
url, mimetype = content['url'], content['info']['mimetype']
if 'thumbnail_url' in content['info']:
url, mimetype = content['url'], content.get('info', {}).get('mimetype', 'image/jpeg')
if 'thumbnail_url' in content.get('info', {}):
url, mimetype = content['info']['thumbnail_url'], content['info']['thumbnail_info']['mimetype']
_, subtype = mimetype.split('/', 2)
url = urlparse(url)
content['url'] = 'thumbnails/' + url.path.strip('/') + '.' + subtype
content['url'] = 'images/' + os.path.basename(url.path.strip('/')) + '.' + subtype
return data


@@ -19,5 +19,4 @@ def image_url(self):

@property
def thumbnail_url(self):
    """URL of this message's thumbnail, or ``None`` when it has none.

    ``None`` is returned for non-image messages and for image messages whose
    content has no ``info`` entry or no ``thumbnail_url`` inside it.
    """
    if not self.is_image():
        return None
    # 'info' is sometimes absent from the content; treat that like an image
    # without a thumbnail instead of raising KeyError.
    info = self.content.get('info') or {}
    return info.get('thumbnail_url')

0 comments on commit 054bfba

Please sign in to comment.
You can’t perform that action at this time.