Skip to content
This repository has been archived by the owner on Jun 3, 2020. It is now read-only.

Commit

Permalink
image downloading
Browse files Browse the repository at this point in the history
  • Loading branch information
thomasf committed Apr 6, 2011
1 parent d01dba8 commit 080486c
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 12 deletions.
6 changes: 5 additions & 1 deletion README.rst
Expand Up @@ -57,9 +57,13 @@ Known issues
============
Near future improvements:
* Target file names are sometimes less than optimal.
* Image/attachment downloading not implemented.
* Rewriting of image/attachment links if they are downloaded
* Meaningful translation/filtering of wordpress publish statuses into something that is usable within a fairly standard jekyll setup.

Things I want to do to learn writing better python code:
* Refactor code to use less nesting
* Refactor code to use more try/except tests instead of if statements

Things that might be resolved later on if I find the time:
* There will probably be issues when migrating non-UTF-8 encoded wordpress dump files (if they exist).
* Integrate one or a few basic jekyll site templates to render complete working jekyll blog setups from wordpress exports.
2 changes: 1 addition & 1 deletion config.yaml
Expand Up @@ -13,7 +13,7 @@ target_format: markdown
date_format: '%Y-%m-%d %H:%M:%S'

# Try to download and relocate all images locally to the blog.
download_images: false
download_images: False

# Item types we don't want to import.
item_type_filter: {attachment, nav_menu_item}
Expand Down
28 changes: 18 additions & 10 deletions exitwp.py
Expand Up @@ -10,7 +10,8 @@
import yaml
import tempfile
from BeautifulSoup import BeautifulSoup
from urlparse import urlparse
from urlparse import urlparse, urljoin
from urllib import urlretrieve


'''
Expand Down Expand Up @@ -195,10 +196,10 @@ def get_item_uid(item, date_prefix=False, namespace=''):
result=fn
return result

def get_item_path(item, date_prefix=False, dir='', namespace=''):
def get_item_path(item, dir=''):
full_dir=get_full_dir(dir)
filename_parts=[full_dir,'/']
filename_parts.append(get_item_uid(item, date_prefix=date_prefix, namespace=namespace))
filename_parts.append(item['uid'])
filename_parts.append('.')
filename_parts.append(target_format)
return ''.join(filename_parts)
Expand All @@ -222,10 +223,15 @@ def get_attachment_path(src, dir, dir_prefix='a'):
file_infix=file_infix+1
files[src]=filename=maybe_filename

target_name=os.path.normpath(blog_dir+'/'+dir_prefix +'/' + dir+'/'+filename)
target_dir=os.path.normpath(blog_dir+'/'+dir_prefix +'/' + dir)
target_file=os.path.normpath(target_dir+'/'+filename)

if (not os.path.exists(target_dir)):
os.makedirs(target_dir)

#if src not in attachments[dir]:
print target_name
return target_name
##print target_name
return target_file

#data['items']=[]

Expand All @@ -243,11 +249,12 @@ def get_attachment_path(src, dir, dir_prefix='a'):
}

if i['type'] == 'post':

fn=get_item_path(i, date_prefix=True, dir='_posts')
i['uid']=get_item_uid(i,date_prefix=True)
fn=get_item_path(i, dir='_posts')
out=open_file(fn)
yaml_header['layout']='post'
elif i['type'] == 'page':
i['uid']=get_item_uid(i)
fn=get_item_path(i)
out=open_file(fn)
yaml_header['layout']='page'
Expand All @@ -257,8 +264,9 @@ def get_attachment_path(src, dir, dir_prefix='a'):
print "Unknown item type :: " + i['type']


for a in i['img_srcs']:
get_attachment_path(a,fn)
if download_images:
for img in i['img_srcs']:
urlretrieve(urljoin(data['header']['link'],img.decode('utf-8')), get_attachment_path(img, i['uid']))


if out is not None:
Expand Down

0 comments on commit 080486c

Please sign in to comment.