 # Export Articles (Optional)

 <h4>This notebook will export articles collected by the previous notebook
 to zip archives for storage and preprocessing.</h4>

For more information and further instructions, consult the Chomp documentation at https://github.com/seangilleran/we1s_chomp.

 ## INFO
 
__authors__    = 'Sean Gilleran'  
__copyright__  = 'copyright 2019, The WE1S Project'  
__license__    = 'MIT'  
__version__    = '0.1.0'  
__email__      = 'sgilleran@ucsb.edu'


 ## SETTINGS

In [None]:
import json
from pathlib import Path
from zipfile import ZipFile


# Locations of the Chomp project, the collected article JSON files, and the
# directory that receives the exported zip archives.
project_dir = Path.home() / "write" / "dev" / "we1s_chomp"
article_dir = project_dir / "data" / "json" / "articles"
export_dir = project_dir / "data" / "export"

# Make the export directory if it does not already exist. Letting mkdir()
# tolerate an existing directory avoids the check-then-create race and fails
# right away if the path exists but is not a directory.
export_dir.mkdir(parents=True, exist_ok=True)

print(f"Loading articles from {article_dir}.")
print(f"Exporting archives to {export_dir}.\n\n")

 ## EXPORT

In [None]:
# Map each archive name to the (source path, article) pairs it will contain.
archives = {}

for filename in article_dir.glob("**/*.json"):
    # Only the parse needs the file handle; close it as soon as that is done.
    with open(filename, encoding="utf-8") as jsonfile:
        article = json.load(jsonfile)

    # Store "no-exact-match" articles in separate archives.
    archive_name = article["query"]
    if "no-exact-match" in str(filename):
        archive_name += "(no-exact-match)"

    # Keep the path the file was actually found at: the glob is recursive, so
    # the file is not necessarily a direct child of article_dir.
    archives.setdefault(archive_name, []).append((filename, article))

for archive_name, entries in archives.items():
    archive_filename = export_dir / f"{archive_name}.zip"
    print(f"Creating archive {archive_filename}...")
    with ZipFile(archive_filename, "w") as zipfile:
        for original_filename, article in entries:
            # Inside the archive every article sits at the top level, named
            # after the article's "name" field.
            new_filename = f'{article["name"]}.json'
            zipfile.write(original_filename, new_filename)
            print(f"- {new_filename}")
    print("Done!\n\n")
print("\n\n----------Time----------")

 ## SEE EXPORTED FILES

In [None]:
# TODO: Display a link to the export directory (export_dir).