 # Export Documents (Optional)

 <h4>This notebook will export documents collected by the previous notebook
 to zip archives for storage and preprocessing.</h4>

For more information and further instructions, consult the Chomp documentation at https://github.com/seangilleran/we1s_chomp.

 ## INFO
 
__authors__    = 'Sean Gilleran'  
__copyright__  = 'copyright 2019, The WE1S Project'  
__license__    = 'MIT'  
__version__    = '0.1.0'  
__email__      = 'sgilleran@ucsb.edu'


 ## SETTINGS

In [1]:
import json
from pathlib import Path
from zipfile import ZipFile


# Root of the Chomp project inside the current user's home directory.
project_dir = Path.home() / "write" / "dev" / "we1s_chomp"
# Input: directory that is searched for JSON documents to export.
document_dir = project_dir / "data" / "json" / "documents"
# Output: directory that receives the zip archives.
export_dir = project_dir / "data" / "export"

# Make the export directory if it does not already exist. `exist_ok=True`
# replaces a separate exists() check: it is safe to re-run, has no
# check-then-create race, and still raises FileExistsError if the path is
# occupied by something that is not a directory.
export_dir.mkdir(parents=True, exist_ok=True)

print(f"Loading documents from {document_dir}.")
print(f"Exporting archives to {export_dir}.\n\n")

Loading documents from /home/jovyan/write/dev/we1s_chomp/data/json/documents.
Exporting archives to /home/jovyan/write/dev/we1s_chomp/data/export.




 ## EXPORT

In [13]:
# Every parsed document, in load order. Initialised in this cell so that it
# runs on a fresh kernel and re-running it does not accumulate duplicates.
documents = []
# Maps archive name -> list of (source path on disk, member name in the zip).
archives = {}

# The search is recursive, so documents may live in subdirectories of
# document_dir. Sorting makes the archive contents reproducible between runs.
for filename in sorted(document_dir.glob("**/*.json")):
    with open(filename, encoding="utf-8") as jsonfile:
        document = json.load(jsonfile)
    documents.append(document)

    # Store "no-exact-match" documents in separate archives.
    archive_name = document["query"]
    if "no-exact-match" in str(filename):
        archive_name += "(no-exact-match)"

    # Record the path the glob actually found instead of rebuilding it from
    # document["name"]; a rebuilt path would point at document_dir itself and
    # miss any file that was discovered in a subdirectory.
    member_name = f'{document["name"]}.json'
    archives.setdefault(archive_name, []).append((filename, member_name))

# Write one zip per archive name; members are stored flat under "<name>.json".
for archive_name, members in archives.items():
    archive_filename = export_dir / f"{archive_name}.zip"
    print(f"Creating archive {archive_filename}...")
    with ZipFile(archive_filename, "w") as zipfile:
        for original_filename, new_filename in members:
            zipfile.write(original_filename, new_filename)
            print(f"- {new_filename}")
    print("Done!\n\n")
print("\n\n----------Time----------")

Creating archive /home/jovyan/write/dev/we1s_chomp/data/export/libcom-org_humanities_01-01-2000_12-31-2019.zip...
- chomp_libcom-org_humanities_01-01-2000_12-31-2019_4.json
- chomp_libcom-org_humanities_01-01-2000_12-31-2019_5.json
- chomp_libcom-org_humanities_01-01-2000_12-31-2019_6.json
- chomp_libcom-org_humanities_01-01-2000_12-31-2019_7.json
- chomp_libcom-org_humanities_01-01-2000_12-31-2019_8.json
- chomp_libcom-org_humanities_01-01-2000_12-31-2019_0.json
- chomp_libcom-org_humanities_01-01-2000_12-31-2019_1.json
- chomp_libcom-org_humanities_01-01-2000_12-31-2019_2.json
- chomp_libcom-org_humanities_01-01-2000_12-31-2019_3.json
- chomp_libcom-org_humanities_01-01-2000_12-31-2019_9.json
- chomp_libcom-org_humanities_01-01-2000_12-31-2019_10.json
- chomp_libcom-org_humanities_01-01-2000_12-31-2019_11.json
- chomp_libcom-org_humanities_01-01-2000_12-31-2019_12.json
- chomp_libcom-org_humanities_01-01-2000_12-31-2019_13.json
- chomp_libcom-org_humanities_01-01-2000_12-31-2019_14.j

 ## SEE EXPORTED FILES

In [4]:
# TODO: Display a link to the export directory so the exported archives can be browsed.