 # IMPORT CSV CONFIGURATION

 This notebook imports source and query metadata from CSV files, which were
 the standard format in older iterations of the Chomp software, and translates
 it into the JSON format used internally by more recent versions.

 For more information and further documentation and instructions, consult the
 Chomp documentation at https://github.com/kwgws/we1s_chomp.

 ## INFO
  
__authors__    = 'Catherine Gilleran'  
__copyright__  = 'copyright 2019, The WE1S Project'  
__license__    = 'MIT'  
__version__    = '0.1.0'  


 ## SETTINGS

In [None]:
import csv
import json
from pathlib import Path

# Base directory; every data path below is built from it.
project_dir = Path.home() / "write" / "dev" / "we1s_chomp"
# Directories that hold the per-source and per-query JSON files.
source_dir = project_dir / "data" / "json" / "sources"
query_dir = project_dir / "data" / "json" / "queries"

# Create both directories (and any missing parents). `exist_ok=True` replaces
# the separate exists() check, which could race with another process creating
# the directory between the check and the mkdir call.
source_dir.mkdir(parents=True, exist_ok=True)
query_dir.mkdir(parents=True, exist_ok=True)

print(f"Loading sources from {source_dir}.")
print(f"Loading queries from {query_dir}.\n\n")

 ## CSV FILES

 Select files to import. Default import location is the `import` directory.
  Set either list to `None` to skip it if you only need one of the two.

In [None]:
# CSV files holding source metadata; set to None (or an empty list) to skip.
sources_csv_files = [
    project_dir / "data" / "import" / "sources.csv",
]

# CSV files holding query metadata; set to None (or an empty list) to skip.
queries_csv_files = [
    project_dir / "data" / "import" / "queries.csv",
]

# Check files: report every missing file rather than stopping at the first.
all_ok = True
for source_file in sources_csv_files or []:
    if not source_file.exists():
        print(f"ERR: {source_file} does not exist.")
        all_ok = False
for query_file in queries_csv_files or []:
    if not query_file.exists():
        print(f"ERR: {query_file} does not exist.")
        all_ok = False

# Selecting nothing at all is also an error, so it must clear `all_ok`;
# otherwise the success message below would be printed right after the error.
if not sources_csv_files and not queries_csv_files:
    print("ERR: Nothing imported!")
    all_ok = False

if all_ok:
    print("All files found. Good to go!\n\n")

 ## IMPORT SOURCES (Optional)
 

In [None]:
# Convert every row of each sources CSV into its own JSON file in `source_dir`.
# `or []` allows `sources_csv_files` to be None (skip this step) without
# raising "TypeError: 'NoneType' object is not iterable".
for csv_filename in sources_csv_files or []:

    print(f'Importing sources from "{csv_filename}".')

    count = 0

    with open(csv_filename, newline="", encoding="utf-8") as csvfile:
        for source in csv.DictReader(csvfile):

            # "name", "title" and "url" are required: a CSV without those
            # columns raises KeyError. The remaining columns are optional and
            # fall back to a default when the column is absent, the cell is
            # empty, or the row is short (DictReader fills those with None).
            parsed_source = {
                "name": source["name"],
                "title": source["title"],
                "webpage": source["url"],
                "contentType": source.get("contentType") or "website",
                "country": source.get("country") or "",
                "language": source.get("language") or "",
                "copyright": source.get("copyright") or "",
            }

            # One JSON file per source, named after the source.
            filename = source_dir / f'{source["name"]}.json'
            with open(filename, "w", encoding="utf-8") as jsonfile:
                json.dump(parsed_source, jsonfile, indent=4, ensure_ascii=False)

            # Count only once the file has been written successfully.
            count += 1
            print(f"- {filename}")

    print(f'Done! Found {count} sources in "{csv_filename}".\n\n')

 ## IMPORT QUERIES (Optional)

In [None]:
from we1s_chomp import db

# Convert every row of each queries CSV into its own JSON file in `query_dir`
# and register the query's name on the source it belongs to.
# `or []` allows `queries_csv_files` to be None (skip this step) without
# raising "TypeError: 'NoneType' object is not iterable".
for csv_filename in queries_csv_files or []:

    print(f'Importing queries from "{csv_filename}"...')

    count = 0

    with open(csv_filename, newline="", encoding="utf-8") as csvfile:
        for query in csv.DictReader(csvfile):

            # Normalise the dates once: "/" becomes "-" so the value can be
            # used both in the JSON record and inside the file name.
            start_date = query["startDate"].replace("/", "-")
            end_date = query["endDate"].replace("/", "-")

            # Create query name: source_term_startDate_endDate(.json)
            # NOTE(review): the term is used verbatim; a "/" in it would make
            # the path below point into a subdirectory -- confirm that terms
            # never contain one.
            name = "_".join([query["source"], query["term"], start_date, end_date])

            parsed_query = {
                "name": name,
                "title": name,
                "source": query["source"],
                "query_str": query["term"],
                "start_date": start_date,
                "end_date": end_date,
            }

            # Update source with query name.
            # NOTE(review): presumably `source.queries` is a set-like
            # collection and the source must already exist in `source_dir`
            # -- confirm against we1s_chomp.db.
            source = db.load_source(parsed_query["source"], source_dir)
            source.queries.add(parsed_query["name"])
            db.save_source(source, source_dir)

            filename = query_dir / f"{name}.json"
            with open(filename, "w", encoding="utf-8") as jsonfile:
                json.dump(parsed_query, jsonfile, indent=4, ensure_ascii=False)

            # Count only once the file has been written successfully.
            count += 1
            print(f"- {filename}")

    print(f'Done! Found {count} queries in "{csv_filename}".\n\n')

 ## NEXT NOTEBOOK

In [None]:
# TODO: next notebook code
# Go to 01_responses.ipynb