# Setup

In [2]:
import json
import csv

from pathlib import Path

# Reading plaintext

```
// For each Surah
[
    { 
        "number": <surah_number>,   // int
        "ayahs": [
            {
                "numbers": [], // int[]
                "occasions": [] // string[]
            }
        ]
    },
    ...
]
```

In [3]:
# Build `all_data`: one dict per Surah directory found under `plaintext`.
# Each Surah dict holds its ayah groups (directory names are '-'-separated
# integers, e.g. "1" or "1-3") together with the text of every occasion file
# stored inside the group directory.
plaintext = Path('../../data/plaintext')

# Path.iterdir() yields entries in arbitrary, filesystem-dependent order, so
# every level is sorted explicitly to keep the generated files reproducible.
# Non-directory entries (e.g. OS metadata files) are skipped instead of
# crashing on int(); a directory with a non-numeric name still raises.
surah_dirs = sorted(
    (entry for entry in plaintext.iterdir() if entry.is_dir()),
    key=lambda entry: int(entry.name),
)

all_data = []
for surah_dir in surah_dirs:
    surah_nr = int(surah_dir.name)
    surah_data = {
        "number": surah_nr,
        "ayahs": []
    }

    # Parse the ayah numbers first so groups can be ordered numerically
    # ("2" before "10"), which a plain name sort would not guarantee.
    ayah_groups = []
    for ayahs_dir in surah_dir.iterdir():
        if not ayahs_dir.is_dir():
            continue
        ayah_numbers = [int(a) for a in ayahs_dir.name.split('-')]
        ayah_groups.append((ayah_numbers, ayahs_dir))
    ayah_groups.sort(key=lambda group: group[0])

    for ayah_numbers, ayahs_dir in ayah_groups:
        # Occasion files are ordered by path so repeated runs agree.
        # NOTE(review): this is a lexicographic sort; if file names are
        # unpadded numbers, "10" sorts before "2" - confirm naming scheme.
        occasions = [
            occasion_file.read_text(encoding='utf-8')
            for occasion_file in sorted(ayahs_dir.iterdir())
            if occasion_file.is_file()
        ]

        surah_data["ayahs"].append({
            "numbers": ayah_numbers,
            "occasions": occasions,
        })

    all_data.append(surah_data)


# Output

In [4]:
# Destination folders for the structured exports, one per output format.
csv_dir = Path('../../data/structured/csv')
json_dir = Path('../../data/structured/json')

# Create both folders (and any missing parents); re-running is harmless
# because existing directories are accepted.
for output_dir in (csv_dir, json_dir):
    output_dir.mkdir(parents=True, exist_ok=True)

Write the JSON output: one file per Surah, plus a single file containing all occasions.

In [5]:
# JSON export: one zero-padded file per Surah plus a combined all.json.
def _dump_json(target, payload):
    """Write `payload` to `target` as indented UTF-8 JSON without escaping non-ASCII text."""
    with target.open('w', encoding='utf-8') as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)


all_json = []

for surah_data in all_data:
    surah_number = surah_data['number']

    # Flatten each ayah group into a record that also carries its Surah number.
    surah_records = []
    for group in surah_data["ayahs"]:
        surah_records.append({
            "surah": surah_number,
            "ayahs": group['numbers'],
            "occasions": group['occasions'],
        })

    # Accumulate for the combined file before writing the per-Surah file.
    all_json += surah_records
    _dump_json(json_dir / f"{surah_number:03d}.json", surah_records)

_dump_json(json_dir / 'all.json', all_json)


Do the same for CSV: one file per Surah, plus one containing all occasions.

In [None]:
# CSV export: one zero-padded file per Surah plus a combined all.csv.
# Every occasion becomes its own row: (surah, "first-last" ayah range, text).
all_csv_rows = []
header_row = ['surah', 'ayahs', 'occasion']


def _write_csv(target, body_rows):
    """Write `header_row` followed by `body_rows` to `target`.

    The file is UTF-8 with a BOM, and every non-numeric field is quoted.
    """
    with target.open('w', encoding='utf-8-sig', newline='') as f:
        out = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
        out.writerow(header_row)
        out.writerows(body_rows)


for surah_data in all_data:
    surah_number = surah_data['number']

    # One row per (ayah group, occasion) pair, in the order stored in all_data.
    rows = [
        [surah_number, '-'.join(str(n) for n in group['numbers']), text]
        for group in surah_data["ayahs"]
        for text in group['occasions']
    ]
    all_csv_rows.extend(rows)

    _write_csv(csv_dir / f"{surah_number:03d}.csv", rows)

_write_csv(csv_dir / 'all.csv', all_csv_rows)
