In [1]:
import orjson
import asyncio
import httpx
import pandas as pd

In [2]:
# One word-translation JSON file per surah (numbered 1 through 114) for each language.
_SURAH_NUMBERS = range(1, 115)


def _word_translation_urls(language):
    """Return the per-surah word-translation URLs for `language`, in surah order."""
    return [
        f"https://data.quranwbw.com/{surah_num}/word-translations/{language}.json"
        for surah_num in _SURAH_NUMBERS
    ]


arb_urls = _word_translation_urls("arabic")
eng_urls = _word_translation_urls("english")
bn_urls = _word_translation_urls("bangla")


In [3]:
async def fetch(url, client=None):
    """Download `url` and return its body parsed as JSON.

    Parameters
    ----------
    url : str
        Address of the JSON document to download.
    client : httpx.AsyncClient, optional
        Client to send the request with, so many calls can share one
        connection pool. When omitted, a temporary client is opened for this
        single request and closed afterwards.

    Returns
    -------
    The decoded JSON value produced by `orjson.loads`.

    Raises
    ------
    httpx.HTTPStatusError
        If the server answers with a 4xx/5xx status.
    """
    if client is None:
        # No shared client supplied: open a short-lived one and reuse the code path below.
        async with httpx.AsyncClient() as own_client:
            return await fetch(url, client=own_client)

    response = await client.get(url)
    # Fail on HTTP errors here instead of letting an error page reach the JSON parser.
    response.raise_for_status()
    # orjson parses bytes directly, so skip the intermediate decode to str.
    return orjson.loads(response.content)



In [4]:
fetch_arb_tasks = [asyncio.create_task(fetch(url)) for url in arb_urls]
fetch_eng_tasks = [asyncio.create_task(fetch(url)) for url in eng_urls]
fetch_bn_tasks = [asyncio.create_task(fetch(url)) for url in bn_urls]
arb_res = await asyncio.gather(*fetch_arb_tasks)
eng_res = await asyncio.gather(*fetch_eng_tasks)
bn_res = await asyncio.gather(*fetch_bn_tasks)



In [5]:
# words_by_surahs = []
# for idx in range(len(arb_res)):
#     if idx == 113:
#         print(arb_res[idx])
#     for ayah in arb_res[idx].keys():
#         all_words = arb_res[idx][ayah]['w'].split("|")
# #         eng_trans = eng_res[idx].split("//")
#         words = [
#             {
#                 "page": arb_res[idx][ayah]['p'],
#                 "ayah": ayah,
#                 "word": word.split('/')[1],
#                 "surah": idx + 1 
#             } for word in all_words]
#         words_by_surahs.extend(words)
#         if idx == 113:
#             print(words)
    

In [6]:
def _label_ayahs(surah, label):
    """Wrap each ayah's translation string as {label: text}, keyed by ayah."""
    return {ayah: {label: text} for ayah, text in surah.items()}


# Tag each language's per-ayah text with its own key so the dicts can later be
# merged into the Arabic records without colliding.
bn_words = [_label_ayahs(surah, "bn_w") for surah in bn_res]
en_words = [_label_ayahs(surah, "en_w") for surah in eng_res]

# Pair up the Arabic, English and Bengali data surah by surah and preview the first one.
all_wrds = list(zip(arb_res, en_words, bn_words))
all_wrds[0]

({'1': {'p': 1,
   'w': "بِسْمِ/بِسۡمِ/./bis'mi|اللّٰهِ/ٱللَّهِ/./al-lahi|الرَّحْمٰنِ/ٱلرَّحۡمَٰنِ/./al-rahmani|الرَّحِیْمِ/ٱلرَّحِيمِ/./al-rahimi",
   'e': '۟\uf500/١/.'},
  '2': {'p': 1,
   'w': "اَلْحَمْدُ/ٱلۡحَمۡدُ/./al-hamdu|لِلّٰهِ/لِلَّهِ/./lillahi|رَبِّ/رَبِّ/./rabbi|الْعٰلَمِیْنَ/ٱلۡعَٰلَمِينَ/./al-'alamina",
   'e': '۟ۙ\uf501/٢/.'},
  '3': {'p': 1,
   'w': 'الرَّحْمٰنِ/ٱلرَّحۡمَٰنِ/./al-rahmani|الرَّحِیْمِ/ٱلرَّحِيمِ/./al-rahimi',
   'e': '۟ۙ\uf502/٣/.'},
  '4': {'p': 1,
   'w': 'مٰلِكِ/مَٰلِكِ/./maliki|یَوْمِ/يَوۡمِ/./yawmi|الدِّیْنِ/ٱلدِّينِ/./al-dini',
   'e': '۟ؕ\uf503/٤/.'},
  '5': {'p': 1,
   'w': "اِیَّاكَ/إِيَّاكَ/./iyyaka|نَعْبُدُ/نَعۡبُدُ/./na'budu|وَاِیَّاكَ/وَإِيَّاكَ/./wa-iyyaka|نَسْتَعِیْنُ/نَسۡتَعِينُ/./nasta'inu",
   'e': '۟ؕ\uf504/٥/.'},
  '6': {'p': 1,
   'w': "اِهْدِنَا/ٱهۡدِنَا/./ih'dina|الصِّرَاطَ/ٱلصِّرَٰطَ/./al-sirata|الْمُسْتَقِیْمَ/ٱلۡمُسۡتَقِيمَ/./al-mus'taqima",
   'e': '۟ۙ\uf505/٦/.'},
  '7': {'p': 1,
   'w': "صِرَاطَ/صِرَٰطَ/./sirata|الَّذِیْنَ/ٱل

In [7]:
# Merge the English ("en_w") and Bengali ("bn_w") word translations into each
# Arabic ayah record.
# NOTE: update() mutates the ayah dicts held by arb_res in place, so
# combined_translations[i] and arb_res[i] are the very same objects afterwards.

# zip() stops at the shortest input without complaint; fail loudly instead.
assert len(arb_res) == len(en_words) == len(bn_words), "surah counts differ between languages"

combined_translations = []
for arb_surah, en_surah, bn_surah in zip(arb_res, en_words, bn_words):
    for ayah, ayah_record in arb_surah.items():
        ayah_record.update(**en_surah[ayah], **bn_surah[ayah])
    combined_translations.append(arb_surah)

# Preview a single merged ayah rather than dumping several whole surahs.
combined_translations[0]["1"]

In [8]:
# Flatten the merged per-ayah records into one row per word.
#
# Within an ayah the Arabic words are separated by "|" and each word carries
# several "/"-separated forms, of which the second is kept. The English and
# Bengali translations are separated by "//".
# Local names are deliberately distinct from `en_words` / `bn_words` so the
# per-surah lists built earlier are not overwritten when this cell runs.
words_by_surahs = []
for surah_num, surah in enumerate(combined_translations, start=1):
    for ayah, record in surah.items():
        ar_tokens = record["w"].split("|")
        en_glosses = record["en_w"].split("//")
        bn_glosses = record["bn_w"].split("//")
        # zip() pairs the three lists by position and stops at the shortest;
        # presumably all three have one entry per word - TODO confirm.
        words_by_surahs.extend(
            {
                "page": record["p"],
                "ayah": ayah,
                "ar": ar_token.split("/")[1],
                "en": en_gloss,
                "bn": bn_gloss,
                "surah": surah_num,
            }
            for ar_token, en_gloss, bn_gloss in zip(ar_tokens, en_glosses, bn_glosses)
        )
    

In [9]:
# Show the first seven word rows as a sanity check.
words_by_surahs[:7]

[{'page': 1,
  'ayah': '1',
  'ar': 'بِسۡمِ',
  'en': 'In (the) name',
  'bn': 'নামে',
  'surah': 1},
 {'page': 1,
  'ayah': '1',
  'ar': 'ٱللَّهِ',
  'en': '(of) Allah',
  'bn': 'আল্লাহ (র)',
  'surah': 1},
 {'page': 1,
  'ayah': '1',
  'ar': 'ٱلرَّحۡمَٰنِ',
  'en': 'the Most Gracious',
  'bn': 'পরম করুণাময়',
  'surah': 1},
 {'page': 1,
  'ayah': '1',
  'ar': 'ٱلرَّحِيمِ',
  'en': 'the Most Merciful',
  'bn': 'অসীম দয়ালু',
  'surah': 1},
 {'page': 1,
  'ayah': '2',
  'ar': 'ٱلۡحَمۡدُ',
  'en': 'All praises and thanks',
  'bn': 'সকল প্রশংসা',
  'surah': 1},
 {'page': 1,
  'ayah': '2',
  'ar': 'لِلَّهِ',
  'en': '(be) to Allah',
  'bn': 'আল্লাহ্\u200cরই জন্য',
  'surah': 1},
 {'page': 1,
  'ayah': '2',
  'ar': 'رَبِّ',
  'en': 'the Lord',
  'bn': '(যিনি) রব',
  'surah': 1}]

In [10]:
# Every record is already a flat dict, so the plain DataFrame constructor builds
# the table directly; json_normalize is only needed for nested JSON.
finalized_transation = pd.DataFrame(words_by_surahs)

In [11]:
# Write the word table as tab-separated text, leaving out the DataFrame index.
finalized_transation.to_csv(
    "quran_word_translation.tsv",
    sep="\t",
    index=False,
)