# Egyptian Arabic (arz)

Egyptian Arabic can be hard to separate from Standard Arabic (arb).

In [1]:
# Language code for this notebook; it is written into every sample's
# 'language' field and passed to index.stage_audio() when staging audio.
LANGUAGE = 'arz'

In [2]:
from pyquery import PyQuery as pq
import sys
import requests

sys.path.append('../src')
import index

## The Joint Language University

In [3]:
# Page that iter_lessons() loads and scans for <a> links to lesson audio.
DIRECTORY = 'https://jlu.wbtrain.com/sumtotal/language/DLI%20basic%20courses/Egyptian%20Arabic/Audio/'
# Stored as 'source_name' on every sample yielded from this source.
SOURCE_NAME = 'The Joint Language University'
# Stored as 'date' on every sample.
# NOTE(review): what this date refers to (publication? listing timestamp?)
# is not visible here -- confirm before relying on it.
DATE = '2006-04-28'

In [4]:
def iter_lessons():
    """Yield one sample dict per lesson recording linked from DIRECTORY.

    Loads DIRECTORY through pyquery and walks every ``<a>`` element on the
    page. A link is kept when its text contains both ``'Lesson'`` and
    ``'side'`` and its resolved URL ends in ``.mp3``.

    Yields:
        dict: with keys ``source_url``, ``source_name``, ``language``,
        ``title``, ``media_urls`` (a one-element list holding the same URL
        as ``source_url``) and ``date``.
    """
    # Imported here so this cell does not depend on edits to the import cell.
    from urllib.parse import urljoin

    doc = pq(DIRECTORY)

    for a in doc('a'):
        href = a.get('href')
        if not href:
            # Anchors without an href (e.g. named anchors) cannot point at
            # audio; indexing attrib['href'] directly would raise KeyError
            # and abort the whole scrape.
            continue

        # Resolve against the listing URL instead of gluing a hard-coded
        # host onto the href: root-relative hrefs produce the same URL as
        # before, and relative or already-absolute hrefs now work too.
        media_url = urljoin(DIRECTORY, href)
        title = a.text_content()

        if 'Lesson' in title and 'side' in title and media_url.endswith('.mp3'):
            yield {
                'source_url': media_url,
                'source_name': SOURCE_NAME,
                'language': LANGUAGE,
                # Drops the last four characters of the link text --
                # presumably the '.mp3' extension; verify if the listing
                # format ever changes.
                'title': title[:-4],
                'media_urls': [media_url],
                'date': DATE,
            }

In [5]:
# Snapshot of what the index already knows; it is consulted below for both
# source URLs and audio checksums.
seen = index.scan()

for i, sample in enumerate(iter_lessons()):
    number = i + 1  # progress lines are numbered from 1
    title = sample['title']

    # Cheap check first: a known source URL means nothing has to be fetched.
    if sample['source_url'] in seen:
        print('{0}. {1} (skipping)'.format(number, title), flush=True)
        continue

    print('{0}. {1}'.format(number, title), flush=True)

    audio_url = sample['media_urls'][0]
    staged = index.stage_audio(audio_url, LANGUAGE, method='requests')

    # Same audio may already be indexed under another URL; only save the
    # sample when its checksum is new.
    if staged.checksum not in seen:
        sample['checksum'] = staged.checksum
        index.save(sample)
    else:
        print('   SKIPPING: checksum already in index')

1. Module 01 Lesson 01-02 side a (skipping)
2. Module 01 Lesson 03-04 side a (skipping)
3. Module 01 Lesson 03-04 side b (skipping)
4. Module 02 Lesson 05-06 side b (skipping)
5. Module 02 Lesson 07-08 side a (skipping)
6. Module 02 Lesson 07-08 side b (skipping)
7. Module 03 Lesson 10 side 1 (skipping)
8. Module 03 Lesson 11 side 1 (skipping)
9. Module 03 Lesson 11 side 2 (skipping)
10. Module 03 Lesson 12 side 1 (skipping)
11. Module 04 Lesson 13 side a (skipping)
12. Module 04 Lesson 13 side b (skipping)
13. Module 04 Lesson 15 side a (skipping)
14. Module 04 Lesson 15 side b (skipping)
15. Module 04 Lesson 16 side a (skipping)
16. Module 05 Lesson 17-18 side a (skipping)
17. Module 05 Lesson 17-18 side b (skipping)
18. Module 05 Lesson 19-20 side a (skipping)
19. Module 05 Lesson 19-20 side b (skipping)
20. Module 07 Lesson 25-26 side a (skipping)
21. Module 07 Lesson 25-26 side b (skipping)
22. Module 07 Lesson 27-28 side a (skipping)
23. Module 07 Lesson 27-28 side b (skipping)
2