In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import sys
from pathlib import Path
# Parent of the current working directory; expected to be the repository root
# that holds the `autoeq` and `dbtools` packages imported by the next cell.
ROOT_PATH = Path().resolve().parent
if str(ROOT_PATH) not in sys.path:
    # list.insert takes (index, item) and sys.path entries must be strings.
    # The previous call had the arguments swapped and passed a Path object,
    # which raised TypeError whenever this branch ran.
    sys.path.insert(1, str(ROOT_PATH))

In [None]:
import json
import ipywidgets as widgets
from IPython.display import display
from autoeq.constants import PEQ_CONFIGS
from autoeq.batch_processing import batch_processing
ROOT_PATH = Path().resolve().parent
from dbtools.rtings_crawler import RtingsCrawler
from dbtools.crinacle_crawler import CrinacleCrawler
from dbtools.oratory1990_crawler import Oratory1990Crawler
from dbtools.innerfidelity_crawler import InnerfidelityCrawler
from dbtools.headphonecom_crawler import HeadphonecomCrawler
from dbtools.hypethe_sonics_crawler import HypetheSonicsCrawler
from dbtools.squig_crawler import SquigCrawler, SquigCrawlerManager
from dbtools.prune_results import prune_results
from dbtools.update_result_indexes import update_all_indexes, write_webapp_entries_and_measurements
from dbtools.constants import TARGETS_PATH, MEASUREMENTS_PATH, RESULTS_PATH

# Crawling and Parsing
Additional Python packages are required for processing the measurements:
```bash
python -m pip install -U -r dbtools/requirements.txt
```

This notebook uses IPyWidgets
```bash
jupyter nbextension enable --py widgetsnbextension --sys-prefix
```

Finally install IPython kernel
```bash
python -m ipykernel install --user --name="autoeq"
```

Measurement crawlers require [Google Chrome](https://www.google.com/chrome/) installed and
[ChromeDriver](https://googlechromelabs.github.io/chrome-for-testing/) binary in the measurements folder (or anywhere
in the PATH).

Measurement crawlers also require C++. This should be installed by default on Linux but on Windows you need to install
Microsoft Visual Studio build tools for this. https://visualstudio.microsoft.com/downloads/ ->
"Tools for Visual Studio 2019" -> "Build Tools for Visual Studio 2019".

oratory1990 crawler requires Ghostscript installed: https://www.ghostscript.com/download/gsdnld.html

## Clear `phone_books` to Get Latest Measurements
crinacle, squig.link and HypetheSonics crawlers use `phone_book*.json` files. These should be deleted to allow crawlers to download fresh versions.

In [None]:
# Delete every cached phone book file found anywhere under the measurements
# directory so the crawlers fetch fresh versions on their next run.
stale_phone_books = MEASUREMENTS_PATH.glob('**/phone_book*.json')
for phone_book_path in stale_phone_books:
    phone_book_path.unlink()

## Crinacle
Download measurement data from Drive folder to `measurements/crinacle/raw_data/` before running this!

* `IEM Measurements/IEC60318-4 IEM Measurements (TSV txt)` into `AutoEq/measurements/crinacle/raw_data/IEC60318-4 IEM Measurements (TSV txt)`
* `IEM Measurements/4620 IEM Measurements` into `AutoEq/measurements/crinacle/raw_data/4620 IEM Measurements`
* `HP Measurements/EARS + 711 (TSV txt) (Legacy)` into `AutoEq/measurements/crinacle/raw_data/EARS + 711 (TSV txt) (Legacy)`
* `GRAS 43AG-7` into `AutoEq/measurements/crinacle/raw_data/GRAS 43AG-7`

In [None]:
crawler = CrinacleCrawler()
crawler.run()
display(crawler.widget)

In [None]:
crawler.process(new_only=True)

## oratory1990
oratory1990 crawler fetches all measurements from https://www.reddit.com/r/oratory1990/wiki/index/list_of_presets/, downloads PDFs and reads the frequency response measurement data from the PDFs. Parsing the PDFs requires [Ghostscript](https://www.ghostscript.com/download/gsdnld.html) to be installed on the system.

In [None]:
crawler = Oratory1990Crawler()

#### Delete Existing PDFs
Optionally delete existing PDFs to get latest measurements for existing items

In [None]:
# Remove the PDFs stored in the crawler's `pdf` directory; processing
# downloads a PDF again when it does not exist.
pdf_dir = crawler.measurements_path.joinpath('pdf')
for pdf_path in pdf_dir.glob('*.pdf'):
    pdf_path.unlink()

#### Crawl

In [None]:
crawler.run()
display(crawler.widget)

#### Process
This downloads the PDF (if it doesn't exist), converts it to an image using Ghostscript and parses the image.

Set `new_only=False` to reprocess all existing measurements

In [None]:
crawler.process(new_only=True)

## Rtings
Rtings crawler fetches all measurements from https://www.rtings.com/headphones/1-[2,4,5,6]/graph and downloads raw FR JSON files and parses them.

In [None]:
crawler = RtingsCrawler()
crawler.run()
display(crawler.widget)

In [None]:
crawler.process(new_only=True)

## HypetheSonics

In [None]:
crawler = HypetheSonicsCrawler()
crawler.run()
display(crawler.widget)

In [None]:
crawler.process(new_only=True)

## Squig.link
Several databases are included in squig.link.

**TODO:** rig information for crawlers, somehow

In [None]:
manager = SquigCrawlerManager()
#print(', '.join(sorted([crawler.username for crawler in manager.crawlers])))
#print(json.dumps({crawler.username: crawler.name for crawler in sorted(manager.crawlers, key=lambda c: c.username)}, indent=4, ensure_ascii=False))

In [None]:
display(manager.run('Auriculares Argentina').widget)

In [None]:
manager.process('Auriculares Argentina', new_only=True)

In [None]:
display(manager.run('Bakkwatan').widget)

In [None]:
manager.process('Bakkwatan', new_only=True)

In [None]:
display(manager.run('Hi End Portable').widget)

In [None]:
manager.process('Hi End Portable', new_only=True)

**TODO:** DHRME over-ear rig

In [None]:
display(manager.run('DHRME').widget)

In [None]:
manager.process('DHRME', new_only=True)

In [None]:
display(manager.run('Fahryst').widget)

In [None]:
manager.process('Fahryst', new_only=True)

In [None]:
display(manager.run('Filk').widget)

In [None]:
manager.process('Filk', new_only=True)

In [None]:
display(manager.run('freeryder05').widget)

In [None]:
manager.process('freeryder05', new_only=True)

In [None]:
# Not in squigsites.json
#display(manager.run('gadgetgenerations').widget)

In [None]:
display(manager.run('Harpo').widget)

In [None]:
manager.process('Harpo', new_only=True)

In [None]:
# Rig not known yet
#display(manager.run('ish').widget)

In [None]:
# Rig not known yet
#display(manager.run('jacstone').widget)

In [None]:
display(manager.run('Jaytiss').widget)

In [None]:
manager.process('Jaytiss', new_only=True)

In [None]:
display(manager.run('Kazi').widget)

In [None]:
manager.process('Kazi', new_only=True)

In [None]:
display(manager.run('kr0mka').widget)

In [None]:
manager.process('kr0mka', new_only=True)

In [None]:
display(manager.run('Kuulokenurkka').widget)

In [None]:
manager.process('Kuulokenurkka', new_only=True)

In [None]:
# Not in squigsites.json
#display(manager.run('mini').widget)

In [None]:
# Not in squigsites.json
#display(manager.run('psykano').widget)

In [None]:
display(manager.run('Regan Cipher').widget)

In [None]:
manager.process('Regan Cipher', new_only=True)

In [None]:
display(manager.run('RikudouGoku').widget)

In [None]:
manager.process('RikudouGoku', new_only=True)

In [None]:
# Not in squigsites.json
#display(manager.run('sdlib').widget)

In [None]:
# Almost all measurements are with 3rd party eartips
#display(manager.run('smirk').widget)

In [None]:
# Rig not known yet
#display(manager.run('soundcheck39').widget)

In [None]:
display(manager.run('Super Review').widget)

In [None]:
manager.process('Super Review', new_only=True)

In [None]:
display(manager.run('Ted\'s Squig Hoard').widget)

In [None]:
manager.process('Ted\'s Squig Hoard', new_only=True)

In [None]:
# Not in squigsites.json
#display(manager.run('therollo9').widget)

In [None]:
display(manager.run('ToneDeafMonk').widget)

In [None]:
manager.process('ToneDeafMonk', new_only=True)

In [None]:
# Not in squigsites.json
#display(manager.run('wdym').widget)

In [None]:
# Only 3 models
#display(manager.run('yanyin').widget)

In [None]:
# Not in squigsites.json
#display(manager.run('zamo').widget)

## Rename Measurements
Sometimes measurements are named incorrectly, or previously only one sample existed and now multiple samples have been measured, so the original one needs to be renamed to `<name> (sample 1)`.

In [None]:
# Crawler instances for the non-squig sources. None of the current `renames`
# entries reference them; they are available for use in a `crawlers` list.
crinacle = CrinacleCrawler()
headphonecom = HeadphonecomCrawler()
innerfidelity = InnerfidelityCrawler()
oratory1990 = Oratory1990Crawler()
# The Rtings crawler is handed the oratory1990 crawler's `driver`
# NOTE(review): presumably to share a single browser session -- confirm in RtingsCrawler.
rtings = RtingsCrawler(driver=oratory1990.driver)

# Each entry renames one measurement in every crawler listed under 'crawlers'.
# `manager` is the SquigCrawlerManager created in the Squig.link section, so
# that cell must have been run before this one.
renames = [
    {'old_name': 'Simgot Audio EA500 (bronze nozzle)', 'new_name': 'Simgot Audio EA500LM (bronze nozzle)', 'crawlers': [manager.crawler('Harpo')]},
    {'old_name': 'Simgot Audio EA500 LM (black nozzle)', 'new_name': 'Simgot Audio EA500LM (black nozzle)', 'crawlers': [manager.crawler('Harpo'), manager.crawler('Kazi'), manager.crawler('Regan Cipher')]},
    {'old_name': 'Simgot Audio EA500 LM (red nozzle)', 'new_name': 'Simgot Audio EA500LM (red nozzle)', 'crawlers': [manager.crawler('Harpo'), manager.crawler('Kazi'), manager.crawler('Regan Cipher')]},
]
for rename in renames:
    for crawler in rename['crawlers']:
        # Called with dry_run=False
        # NOTE(review): presumably this applies the rename rather than only reporting it -- confirm in rename_measurement.
        crawler.rename_measurement(old_name=rename['old_name'], new_name=rename['new_name'], dry_run=False)

## Prune Results
Check if obsolete results (e.g. because of renaming) exist and remove them

In [None]:
prune_results(databases=['crinacle', 'oratory1990', 'Rtings', 'Harpo', 'Kazi', 'Regan Cipher'], dry_run=True)

## Update Results
Creates new results from the measurements. The default `kwargs` built in `update_results` are the parameters shared by all jobs; keyword arguments passed to a call override them.

In [None]:
def update_results(source_name, form, **override_kwargs):
    """Run `batch_processing` for one measurement source and headphone form.

    Builds the default keyword arguments for the job and lets the caller
    replace or extend any of them.

    Args:
        source_name: Name of the source directory under `MEASUREMENTS_PATH`
            and `RESULTS_PATH`.
        form: Headphone form, used as a sub-directory name. 'over-ear' selects
            6.0 dB bass boost and the Harman over-ear target, 'earbud' selects
            0.0 dB bass boost; every other value gets 8.0 dB. All forms other
            than 'over-ear' use the AutoEq in-ear target.
        **override_kwargs: Keyword arguments that take precedence over the
            defaults; keys without a default are passed through as well.

    Returns:
        None. The result of `batch_processing` is discarded.
    """
    if form == 'over-ear':
        default_gain = 6.0
        target_file = 'Harman over-ear 2018 without bass.csv'
    else:
        default_gain = 0.0 if form == 'earbud' else 8.0
        target_file = 'AutoEq in-ear.csv'

    defaults = dict(
        input_dir=MEASUREMENTS_PATH.joinpath(source_name, 'data', form),
        output_dir=RESULTS_PATH.joinpath(source_name, form),
        bass_boost_gain=default_gain,
        target=TARGETS_PATH.joinpath(target_file),
        bass_boost_fc=105,
        bass_boost_q=0.7,
        parametric_eq=True,
        ten_band_eq=True,
        convolution_eq=True,
        parametric_eq_config=[
            PEQ_CONFIGS['4_PEAKING_WITH_LOW_SHELF'],
            PEQ_CONFIGS['4_PEAKING_WITH_HIGH_SHELF'],
        ],
        fs=[44100, 48000],
        thread_count=0,
    )
    # Later entries win, so caller-supplied values replace the defaults.
    batch_processing(**{**defaults, **override_kwargs})

### oratory1990

In [None]:
update_results('oratory1990', 'over-ear', new_only=False)

In [None]:
update_results('oratory1990', 'in-ear', new_only=False)

In [None]:
update_results('oratory1990', 'earbud', new_only=False)

### crinacle

In [None]:
update_results('crinacle', 'over-ear', new_only=True,
    input_dir=MEASUREMENTS_PATH.joinpath('crinacle', 'data', 'over-ear', 'EARS + 711'),
    output_dir=RESULTS_PATH.joinpath('crinacle', 'EARS + 711 over-ear'),
    target=TARGETS_PATH.joinpath('crinacle EARS + 711 Harman over-ear 2018 without bass.csv'))

In [None]:
update_results('crinacle', 'over-ear', new_only=True,
    input_dir=MEASUREMENTS_PATH.joinpath('crinacle', 'data', 'over-ear', 'GRAS 43AG-7'),
    output_dir=RESULTS_PATH.joinpath('crinacle', 'GRAS 43AG-7 over-ear'))

In [None]:
update_results('crinacle', 'in-ear', new_only=True,
    input_dir=MEASUREMENTS_PATH.joinpath('crinacle', 'data', 'in-ear', 'Bruel & Kjaer 4620'),
    output_dir=RESULTS_PATH.joinpath('crinacle', 'Bruel & Kjaer 4620 in-ear'),
    target=TARGETS_PATH.joinpath('JM-1 with Harman treble filter.csv'),
    bass_boost_gain=6.5)

In [None]:
update_results('crinacle', 'in-ear', new_only=True,
    input_dir=MEASUREMENTS_PATH.joinpath('crinacle', 'data', 'in-ear', '711'),
    output_dir=RESULTS_PATH.joinpath('crinacle', '711 in-ear'))

### Rtings

In [None]:
update_results('Rtings', 'over-ear', new_only=True, target=TARGETS_PATH.joinpath('HMS II.3 Harman over-ear 2018 without bass.csv'))

In [None]:
update_results('Rtings', 'in-ear', new_only=True, target=TARGETS_PATH.joinpath('HMS II.3 Autoeq in-ear.csv'))

In [None]:
update_results('Rtings', 'earbud', new_only=True, target=TARGETS_PATH.joinpath('HMS II.3 Autoeq in-ear.csv'))

### Innerfidelity

In [None]:
update_results('Innerfidelity', 'over-ear', target=TARGETS_PATH.joinpath('HMS II.3 Harman over-ear 2018 without bass.csv'), new_only=True)

In [None]:
update_results('Innerfidelity', 'in-ear', target=TARGETS_PATH.joinpath('HMS II.3 Autoeq in-ear.csv'), new_only=True)

In [None]:
update_results('Innerfidelity', 'earbud', target=TARGETS_PATH.joinpath('HMS II.3 Autoeq in-ear.csv'), new_only=True)

### Headphone.com Legacy

In [None]:
update_results('Headphone.com Legacy', 'over-ear', new_only=True, target=TARGETS_PATH.joinpath('HMS II.3 Harman over-ear 2018 without bass.csv'))

In [None]:
update_results('Headphone.com Legacy', 'in-ear', new_only=True, target=TARGETS_PATH.joinpath('HMS II.3 Autoeq in-ear.csv'))

In [None]:
update_results('Headphone.com Legacy', 'earbud', new_only=True, target=TARGETS_PATH.joinpath('HMS II.3 Autoeq in-ear.csv'))

### HypetheSonics

In [None]:
update_results('HypetheSonics', 'over-ear', new_only=True,
    target=TARGETS_PATH.joinpath('LMG 5128 0.6.csv'),
    bass_boost_gain=4.0)

In [None]:
update_results('HypetheSonics', 'in-ear', new_only=True,
    input_dir=MEASUREMENTS_PATH.joinpath('HypetheSonics', 'data', 'in-ear', 'Bruel & Kjaer 5128'),
    output_dir=RESULTS_PATH.joinpath('HypetheSonics', 'Bruel & Kjaer 5128 in-ear'),
    target=TARGETS_PATH.joinpath('JM-1 with Harman treble filter.csv'),
    bass_boost_gain=6.5)

In [None]:
update_results('HypetheSonics', 'in-ear', new_only=True,
    input_dir=MEASUREMENTS_PATH.joinpath('HypetheSonics', 'data', 'in-ear', 'GRAS RA0045'),
    output_dir=RESULTS_PATH.joinpath('HypetheSonics', 'GRAS RA0045 in-ear'))

In [None]:
update_results('HypetheSonics', 'earbud', new_only=True,
    target=TARGETS_PATH.joinpath('LMG 5128 0.6.csv'),
    bass_boost_gain=0.0)

### squig.link

In [None]:
update_results('Auriculares Argentina', 'over-ear', new_only=True)

In [None]:
update_results('Auriculares Argentina', 'in-ear', new_only=True)

In [None]:
update_results('Bakkwatan', 'in-ear', new_only=True)

In [None]:
update_results('DHRME', 'in-ear', new_only=True)

In [None]:
update_results('Fahryst', 'in-ear', new_only=True)

In [None]:
update_results('Filk', 'in-ear', new_only=True)

In [None]:
update_results('Filk', 'over-ear', new_only=True)

In [None]:
update_results('freeryder05', 'in-ear', new_only=True)

In [None]:
update_results('Harpo', 'in-ear', new_only=True)

In [None]:
update_results('Hi End Portable', 'in-ear', new_only=True)

In [None]:
update_results('Jaytiss', 'in-ear', new_only=True)

In [None]:
update_results('Kazi', 'in-ear', new_only=True)

In [None]:
update_results('Kazi', 'earbud', new_only=True)

In [None]:
update_results('kr0mka', 'over-ear', new_only=True)

In [None]:
update_results('kr0mka', 'in-ear', new_only=True)

In [None]:
update_results('kr0mka', 'earbud', new_only=True)

In [None]:
update_results('Kuulokenurkka', 'over-ear', new_only=True)

In [None]:
update_results('Regan Cipher', 'over-ear', new_only=True)

In [None]:
update_results('Regan Cipher', 'in-ear', new_only=True)

In [None]:
update_results('Regan Cipher', 'earbud', new_only=True)

In [None]:
update_results('RikudouGoku', 'in-ear', new_only=True)

In [None]:
update_results('Super Review', 'over-ear', new_only=True)

In [None]:
update_results('Super Review', 'in-ear', new_only=True)

In [None]:
update_results('Super Review', 'earbud', new_only=True)

In [None]:
update_results('Ted\'s Squig Hoard', 'in-ear', new_only=True)

In [None]:
update_results('ToneDeafMonk', 'in-ear', new_only=True)

## Update Indexes
Updates recommended results, full results, DB specific results, HeSuVi results and ranking table.

In [None]:
update_all_indexes()

## Deploy
1. Add files to Git, commit and push
2. Upload webapp data to server

# Sandbox
Don't run these! Random exploration while developing.

In [None]:
from pathlib import Path
from tqdm.auto import tqdm
import re
import requests
from selenium.webdriver.common.by import By
import json
from bs4 import BeautifulSoup
import numpy as np
import json
from autoeq.frequency_response import FrequencyResponse

In [None]:
# Sandbox: bring `source_name` values in the crinacle name index in line with
# the names found in the crawler's book maps, then write the name index.
crawler = CrinacleCrawler()
crawler.crawl()

for item in crawler.crawl_index:
    # Name index entry with the same URL as the crawled item
    index_item = crawler.name_index.find_one(url=item.url)
    # Book map is selected by the second-to-last segment of the URL
    book = crawler.book_maps[item.url.split('/')[-2]]
    # Last URL segment (the file name) after normalization is the key within the book
    normalized_file_name = crawler.normalize_file_name(item.url.split('/')[-1])
    source_name = book[normalized_file_name] if normalized_file_name in book else None
    # Only touch entries that exist in both places and disagree on the name
    if index_item is not None and source_name is not None and index_item.source_name != source_name:
        print(f'{index_item.source_name} --> {source_name}')
        index_item.source_name = source_name
crawler.write_name_index()

In [None]:
print(crawler.crawl_index.find_one(url='file://measurements/crinacle/raw_data/IEC60318-4 IEM Measurements (TSV txt)/1Custom Dual L.txt'))

In [None]:
print(crawler.name_index.find_one(url='file://measurements/crinacle/raw_data/IEC60318-4 IEM Measurements (TSV txt)/1Custom Dual L.txt'))

In [None]:
measurements = list(MEASUREMENTS_PATH.glob('**/*.csv'))
for fp in tqdm(measurements):
    fr = FrequencyResponse.read_csv(fp)
    if np.sum(np.abs(fr.raw)) < 0.01:
        print(f'Removed "{fp}"')