In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from pathlib import Path
# Parent of the current working directory; it is put on sys.path so that packages located there can be imported
ROOT_PATH = Path().resolve().parent
if str(ROOT_PATH) not in sys.path:
    # list.insert() takes (index, object) and sys.path entries must be plain strings
    sys.path.insert(1, str(ROOT_PATH))

In [3]:
import json
import ipywidgets as widgets
from IPython.display import display
from autoeq.constants import PEQ_CONFIGS, DEFAULT_BASS_BOOST_GAINS
from autoeq.batch_processing import batch_processing
ROOT_PATH = Path().resolve().parent
from dbtools.rtings_crawler import RtingsCrawler
from dbtools.crinacle_crawler import CrinacleCrawler
from dbtools.oratory1990_crawler import Oratory1990Crawler
from dbtools.innerfidelity_crawler import InnerfidelityCrawler
from dbtools.headphonecom_crawler import HeadphonecomCrawler
from dbtools.hypethe_sonics_crawler import HypetheSonicsCrawler
from dbtools.squig_crawler import SquigCrawler, SquigCrawlerManager
from dbtools.prune_results import prune_results
from dbtools.update_result_indexes import update_all_indexes
from dbtools.constants import TARGETS_PATH, MEASUREMENTS_PATH, RESULTS_PATH

# Crawling and Parsing
Additional Python packages are required for processing the measurements:
```bash
python -m pip install -U -r dbtools/requirements.txt
```

This notebook uses IPyWidgets
```bash
jupyter nbextension enable --py widgetsnbextension --sys-prefix
```

Finally install IPython kernel
```bash
python -m ipykernel install --user --name="autoeq"
```

Measurement crawlers require [Google Chrome](https://www.google.com/chrome/) installed and
[ChromeDriver](https://googlechromelabs.github.io/chrome-for-testing/) binary in the measurements folder (or anywhere
in the PATH).

Measurement crawlers also require C++. This should be installed by default on Linux but on Windows you need to install
Microsoft Visual Studio build tools for this. https://visualstudio.microsoft.com/downloads/ ->
"Tools for Visual Studio 2019" -> "Build Tools for Visual Studio 2019".

oratory1990 crawler requires Ghostscript installed: https://www.ghostscript.com/download/gsdnld.html

## Clear `phone_books` to Get Latest Measurements
crinacle, squig.link and HypetheSonics crawlers use `phone_book*.json` files. These should be deleted to allow crawlers to download fresh versions.

In [None]:
# Delete every phone book JSON file found at any depth under MEASUREMENTS_PATH
for phone_book_path in MEASUREMENTS_PATH.rglob('phone_book*.json'):
    phone_book_path.unlink()

## Crinacle
Download measurement data from Drive folder to `measurements/crinacle/raw_data/` before running this!

* `IEM Measurements/IEC60318-4 IEM Measurements (TSV txt)` into `AutoEq/measurements/crinacle/raw_data/IEC60318-4 IEM Measurements (TSV txt)`
* `IEM Measurements/4620 IEM Measurements` into `AutoEq/measurements/crinacle/raw_data/4620 IEM Measurements`
* `HP Measurements/EARS + 711 (TSV txt) (Legacy)` into `AutoEq/measurements/crinacle/raw_data/EARS + 711 (TSV txt) (Legacy)`
* `GRAS 43AG-7` into `AutoEq/measurements/crinacle/raw_data/GRAS 43AG-7`

In [None]:
crawler = CrinacleCrawler()

In [None]:
crawler.run()

In [None]:
display(crawler.widget)

In [None]:
crawler.process(new_only=True)

## oratory1990
oratory1990 crawler fetches all measurements from https://www.reddit.com/r/oratory1990/wiki/index/list_of_presets/, downloads PDFs and reads the frequency response measurement data from the PDFs. Parsing the PDFs requires [Ghostscript](https://www.ghostscript.com/download/gsdnld.html) to be installed on the system.

In [69]:
crawler = Oratory1990Crawler()

#### Delete Existing PDFs
Optionally delete existing PDFs to get latest measurements for existing items

In [None]:
# Remove the existing PDF files from the "pdf" directory under the crawler's measurements path
pdf_dir = crawler.measurements_path.joinpath('pdf')
for pdf_path in pdf_dir.glob('*.pdf'):
    pdf_path.unlink()

#### Crawl

In [None]:
crawler.run()

In [None]:
display(crawler.widget)

#### Process
This downloads the PDF (if it doesn't exist), converts it to an image using Ghostscript and parses the image.

Set `new_only=False` to reprocess all existing measurements

In [70]:
crawler.process(new_only=True)

  0%|          | 0/736 [00:00<?, ?it/s]

## Rtings
Rtings crawler fetches all measurements from https://www.rtings.com/headphones/1-[2,4,5,6,7,8]/graph and downloads raw FR JSON files and parses them.

Rtings has updated to the Bruel & Kjaer 5128 measurement system and is distributing data under the v1.8 methodology. They occasionally remeasure old headphones, and therefore the crawler removes items that have been remeasured from the name index. After that, measurement files can be deleted with `crawler.prune_measurements()` and results with `prune_results()`.

In [62]:
crawler = RtingsCrawler()

In [None]:
crawler.run()

In [None]:
display(crawler.widget)

In [None]:
crawler.process(new_only=False)

## HypetheSonics

In [None]:
crawler = HypetheSonicsCrawler()

In [None]:
crawler.run()

In [None]:
display(crawler.widget)

In [None]:
crawler.process(new_only=True)

## Squig.link
Several databases are included in squig.link.

**TODO:** rig information for crawlers, somehow

In [5]:
manager = SquigCrawlerManager()
#print(', '.join(sorted([crawler.username for crawler in manager.crawlers])))
#print(json.dumps({crawler.username: crawler.name for crawler in sorted(manager.crawlers, key=lambda c: c.username)}, indent=4, ensure_ascii=False))

In [None]:
display(manager.run('Auriculares Argentina').widget)

In [None]:
manager.process('Auriculares Argentina', new_only=True)

In [None]:
display(manager.run('Bakkwatan').widget)

In [None]:
manager.process('Bakkwatan', new_only=True)

In [None]:
display(manager.run('Hi End Portable').widget)

In [None]:
manager.process('Hi End Portable', new_only=True)

**TODO:** DHRME over-ear rig

In [None]:
display(manager.run('DHRME').widget)

In [None]:
manager.process('DHRME', new_only=True)

In [None]:
display(manager.run('Fahryst').widget)

In [None]:
manager.process('Fahryst', new_only=True)

In [None]:
display(manager.run('Filk').widget)

In [None]:
manager.process('Filk', new_only=True)

In [None]:
display(manager.run('freeryder05').widget)

In [None]:
manager.process('freeryder05', new_only=True)

In [None]:
# Not in squigsites.json
#display(manager.run('gadgetgenerations').widget)

In [None]:
display(manager.run('Harpo').widget)

In [None]:
manager.process('Harpo', new_only=True)

In [None]:
# Rig not known yet
#display(manager.run('ish').widget)

In [None]:
# Rig not known yet
#display(manager.run('jacstone').widget)

In [None]:
display(manager.run('Jaytiss').widget)

In [None]:
manager.process('Jaytiss', new_only=True)

In [None]:
display(manager.run('Kazi').widget)

In [None]:
manager.process('Kazi', new_only=True)

In [None]:
display(manager.run('kr0mka').widget)

In [None]:
manager.process('kr0mka', new_only=True)

In [None]:
display(manager.run('Kuulokenurkka').widget)

In [22]:
manager.process('Kuulokenurkka', new_only=True)

  0%|          | 0/266 [00:00<?, ?it/s]

In [None]:
# Not in squigsites.json
#display(manager.run('mini').widget)

In [None]:
# Not in squigsites.json
#display(manager.run('psykano').widget)

In [None]:
display(manager.run('Regan Cipher').widget)

In [28]:
manager.process('Regan Cipher', new_only=True)

  0%|          | 0/194 [00:00<?, ?it/s]

Failed to parse "C:\Users\jaakko\code\AutoEq\measurements\Regan Cipher\raw_data\in-ear\Zigaat Estrella L.txt": Numeric lines have different number of columns


In [None]:
display(manager.run('RikudouGoku').widget)

In [8]:
manager.process('RikudouGoku', new_only=True)

  0%|          | 0/200 [00:00<?, ?it/s]

In [None]:
# Not in squigsites.json
#display(manager.run('sdlib').widget)

In [None]:
# Almost all measurements are with 3rd party eartips
#display(manager.run('smirk').widget)

In [None]:
# Rig not known yet
#display(manager.run('soundcheck39').widget)

In [None]:
display(manager.run('Super Review').widget)

In [23]:
manager.process('Super Review', new_only=True)

  0%|          | 0/885 [00:00<?, ?it/s]

In [24]:
display(manager.run('Ted\'s Squig Hoard').widget)

HBox(children=(VBox(layout=Layout(max_height='600px', overflow='auto', width='324px')), VBox()))

In [None]:
manager.process('Ted\'s Squig Hoard', new_only=True)

In [None]:
# Not in squigsites.json
#display(manager.run('therollo9').widget)

In [None]:
display(manager.run('ToneDeafMonk').widget)

In [32]:
manager.process('ToneDeafMonk', new_only=True)

  0%|          | 0/433 [00:00<?, ?it/s]

In [None]:
# Not in squigsites.json
#display(manager.run('wdym').widget)

In [None]:
# Only 3 models
#display(manager.run('yanyin').widget)

In [None]:
# Not in squigsites.json
#display(manager.run('zamo').widget)

## Rename Measurements
Sometimes measurements are named incorrectly, or previously only one sample existed and now multiple samples have been measured, so the original one needs to be renamed as "<name> (sample 1)".

In [None]:
# Crawler instances that the rename entries below can refer to
crinacle = CrinacleCrawler()
headphonecom = HeadphonecomCrawler()
innerfidelity = InnerfidelityCrawler()
oratory1990 = Oratory1990Crawler()
# The Rtings crawler is given the oratory1990 crawler's driver instead of its own
rtings = RtingsCrawler(driver=oratory1990.driver)
hypethesonics = HypetheSonicsCrawler()

# Each entry names the measurement to rename and the crawlers the rename is applied to
renames = [
    {
        'old_name': 'HEDD Heddphone 2',
        'new_name': 'HEDD HEDDphone TWO',
        'crawlers': [manager.crawler('kr0mka'), oratory1990, crinacle],
    },
    {
        'old_name': 'HEDD Heddphone',
        'new_name': 'HEDD HEDDphone ONE',
        'crawlers': [
            manager.crawler('kr0mka'),
            manager.crawler('Auriculares Argentina'),
            hypethesonics,
            oratory1990,
            crinacle,
        ],
    },
]
for rename in renames:
    names = {'old_name': rename['old_name'], 'new_name': rename['new_name']}
    for crawler in rename['crawlers']:
        # Every call is made with dry_run=True
        crawler.rename_measurement(**names, dry_run=True)

## Prune Results
Check if obsolete results (e.g. because of renaming) exist and remove them

In [71]:
prune_results(databases=['oratory1990', 'rtings',], dry_run=True)

## Update Results
Creates new results from the measurements

The `update_results()` function below is a shorthand for setting the parameters correctly.

* `target` is the name of the target curve. A version without bass boost should be chosen as the bass shelf is selected automatically based on the target.
* `rig` must be provided if the source database has measurements with multiple different rigs and the directories for the source are organized by rig.

In [33]:
def update_results(
    source_db,
    form,
    target,
    rig=None,
    **override_kwargs
):
    """Runs batch_processing() for one source database, form factor and (optionally) rig.

    Args:
        source_db: Name of the database directory under MEASUREMENTS_PATH and RESULTS_PATH.
        form: Name of the sub-directory under "<source_db>/data" to read measurements from.
        target: Name of the target curve file in TARGETS_PATH, with or without the ".csv" extension.
            Surrounding whitespace is ignored. The same normalized name is used as the key for
            DEFAULT_BASS_BOOST_GAINS.
        rig: Optional rig name. When given, measurements are read from the "<form>/<rig>" directory and
            results are written to the "<rig> <form>" directory.
        **override_kwargs: Extra keyword arguments for batch_processing(). These take precedence over the
            defaults built here.

    Raises:
        KeyError: If the target has no entry in DEFAULT_BASS_BOOST_GAINS and `bass_boost_gain` was not
            given as an override.
    """
    # Normalize the name once so that the CSV path and the bass boost lookup always agree
    target_name = target.strip()
    if target_name.endswith('.csv'):
        target_name = target_name[:-len('.csv')]

    # Look up the default gain only when the caller didn't supply one, so that targets without a default
    # can still be processed with an explicit bass_boost_gain
    if 'bass_boost_gain' in override_kwargs:
        bass_boost_gain = override_kwargs['bass_boost_gain']
    else:
        bass_boost_gain = DEFAULT_BASS_BOOST_GAINS[target_name]

    input_dir = MEASUREMENTS_PATH.joinpath(source_db, 'data', form)
    if rig is not None:
        input_dir = input_dir.joinpath(rig)
    kwargs = {
        'input_dir': input_dir,
        'output_dir': RESULTS_PATH.joinpath(source_db, f'{rig} {form}' if rig is not None else form),
        'target': TARGETS_PATH.joinpath(target_name + '.csv'),
        'bass_boost_gain': bass_boost_gain,
        'bass_boost_fc': 105, 'bass_boost_q': 0.7,
        'parametric_eq': True, 'ten_band_eq': True, 'convolution_eq': True,
        'parametric_eq_config': [PEQ_CONFIGS['4_PEAKING_WITH_LOW_SHELF'], PEQ_CONFIGS['4_PEAKING_WITH_HIGH_SHELF']],
        'fs': [44100, 48000],
        'thread_count': 0,
    }
    # Caller supplied values win over the defaults above
    kwargs.update(override_kwargs)
    batch_processing(**kwargs)

### oratory1990

In [73]:
update_results('oratory1990', 'over-ear', 'Harman over-ear 2018 without bass', new_only=True)

  0%|          | 0/16 [00:00<?, ?it/s]

In [74]:
update_results('oratory1990', 'in-ear', 'AutoEq in-ear', new_only=True)

  0%|          | 0/4 [00:00<?, ?it/s]

In [75]:
update_results('oratory1990', 'earbud', 'AutoEq in-ear', new_only=True)

  0%|          | 0/1 [00:00<?, ?it/s]

### crinacle

In [None]:
update_results('crinacle', 'over-ear', 'crinacle EARS + 711 Harman over-ear 2018 without bass', rig='EARS + 711', new_only=True)

In [None]:
update_results('crinacle', 'over-ear', 'Harman over-ear 2018 without bass', rig='GRAS 43AG-7', new_only=True)

In [None]:
update_results('crinacle', 'in-ear', 'JM-1 with Harman treble filter', rig='Bruel & Kjaer 4620', new_only=True)

In [None]:
update_results('crinacle', 'in-ear', 'AutoEq in-ear', rig='711', new_only=True)

### Rtings

In [76]:
update_results('Rtings', 'over-ear', 'HMS II.3 Harman over-ear 2018 without bass', rig='HMS II.3', new_only=True)

0it [00:00, ?it/s]

In [77]:
update_results('Rtings', 'over-ear', 'LMG 5128 0.6 without bass', rig='Bruel & Kjaer 5128', new_only=True)

0it [00:00, ?it/s]

In [78]:
update_results('Rtings', 'in-ear', 'HMS II.3 AutoEq in-ear', rig='HMS II.3', new_only=True)

0it [00:00, ?it/s]

In [79]:
update_results('Rtings', 'in-ear', 'JM-1 with Harman treble filter', rig='Bruel & Kjaer 5128', new_only=True)

0it [00:00, ?it/s]

In [80]:
update_results('Rtings', 'earbud', 'HMS II.3 AutoEq in-ear', rig='HMS II.3', new_only=True)

0it [00:00, ?it/s]

In [81]:
update_results('Rtings', 'earbud', 'LMG 5128 0.6 without bass', rig='Bruel & Kjaer 5128', new_only=True)

0it [00:00, ?it/s]

### Innerfidelity

In [None]:
update_results('Innerfidelity', 'over-ear', 'HMS II.3 Harman over-ear 2018 without bass', new_only=True)

In [None]:
# Target name spelled "AutoEq", the same way as in the Rtings HMS II.3 cells
update_results('Innerfidelity', 'in-ear', 'HMS II.3 AutoEq in-ear', new_only=True)

In [None]:
# Target name spelled "AutoEq", the same way as in the Rtings HMS II.3 cells
update_results('Innerfidelity', 'earbud', 'HMS II.3 AutoEq in-ear', new_only=True)

### Headphone.com Legacy

In [None]:
update_results('Headphone.com Legacy', 'over-ear', 'HMS II.3 Harman over-ear 2018 without bass', new_only=True)

In [None]:
# Target name spelled "AutoEq", the same way as in the Rtings HMS II.3 cells
update_results('Headphone.com Legacy', 'in-ear', 'HMS II.3 AutoEq in-ear', new_only=True)

In [None]:
# Target name spelled "AutoEq", the same way as in the Rtings HMS II.3 cells
update_results('Headphone.com Legacy', 'earbud', 'HMS II.3 AutoEq in-ear', new_only=True)

### HypetheSonics

In [None]:
update_results('HypetheSonics', 'over-ear', 'LMG 5128 0.6 without bass', new_only=True)

In [None]:
update_results('HypetheSonics', 'in-ear', 'JM-1 with Harman treble filter', rig='Bruel & Kjaer 5128', new_only=True)

In [None]:
update_results('HypetheSonics', 'in-ear', 'Harman over-ear 2018 without bass', rig='GRAS RA0045', new_only=True)

In [None]:
# Target name must not carry a leading space; it is used both as a file name and as a lookup key
update_results('HypetheSonics', 'earbud', 'LMG 5128 0.6 without bass', new_only=True)

### squig.link

In [85]:
update_results('Auriculares Argentina', 'over-ear', 'Harman over-ear 2018 without bass', new_only=True)

  0%|          | 0/22 [00:00<?, ?it/s]

In [86]:
update_results('Auriculares Argentina', 'in-ear', 'AutoEq in-ear', new_only=True)

  0%|          | 0/2 [00:00<?, ?it/s]

In [87]:
update_results('Bakkwatan', 'in-ear', 'AutoEq in-ear', new_only=True)

0it [00:00, ?it/s]

In [88]:
update_results('DHRME', 'in-ear', 'AutoEq in-ear', new_only=True)

  0%|          | 0/58 [00:00<?, ?it/s]

In [89]:
update_results('Fahryst', 'in-ear', 'AutoEq in-ear', new_only=True)

  0%|          | 0/50 [00:00<?, ?it/s]

In [90]:
update_results('Filk', 'in-ear', 'AutoEq in-ear', new_only=True)

  0%|          | 0/54 [00:00<?, ?it/s]

In [91]:
update_results('Filk', 'over-ear', 'Harman over-ear 2018 without bass', new_only=True)

  0%|          | 0/19 [00:00<?, ?it/s]

In [92]:
update_results('freeryder05', 'in-ear', 'AutoEq in-ear', new_only=True)

  0%|          | 0/5 [00:00<?, ?it/s]

In [93]:
update_results('Harpo', 'in-ear', 'AutoEq in-ear', new_only=True)

  0%|          | 0/12 [00:00<?, ?it/s]

In [94]:
update_results('Hi End Portable', 'in-ear', 'AutoEq in-ear', new_only=True)

  0%|          | 0/67 [00:00<?, ?it/s]

In [95]:
update_results('Jaytiss', 'in-ear', 'AutoEq in-ear', new_only=True)

  0%|          | 0/275 [00:00<?, ?it/s]

In [96]:
update_results('Kazi', 'in-ear', 'AutoEq in-ear', new_only=True)

  0%|          | 0/46 [00:00<?, ?it/s]

In [97]:
update_results('Kazi', 'earbud', 'AutoEq in-ear', new_only=True)

0it [00:00, ?it/s]

In [35]:
update_results('kr0mka', 'over-ear', 'Harman over-ear 2018 without bass', new_only=True)

  0%|          | 0/19 [00:00<?, ?it/s]

In [36]:
update_results('kr0mka', 'in-ear', 'AutoEq in-ear', new_only=True)

  0%|          | 0/11 [00:00<?, ?it/s]

In [37]:
update_results('kr0mka', 'earbud', 'AutoEq in-ear', new_only=True)

  0%|          | 0/4 [00:00<?, ?it/s]

In [38]:
update_results('Kuulokenurkka', 'over-ear', 'Harman over-ear 2018 without bass', new_only=True)

  0%|          | 0/101 [00:00<?, ?it/s]

In [39]:
update_results('Regan Cipher', 'over-ear', 'Harman over-ear 2018 without bass', new_only=True)

  0%|          | 0/7 [00:00<?, ?it/s]

In [40]:
update_results('Regan Cipher', 'in-ear', 'AutoEq in-ear', new_only=True)

  0%|          | 0/66 [00:00<?, ?it/s]

In [41]:
update_results('Regan Cipher', 'earbud', 'Harman over-ear 2018 without bass', new_only=True)

  0%|          | 0/13 [00:00<?, ?it/s]

In [42]:
update_results('RikudouGoku', 'in-ear', 'AutoEq in-ear', new_only=True)

  0%|          | 0/5 [00:00<?, ?it/s]

In [43]:
update_results('Super Review', 'over-ear', 'Harman over-ear 2018 without bass', new_only=True)

  0%|          | 0/65 [00:00<?, ?it/s]

In [44]:
update_results('Super Review', 'in-ear', 'AutoEq in-ear', new_only=True)

  0%|          | 0/164 [00:00<?, ?it/s]

In [45]:
update_results('Super Review', 'earbud', 'Harman over-ear 2018 without bass', new_only=True)

  0%|          | 0/7 [00:00<?, ?it/s]

In [46]:
update_results('Ted\'s Squig Hoard', 'in-ear', 'AutoEq in-ear', new_only=True)

0it [00:00, ?it/s]

In [47]:
update_results('ToneDeafMonk', 'in-ear', 'AutoEq in-ear', new_only=True)

  0%|          | 0/172 [00:00<?, ?it/s]

## Update Indexes
Updates recommended results, full results, DB specific results, HeSuVi results and ranking table.

In [None]:
update_all_indexes()

Creating ranking index...


  0%|          | 0/6033 [00:00<?, ?it/s]

Creating recommendations index...
Creating full index...
Creating source indices...
Creating HeSuVi ZIP archive...


  0%|          | 0/6033 [00:00<?, ?it/s]

  return self._open_to_write(zinfo, force_zip64=force_zip64)
  return self._open_to_write(zinfo, force_zip64=force_zip64)
  return self._open_to_write(zinfo, force_zip64=force_zip64)
  return self._open_to_write(zinfo, force_zip64=force_zip64)


Creating webapp data...


  0%|          | 0/6033 [00:00<?, ?it/s]

## Deploy
1. Add files to Git, commit and push
2. Upload webapp data to server

# Sandbox
Don't run these! Random exploration while developing.

In [None]:
from pathlib import Path
from tqdm.auto import tqdm
import re
import requests
from selenium.webdriver.common.by import By
import json
from bs4 import BeautifulSoup
import numpy as np
import json
from autoeq.frequency_response import FrequencyResponse

In [None]:
crawler = CrinacleCrawler()
crawler.crawl()

# For every crawled item, compare the source name stored in the name index against the one found in the
# matching book map and overwrite the stored name when they differ
for item in crawler.crawl_index:
    url_parts = item.url.split('/')
    index_item = crawler.name_index.find_one(url=item.url)
    # Second to last URL segment selects the book map, last segment is the file name
    book = crawler.book_maps[url_parts[-2]]
    normalized_file_name = crawler.normalize_file_name(url_parts[-1])
    source_name = None
    if normalized_file_name in book:
        source_name = book[normalized_file_name]
    # Nothing to update without both an index entry and a book entry, or when the names already agree
    if index_item is None or source_name is None:
        continue
    if index_item.source_name == source_name:
        continue
    print(f'{index_item.source_name} --> {source_name}')
    index_item.source_name = source_name
crawler.write_name_index()

In [None]:
print(crawler.crawl_index.find_one(url='file://measurements/crinacle/raw_data/IEC60318-4 IEM Measurements (TSV txt)/1Custom Dual L.txt'))

In [None]:
print(crawler.name_index.find_one(url='file://measurements/crinacle/raw_data/IEC60318-4 IEM Measurements (TSV txt)/1Custom Dual L.txt'))

In [None]:
measurements = list(MEASUREMENTS_PATH.glob('**/*.csv'))
for fp in tqdm(measurements):
    fr = FrequencyResponse.read_csv(fp)
    if np.sum(np.abs(fr.raw)) < 0.01:
        print(f'Removed "{fp}"')