In [2]:
import json
from imslp_scraping import get_all_composer_pieces, get_composer_url
from pieces import create_piece
from ydata_profiling import ProfileReport
from typing import List, Dict, Any
import pyarrow
import datetime
import pandas as pd
import numpy as np
import polars as pl

from pathlib import Path

# Read the works table from the parquet snapshot one directory up.
file = Path("../full_df_20241211_225924.parquet")
df = pl.read_parquet(file)

# Per-composer tally of work_name entries, stored in a "count" column.
composer_counts = df.group_by("composer_name").agg(
    pl.count("work_name").alias("count")
)

# Keep composers whose tally exceeds 10, then join back onto df by
# composer_name so the matching rows of df carry that "count" column.
filtered_df = composer_counts.filter(pl.col("count") > 10).join(
    df, on="composer_name"
)
# Lookup table folding noisy instrument labels onto a canonical name.
#
# Keys must be written in lowercase: the normalisation step that consumes
# this table lowercases every instrumentation entry *before* looking it up,
# so a key containing capitals can never match. The entry for solo piano was
# previously spelled "Piano Solo" and was therefore dead; it is now
# "piano solo".
instrument_mapping = {
    "viol": "violin",
    "piano)": "piano",
    "(piano": "piano",
    "piano solo": "piano",
    "1 piano": "piano",
    "piano (no.12 = 2 voices": "piano",
    "piano (or harp (no.7 only)": "piano",
    "piano (nos.5-7)": "piano",
    "piano (arranged)": "piano",
    "2 horns)": "2 horns",
}

# Lowercased form of each entry inside the "instrumentation" list column;
# built once and reused in all three branches below.
lowered_label = pl.element().str.to_lowercase()

# Rewrite "instrumentation" in place: entries whose lowercased text is a key
# of instrument_mapping are replaced via that mapping, all others are kept
# in their lowercased form.
filtered_df = filtered_df.with_columns(
    pl.col("instrumentation")
    .list.eval(
        pl.when(lowered_label.is_in(instrument_mapping.keys()))
        .then(lowered_label.replace(instrument_mapping))
        .otherwise(lowered_label)
    )
    .alias("instrumentation")
)
# Expand the "instrumentation" list column so each entry gets its own row,
# keeping only the work, composer and URL columns alongside it.
instrument_rows = filtered_df.select(
    "instrumentation", "work_name", "composer_name", "imslp_url"
).explode("instrumentation")

# Drop duplicate rows and order the result by work name.
unique_instruments = instrument_rows.unique().sort(by="work_name")

FileNotFoundError: No such file or directory (os error 2): ../full_df_20241211_225924.parquet

In [4]:
# Display the first five rows of df as this cell's output.
df.head(5)

work_name,composer_name,catalogue_desc_str,catalogue_type,catalogue_number,catalogue_number_secondary,catalogue_id,composition_year,composition_year_string,key_signature,movements,sub_piece_type,sub_piece_count,instrumentation,nickname,piece_style,imslp_url,wikipedia_url
str,str,str,str,i64,null,null,i64,str,str,str,str,i64,list[str],str,str,str,str
"""Poeme Satanique""","""Scriabin, Aleksandr""","""Op.36""","""op""",36,,,1903.0,"""1903""",,"""[]""",,,"[""piano""]",,"""early 20th century""","""https://imslp.org/wiki/Po%C3%A…",
"""Piano Concerto""","""Scriabin, Aleksandr""","""Op.20""","""op""",20,,,1896.0,"""1896""","""fsharpminor""","""[{""title"": ""I. Allegro"", ""numb…","""movements""",3.0,"[""piano"", ""orchestra""]",,"""romantic""","""https://imslp.org/wiki/Piano_C…","""http://en.wikipedia.org/wiki/P…"
"""2 Impromptus""","""Scriabin, Aleksandr""","""Op.12""","""op""",12,,,1895.0,"""1895""",,"""[{""title"": ""Presto"", ""number"":…","""movements""",2.0,"[""piano""]",,"""romantic""","""https://imslp.org/wiki/2_Impro…",
"""Préludes""","""Scriabin, Aleksandr""","""Op.16""","""op""",16,,,1895.0,"""1895""",,"""[{""title"": ""Andante"", ""number""…","""preludes""",5.0,"[""piano""]",,"""romantic""","""https://imslp.org/wiki/5_Prelu…",
"""3 Etudes""","""Scriabin, Aleksandr""","""Op.65""","""op""",65,,,,,,"""[{""title"": ""Allegro fantastico…","""etudes""",3.0,"[""piano""]",,"""early 20th century""","""https://imslp.org/wiki/3_Etude…",


In [2]:
from supabase_database import SupabaseDatabase

# Hand df to SupabaseDatabase.bulk_insert_from_df and print a summary.
# db.close() runs in the `finally` clause, so it is called whether or not
# the insert raises.
db = SupabaseDatabase()
try:
    successful, failed = db.bulk_insert_from_df(df)
    print(f"Successfully inserted {successful} pieces")
    if failed:
        print("Failed inserts:")
    # A falsy `failed` turns this loop into a no-op, so per-item lines are
    # only printed after the "Failed inserts:" header above.
    for work_name, error in failed or ():
        print(f"- {work_name}: {error}")
finally:
    db.close()

Successfully inserted 14376 pieces
Failed inserts:
- Weimarer (Gothäer) Passion: invalid input value for enum catalogue_type: "bc"
LINE 9: ...   14460, 'Weimarer (Gothäer) Passion', 'BC D 1', 'bc', NULL...
                                                             ^

- Partita: invalid input value for enum catalogue_type: "krebs-wv"
LINE 9:             14479, 'Partita', 'Krebs-WV 825', 'krebs-wv', 82...
                                                      ^

- O Mensch bewein dein Sünde groß: invalid input value for enum catalogue_type: "p"
LINE 9: ...O Mensch bewein dein Sünde groß', 'P.396 ; T.61', 'p', 396, ...
                                                             ^

- Laß, Fürstin, laß noch einen Strahl: value too long for type character varying(255)

- Fantasia: invalid input value for enum catalogue_type: "kv"
LINE 9:             14508, 'Fantasia', 'KV 616a', 'kv', 616, NULL, N...
                                                  ^

- Horn Concerto: invalid input value 