In [1]:
import configparser
import datetime as dt
import sys
import collections
from pathlib import Path
from typing import Union, Optional, List, Tuple

import codebook.EDA as EDA
import codebook.clean as clean
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sqlalchemy

In [2]:
%load_ext autoreload
%autoreload 2

%matplotlib inline
plt.style.use('raph-base')

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

pd.options.display.float_format = '{:,.2f}'.format
pd.set_option('display.max_columns', 30)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', 800)

np.random.seed(666)

In [3]:
Path.cwd().parent / 'src'

WindowsPath('C:/Users/r2d4/OneDrive/code/projects/20-02_disco/src')

In [4]:
sys.path.append(str(Path.cwd().parent))

from src.db_declaration import (
    Base, Artist, CreditTrx, Genre, Label, Record, RecordFormat, CreditTrx, LabelRecordLink, ArtistLabelLink
)
from src import db_functions
from src import db_connect

In [5]:
print(sys.executable)
print(sys.version)
print(f'sqlalchemy {sqlalchemy.__version__}')

C:\Users\r2d4\miniconda3\envs\py3\python.exe
3.8.3 (default, May 19 2020, 06:50:17) [MSC v.1916 64 bit (AMD64)]
sqlalchemy 1.3.17


## Connect To DB

Because we connect from `dev`subfolder, I have to work with the sqlalchemy `create_engine` function directly.

In [6]:
# session.close()
prod_path = r"sqlite:///C:\Users\r2d4\OneDrive\code\projects\20-02_disco\db_aka_discobase\DiscoBase.db"
engine = sqlalchemy.create_engine(prod_path)

session = db_connect.create_session(engine)

# Check
session

<sqlalchemy.orm.session.Session at 0x1eee7b529a0>

## Query DB

In [7]:
pd.read_sql("records", engine).tail(5)

Unnamed: 0,record_id,title,year,genre_id,format_id,vinyl_color,lim_edition,number,remarks,purchase_date,price,rating,is_digitized,is_active,created_at,updated_at
325,326,Septic Decadence,2021,1,6,yellow with red splatter,,,2nd press of 2020 record,2021-04-11,10,,1,1,2021-04-13 15:19:56,2021-05-05 19:24:25
326,327,The Delta Death Sirens,2021,1,1,swamp green,300.0,,,2021-04-29,15,,1,1,2021-04-29 19:31:38,NaT
327,328,Split,2021,1,3,ultra-clear with electric blue & green glow-in-the-dark splatter,150.0,,,2021-04-27,20,,0,1,2021-04-29 19:39:27,NaT
328,329,World At War,2021,5,3,clear red inside clear yellow,400.0,,,2021-05-05,20,,0,1,2021-05-05 18:56:23,NaT
329,330,Good To Feel,2018,5,3,cloudy green,500.0,,3rd press,2021-05-05,20,,1,1,2021-05-05 18:59:31,NaT


In [8]:
pd.read_sql("records", engine)["is_active"].sum()

299

In [9]:
pd.read_sql("artists", engine).tail

<bound method NDFrame.tail of      artist_id      artist_name artist_country          created_at          updated_at
0            1        Dismember         Sweden 2021-02-24 14:24:16                 NaT
1            2        Merciless         Sweden 2021-02-24 14:24:16 2021-02-28 16:57:22
2            3         Overkill            USA 2021-02-24 14:24:16 2021-02-24 14:44:37
3            4         Entombed         Sweden 2021-02-24 14:24:16                 NaT
4            5        Metallica            USA 2021-02-24 14:24:16 2021-02-28 16:59:06
..         ...              ...            ...                 ...                 ...
215        216        Coagulate            USA 2021-04-29 19:39:27                 NaT
216        217  Soul Devourment            USA 2021-04-29 19:39:27                 NaT
217        218          Gutvoid         Canada 2021-04-29 19:39:27                 NaT
218        219        Dead Heat            USA 2021-05-05 18:56:23                 NaT
219        22

In [10]:
pd.read_sql("credit_trx", engine).tail(10)

Unnamed: 0,credit_trx_id,credit_trx_date,credit_trx_type,credit_value,credit_saldo,record_id,created_at,updated_at
344,345,2020-11-06,Purchase,0.0,-3.0,324.0,2021-04-09 16:22:47,NaT
345,346,2021-04-11,Purchase,0.0,-3.0,325.0,2021-04-13 15:12:04,NaT
346,347,2021-04-13,Purchase,0.0,-3.0,326.0,2021-04-13 15:19:56,NaT
347,348,2021-04-19,Addition,1.0,-2.0,,2021-04-19 11:50:19,NaT
348,349,2021-04-29,Purchase,-1.0,-3.0,327.0,2021-04-29 19:31:38,NaT
349,350,2021-04-29,Addition,1.0,-2.0,,2021-04-29 19:31:57,NaT
350,351,2021-04-27,Purchase,-1.0,-3.0,328.0,2021-04-29 19:39:27,NaT
351,352,2021-04-30,Remove,1.0,-2.0,269.0,2021-04-30 20:32:47,NaT
352,353,2021-05-05,Purchase,-1.0,-3.0,329.0,2021-05-05 18:56:23,NaT
353,354,2021-05-05,Purchase,-1.0,-4.0,330.0,2021-05-05 18:59:31,NaT


In [11]:
pd.read_sql("record_label_link", engine).tail()
pd.read_sql("artist_label_link", engine).tail()
pd.read_sql("artist_genre_link", engine).tail()
pd.read_sql("genre_label_link", engine).tail()

Unnamed: 0,record_id,label_id
303,304,25
304,305,33
305,306,33
306,307,33
307,308,45


Unnamed: 0,artist_id,label_id
317,216,33
318,217,33
319,218,33
320,219,21
321,220,21


Unnamed: 0,artist_id,genre_id
218,216,1
219,217,1
220,218,1
221,219,5
222,220,5


Unnamed: 0,genre_id,label_id
117,5,106
118,5,107
119,5,108
120,1,109
121,1,110


In [12]:
pd.read_sql("labels", engine).sort_values("label_name")

Unnamed: 0,label_id,label_name,created_at,updated_at
17,18,20 Buck Spin,2021-02-24 14:24:22,NaT
27,28,6131,2021-02-24 14:24:23,NaT
89,90,Abfall,2021-02-28 17:49:06,NaT
91,92,Acouphènes!,2021-02-28 17:50:46,NaT
9,10,Active Rebellion,2021-02-24 14:24:20,NaT
...,...,...,...,...
49,50,Vinyl Collectors,2021-02-24 16:31:31,NaT
73,74,Virgin,2021-02-28 11:24:25,NaT
69,70,War Anthem,2021-02-28 10:48:59,NaT
64,65,Witching Hour Productions,2021-02-26 17:07:08,NaT


In [13]:
result = session.query(Record).filter(Record.artists.any(Artist.artist_name == "Entombed")).all()
for record in result:
    print(record, record.purchase_date)

<Record(record_id=11, title=Uprising, artist=[<Artist(artist_id=4, artist_name=Entombed)>])> 2013-07-01
<Record(record_id=12, title=Morning Star, artist=[<Artist(artist_id=4, artist_name=Entombed)>])> 2013-07-01
<Record(record_id=23, title=Wolverine Blues, artist=[<Artist(artist_id=4, artist_name=Entombed)>])> 2014-01-01
<Record(record_id=36, title=Clandestine, artist=[<Artist(artist_id=4, artist_name=Entombed)>])> 2015-08-01
<Record(record_id=40, title=Left Hand Path, artist=[<Artist(artist_id=4, artist_name=Entombed)>])> 2015-10-01


  util.warn(


In [14]:
result = session.query(Artist).filter(Artist.artist_name == "Entombed").one()
print(result.artist_country)

Sweden


In [15]:
pd.read_sql("formats", engine)

Unnamed: 0,format_id,format_name,created_at,updated_at
0,1,"12""",2021-02-24 14:24:16,NaT
1,2,"Pic-7""",2021-02-24 14:24:16,2021-02-24 14:37:30
2,3,LP,2021-02-24 14:24:16,NaT
3,4,2LP,2021-02-24 14:24:16,NaT
4,5,"10""",2021-02-24 14:24:16,NaT
5,6,"7""",2021-02-24 14:24:16,NaT
6,7,MLP,2021-02-24 14:24:17,NaT
7,8,2xLP,2021-02-24 14:24:17,NaT
8,9,LP,2021-02-24 14:24:20,NaT
9,10,Pic-LP,2021-02-24 14:24:21,NaT


In [16]:
pd.read_sql("genres", engine)

Unnamed: 0,genre_id,genre_name,created_at,updated_at
0,1,Death Metal,2021-02-24 14:24:16,NaT
1,2,Thrash Metal,2021-02-24 14:24:16,NaT
2,3,Black Metal,2021-02-24 14:24:16,NaT
3,4,Speed Metal,2021-02-24 14:24:17,NaT
4,5,Hardcore,2021-02-24 14:24:18,NaT
5,6,Punk,2021-02-24 14:24:19,NaT
6,7,Crust,2021-02-24 14:24:19,NaT
7,8,Crossover,2021-02-24 14:24:24,NaT
8,9,Grindcore,2021-02-24 14:24:25,NaT


## Inactivate A Record (Temporary Section)

In [17]:
# removal = {
#     "trx_type": "Removal",
#     "credit_value": 0,
#     "artist": "Furia",
#     "title": "Marzannie, Królowej Polski",
#     "year": 2012,
#     "date": dt.datetime.today().date()
# }

In [18]:
# db_functions.set_record_to_inactive(session, removal)

In [19]:
# pd.read_sql("credit_trx", engine).tail()

In [20]:
# to_delete = session.query(CreditTrx).filter(CreditTrx.credit_trx_id == 301).one()
# session.delete(to_delete)
# session.commit()

## Backfill Missing Record-Format-Relations in Records Table (Done)

In [21]:
# def load_albums_from_xlsx(
#     filepath: Union[Path, str], genres: Optional[List] = None
# ) -> pd.DataFrame:
#     """Load the original album collection file into a dataframe.
#     You can specify a list of genres you want to include
#     (defaults to None).
#     """
#     df = pd.read_excel(filepath, engine="openpyxl")
#     if genres:
#         df = df[df["Genre"].isin(genres)]
#     return df

In [22]:
# CONFIG_PATH = Path.cwd().parent / "config.cfg"
# path_to_collection = utils.read_config_return_str(CONFIG_PATH, "COLLECTION")
# collection_df = load_albums_from_xlsx(path_to_collection)
# collection_df = clean.prettify_column_names(collection_df)
# collection_df.dropna(thresh=8, inplace=True)
# collection_df.rename(columns={"format": "record_format"}, inplace=True)
# # collection_df["purchase_date"] = collection_df["purchase_date"].dt.date.astype(str)
# # collection_df["number"] = collection_df["number"].astype(str)
# # collection_df["digitized"] = collection_df["digitized"].map({True: 1, False: 0}).astype(int)
# collection_df["label"].fillna("NA", inplace=True)  # because it creates a foreign key

# # This is for the initial ingestion only
# collection_df["artist_country"] = None
# collection_df["credit_value"] = 0
# collection_df["trx_type"] = "Initial Load"

# assert len(collection_df) == 293

In [23]:
# # backfill

# for rec in collection_df.itertuples():
#     record = db_functions.fetch_a_record_from_the_shelf(session, rec.artist, rec.title)
#     r_format = rec.record_format
    
#     # Check if the format already exists or has to be created
#     record_format = (
#         session.query(RecordFormat)
#         .filter(RecordFormat.format_name.ilike(r_format))
#         .one_or_none()
#     )
#     if record_format is None:
#         record_format = RecordFormat(format_name=r_format)
#         session.add(record_format)
    
#     record.record_format = record_format

# session.commit()

In [24]:
# # Manual Update

# record = session.query(Record).filter(Record.record_id == 295).one()
# r_format = '12"'

# # Check if the format already exists or has to be created
# record_format = (
#     session.query(RecordFormat)
#     .filter(RecordFormat.format_name.ilike(r_format))
#     .one_or_none()
# )
# if record_format is None:
#     record_format = RecordFormat(format_name=r_format)
#     session.add(record_format)

# record.record_format = record_format

# session.commit()

## Manual Updates

### Credit Trx

In [25]:
# result = session.query(CreditTrx).filter(CreditTrx.credit_trx_type == "Removal").all()
# for r in result:
#     r.credit_trx_type = "Remove"
#     print(r)

In [26]:
# r = session.query(Artist).filter(Artist.artist_name == "CELESTIAL SANCTUARY").one()
# r.artist_name = "Celestial Sanctuary"

In [27]:
# session.commit()

### Remove "Records" From Label-Names And Split Labels

In [28]:
# result = session.query(Label).filter(Label.label_name.like('% Records')).all()
# for x in result:
#     x.label_name = x.label_name.rpartition(" Records")[0]
# session.commit()

In [29]:
# session.query(Label).get(42)

In [30]:
# result = session.query(Label).filter(Label.label_name.contains(' / ')).all()
# for x in result:
#     print(x)

In [31]:
# result = session.query(Record).filter(Record.labels.any(Label.label_id.in_([19, 31, 32])))
# for x in result:
#     print(x)

In [32]:
# jack = session.query(Label).filter(Label.label_id.in_([19, 31, 32])).all()
# for x in jack:
#     print(x)
#     session.delete(x)
# session.commit()

In [33]:
# session.close()

### Check for "None" str Values in Ratings and Fix Them

In [34]:
# result = session.query(Record).filter(Record.rating == "None").all()
# for record in result:
#     print(record)
#     record.rating = None
# session.commit()

### Update Stuff

In [39]:
result = session.query(Record).filter(Record.artists.any(Artist.artist_country == "NA")).first()
result

<Record(record_id=273, title=Speciation, artist=[<Artist(artist_id=172, artist_name=Faceless Burial)>])>

In [36]:
# result = session.query(Artist).filter(Artist.artist_name == "Insurgency").first()
# result.artist_name = "Resurgency"
# session.commit()

In [37]:
# session.close()
# session = db_connect.create_session(engine)

In [38]:
result = session.query(Record).filter(Record.vinyl_color == 'None').all()
len(result)

0