In [1]:
import configparser
import datetime as dt
import sys
import collections
from pathlib import Path
from typing import Union, Optional, List, Tuple

import codebook.EDA as EDA
import codebook.clean as clean
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sqlalchemy

In [2]:
%load_ext autoreload
%autoreload 2

%matplotlib inline
plt.style.use('raph-base')

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

pd.options.display.float_format = '{:,.2f}'.format
pd.set_option('display.max_columns', 30)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', 800)

np.random.seed(666)

In [3]:
Path.cwd().parent / 'src'

WindowsPath('C:/Users/r2d4/OneDrive/code/projects/20-02_disco/src')

In [4]:
sys.path.append(str(Path.cwd().parent))

from src.db_declaration import (
    Base, Artist, CreditTrx, Genre, Label, Record, RecordFormat, CreditTrx, RecordLabelLink, ArtistLabelLink
)
from src import db_functions
from src import utils

In [5]:
print(sys.executable)
print(sys.version)
print(f'sqlalchemy {sqlalchemy.__version__}')

C:\Users\r2d4\miniconda3\envs\py3\python.exe
3.8.3 (default, May 19 2020, 06:50:17) [MSC v.1916 64 bit (AMD64)]
sqlalchemy 1.3.17


## Connect To DB

In [6]:
engine = utils.create_engine(r"C:\Users\r2d4\OneDrive\code\projects\20-02_disco\db_aka_discobase\DiscoBase.db")
session = utils.create_session(engine)

C:\Users\r2d4\OneDrive\code\projects\20-02_disco\db_aka_discobase\DiscoBase.db


## Query DB

In [27]:
pd.read_sql("records", engine).sample(3)

Unnamed: 0,record_id,artist_id,title,genre_id,label,year,format_id,vinyl_color,lim_edition,number,remarks,purchase_date,price,digitized,rating,active,created_at,updated_at
256,257,162,Forms Of Unreasoning Fear,1,,2019,3.0,,400,,"(total 500, first 100 bone)",2020-04-01,30,0,9.0,1,2021-01-09 11:40:29,2021-01-13 07:17:51
290,291,94,A New Realm Of Misery,5,,2019,1.0,pink green,100,,,2020-10-01,50,0,,1,2021-01-09 11:40:30,2021-01-13 07:17:51
187,188,103,Boulevard of Broken Bones,6,,2012,3.0,red,125,,out of 500,2018-09-01,25,1,8.0,1,2021-01-09 11:40:26,2021-01-13 07:17:51


In [36]:
pd.read_sql("records", engine).tail()

Unnamed: 0,record_id,artist_id,title,genre_id,label,year,format_id,vinyl_color,lim_edition,number,remarks,purchase_date,price,digitized,rating,active,created_at,updated_at
292,293,188,We Move As One,5,,2020,3,clear,,,,2020-12-01,20,1,,1,2021-01-09 11:40:30,2021-01-13 07:17:51
293,294,189,Crypt Of Ice,1,,2021,3,lilac,200,,,2020-12-12,25,0,,1,2021-01-09 11:42:41,2021-01-13 07:25:26
294,295,190,Ratlord,1,,2017,1,"random colored, w/ b-side silkscreenprint in white",lim,,,2021-01-07,15,0,,1,2021-01-09 11:46:04,2021-01-13 07:25:44
295,296,191,I Am Your God,5,,2020,3,white / black,200,,,2021-01-09,20,1,,1,2021-01-09 12:47:46,2021-01-13 07:25:19
296,297,192,Anti Life Anti You,5,,2020,3,orange with black swirl,200,,,2021-01-09,20,1,,1,2021-01-09 12:50:27,2021-01-13 07:25:12


In [9]:
pd.read_sql("records", engine)["active"].sum()

280

In [10]:
pd.read_sql("artists", engine).tail

<bound method NDFrame.tail of      artist_id       artist_name artist_country          created_at updated_at
0            1         Dismember           None 2021-01-09 11:40:20        NaT
1            2         Merciless           None 2021-01-09 11:40:20        NaT
2            3          Overkill           None 2021-01-09 11:40:20        NaT
3            4          Entombed           None 2021-01-09 11:40:20        NaT
4            5         Metallica           None 2021-01-09 11:40:20        NaT
..         ...               ...            ...                 ...        ...
187        188        Ironed Out           None 2021-01-09 11:40:30        NaT
188        189       Frozen Soul            USA 2021-01-09 11:42:41        NaT
189        190           Ratlord        Germany 2021-01-09 11:46:04        NaT
190        191              Xile    New Zealand 2021-01-09 12:47:46        NaT
191        192  Dead End Tragedy        Germany 2021-01-09 12:50:27        NaT

[192 rows x 5 columns

In [11]:
pd.read_sql("credit_trx", engine).tail()

Unnamed: 0,credit_trx_id,credit_trx_date,credit_trx_type,credit_value,credit_saldo,record_id,created_at,updated_at
295,296,2021-01-07,Purchase,-1.0,-2.0,295.0,2021-01-09 11:46:04,NaT
296,297,2021-01-09,Addition,1.0,-1.0,,2021-01-09 11:51:46,NaT
297,298,2021-01-09,Purchase,-1.0,-2.0,296.0,2021-01-09 12:47:46,NaT
298,299,2021-01-09,Purchase,-1.0,-3.0,297.0,2021-01-09 12:50:27,NaT
299,300,2021-01-09,Removal,0.0,-3.0,83.0,2021-01-09 21:13:27,NaT


In [12]:
pd.read_sql("record_label_link", engine).tail()
pd.read_sql("artist_label_link", engine).tail()
pd.read_sql("artist_genre_link", engine).tail()
pd.read_sql("genre_label_link", engine).tail()

Unnamed: 0,record_id,label_id
292,293,41
293,294,6
294,295,42
295,296,23
296,297,23


Unnamed: 0,artist_id,label_id
198,188,41
199,189,6
200,190,42
201,191,23
202,192,23


Unnamed: 0,artist_id,genre_id
189,188,5
190,189,1
191,190,1
192,191,5
193,192,5


Unnamed: 0,genre_id,label_id
46,5,38
47,5,39
48,5,40
49,5,41
50,1,42


In [13]:
pd.read_sql("labels", engine).sort_values("label_name")

Unnamed: 0,label_id,label_name,created_at,updated_at
14,15,20 Buck Spin,2021-01-09 11:40:27,NaT
23,24,6131 Records,2021-01-09 11:40:28,NaT
17,18,BBB Records,2021-01-09 11:40:27,NaT
22,23,BDHW Records,2021-01-09 11:40:28,NaT
19,20,Back on Black,2021-01-09 11:40:28,NaT
29,30,Blood Harvest,2021-01-09 11:40:29,NaT
15,16,Bridge 9 Records,2021-01-09 11:40:27,NaT
13,14,Carbonized Records,2021-01-09 11:40:27,NaT
5,6,Century Media,2021-01-09 11:40:23,NaT
37,38,Closed Casket Activities,2021-01-09 11:40:30,NaT


In [14]:
result = session.query(Record).join(Artist).filter(Artist.artist_name == "Furia").all()
for record in result:
    print(record.labels, record.format_id)

[<Label(label_id=1, label_name=NA)>] None
[<Label(label_id=1, label_name=NA)>] None
[<Label(label_id=1, label_name=NA)>] None


  util.warn(


In [37]:
pd.read_sql("formats", engine)

Unnamed: 0,format_id,format_name,created_at,updated_at
0,1,"12""",2021-01-09 11:40:20,NaT
1,2,"7"" Pic",2021-01-09 11:40:20,NaT
2,3,LP,2021-01-09 11:40:20,NaT
3,4,2LP,2021-01-09 11:40:20,NaT
4,5,"10""",2021-01-09 11:40:20,NaT
5,6,"7""",2021-01-09 11:40:20,NaT
6,7,MLP,2021-01-09 11:40:21,NaT
7,8,2xLP,2021-01-09 11:40:22,NaT
8,9,LP,2021-01-09 11:40:25,NaT
9,10,Pic-LP,2021-01-09 11:40:26,NaT


## Delete A Record (Temporary Section)

In [16]:
# removal = {
#     "trx_type": "Removal",
#     "credit_value": 0,
#     "artist": "Furia",
#     "title": "Marzannie, Królowej Polski",
#     "year": 2012,
#     "date": dt.datetime.today().date()
# }

In [17]:
# db_functions.set_record_to_inactive(session, removal)

In [18]:
# pd.read_sql("credit_trx", engine).tail()

In [19]:
# to_delete = session.query(CreditTrx).filter(CreditTrx.credit_trx_id == 301).one()
# session.delete(to_delete)
# session.commit()

## Backfill Missing Record-Format-Relations in Records Table (Done)

In [21]:
# def load_albums_from_xlsx(
#     filepath: Union[Path, str], genres: Optional[List] = None
# ) -> pd.DataFrame:
#     """Load the original album collection file into a dataframe.
#     You can specify a list of genres you want to include
#     (defaults to None).
#     """
#     df = pd.read_excel(filepath, engine="openpyxl")
#     if genres:
#         df = df[df["Genre"].isin(genres)]
#     return df

In [23]:
# CONFIG_PATH = Path.cwd().parent / "config.cfg"
# path_to_collection = utils.read_config_return_str(CONFIG_PATH, "COLLECTION")
# collection_df = load_albums_from_xlsx(path_to_collection)
# collection_df = clean.prettify_column_names(collection_df)
# collection_df.dropna(thresh=8, inplace=True)
# collection_df.rename(columns={"format": "record_format"}, inplace=True)
# # collection_df["purchase_date"] = collection_df["purchase_date"].dt.date.astype(str)
# # collection_df["number"] = collection_df["number"].astype(str)
# # collection_df["digitized"] = collection_df["digitized"].map({True: 1, False: 0}).astype(int)
# collection_df["label"].fillna("NA", inplace=True)  # because it creates a foreign key

# # This is for the initial ingestion only
# collection_df["artist_country"] = None
# collection_df["credit_value"] = 0
# collection_df["trx_type"] = "Initial Load"

# assert len(collection_df) == 293

In [24]:
# # backfill

# for rec in collection_df.itertuples():
#     record = db_functions.fetch_a_record_from_the_shelf(session, rec.artist, rec.title)
#     r_format = rec.record_format
    
#     # Check if the format already exists or has to be created
#     record_format = (
#         session.query(RecordFormat)
#         .filter(RecordFormat.format_name.ilike(r_format))
#         .one_or_none()
#     )
#     if record_format is None:
#         record_format = RecordFormat(format_name=r_format)
#         session.add(record_format)
    
#     record.record_format = record_format

# session.commit()

In [35]:
# # Manual Update

# record = session.query(Record).filter(Record.record_id == 295).one()
# r_format = '12"'

# # Check if the format already exists or has to be created
# record_format = (
#     session.query(RecordFormat)
#     .filter(RecordFormat.format_name.ilike(r_format))
#     .one_or_none()
# )
# if record_format is None:
#     record_format = RecordFormat(format_name=r_format)
#     session.add(record_format)

# record.record_format = record_format

# session.commit()