In [1]:
import configparser
import datetime as dt
import sys
import collections
from pathlib import Path
from typing import Union, Optional, List, Tuple

import codebook.EDA as EDA
import codebook.clean as clean
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sqlalchemy

In [2]:
%load_ext autoreload
%autoreload 2

%matplotlib inline
plt.style.use('raph-base')

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

pd.options.display.float_format = '{:,.2f}'.format
pd.set_option('display.max_columns', 30)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', 800)

np.random.seed(666)

In [3]:
Path.cwd().parent / 'src'

WindowsPath('C:/Users/r2d4/OneDrive/code/projects/20-02_disco/src')

In [5]:
sys.path.append(str(Path.cwd().parent))

from src.db_declaration import (
    Base, Artist, CreditTrx, Genre, Label, Record, RecordFormat, CreditTrx, LabelRecordLink, ArtistLabelLink
)
from src import db_functions
from src import db_connect

In [6]:
print(sys.executable)
print(sys.version)
print(f'sqlalchemy {sqlalchemy.__version__}')

C:\Users\r2d4\miniconda3\envs\py3\python.exe
3.8.3 (default, May 19 2020, 06:50:17) [MSC v.1916 64 bit (AMD64)]
sqlalchemy 1.3.17


## Connect To DB

Because we connect from `dev`subfolder, I have to work with the sqlalchemy `create_engine` function directly.

In [7]:
prod_path = r"sqlite:///C:\Users\r2d4\OneDrive\code\projects\20-02_disco\db_aka_discobase\DiscoBase.db"
engine = sqlalchemy.create_engine(prod_path)

session = db_connect.create_session(engine)

# Check
session

<sqlalchemy.orm.session.Session at 0x29c34e6e910>

## Query DB

In [11]:
pd.read_sql("records", engine).tail(5)

Unnamed: 0,record_id,title,year,genre_id,format_id,vinyl_color,lim_edition,number,remarks,purchase_date,price,rating,is_digitized,is_active,created_at,updated_at
303,304,Opening The Hell Gates,2021,1,3,neon green,150,,1st vinyl issue of 2015 album,2021-01-25,25,,1,1,2021-02-17 09:02:38,NaT
304,305,Piles of Festering Decomposition,2021,1,1,purple-pink,lim,,1st vinyl pressing of 2020 demo,2021-02-05,20,9.0,1,1,2021-02-17 09:02:38,NaT
305,306,Self-Immolation Suite,2021,1,1,red with black streaks,lim,,1st vinyl pressing of 2019 / 2020 EPs,2021-02-05,20,,1,1,2021-02-17 09:02:38,NaT
306,307,Human Altar,2021,1,1,coke clear with blue swirl,lim,,1st vinyl pressing of 2020 demo,2021-02-05,20,,1,1,2021-02-17 09:02:38,NaT
307,308,Into The Red,2020,1,3,clear with read and black and white splatter,80,,Variant #4 out of 5 x 80 LPs,2021-02-08,25,,1,1,2021-02-17 09:02:38,NaT


In [13]:
pd.read_sql("records", engine)["is_active"].sum()

289

In [14]:
pd.read_sql("artists", engine).tail

<bound method NDFrame.tail of      artist_id          artist_name artist_country          created_at updated_at
0            1            Dismember             NA 2021-02-17 09:02:28        NaT
1            2            Merciless             NA 2021-02-17 09:02:28        NaT
2            3             Overkill             NA 2021-02-17 09:02:28        NaT
3            4             Entombed             NA 2021-02-17 09:02:28        NaT
4            5            Metallica             NA 2021-02-17 09:02:28        NaT
..         ...                  ...            ...                 ...        ...
194        195  Celestial Sanctuary        England 2021-02-17 09:02:38        NaT
195        196      200 STAB WOUNDS            USA 2021-02-17 09:02:38        NaT
196        197            Unurnment            USA 2021-02-17 09:02:38        NaT
197        198           Infestment            USA 2021-02-17 09:02:38        NaT
198        199   Heads For The Dead  International 2021-02-17 09:02:

In [15]:
pd.read_sql("credit_trx", engine).tail(10)

Unnamed: 0,credit_trx_id,credit_trx_date,credit_trx_type,credit_value,credit_saldo,record_id,created_at,updated_at
307,308,2021-01-22,Remove,0.0,-4.0,123.0,2021-02-17 09:02:38,NaT
308,309,2021-01-22,Remove,1.0,-3.0,121.0,2021-02-17 09:02:38,NaT
309,310,2021-01-25,Purchase,-1.0,-4.0,303.0,2021-02-17 09:02:38,NaT
310,311,2021-01-25,Purchase,0.0,-4.0,304.0,2021-02-17 09:02:38,NaT
311,312,2021-01-29,Addition,1.0,-3.0,,2021-02-17 09:02:38,NaT
312,313,2021-02-05,Purchase,0.0,-3.0,305.0,2021-02-17 09:02:38,NaT
313,314,2021-02-05,Purchase,-1.0,-4.0,306.0,2021-02-17 09:02:38,NaT
314,315,2021-02-05,Purchase,-1.0,-5.0,307.0,2021-02-17 09:02:38,NaT
315,316,2021-02-08,Addition,1.0,-4.0,,2021-02-17 09:02:38,NaT
316,317,2021-02-08,Purchase,-1.0,-5.0,308.0,2021-02-17 09:02:38,NaT


In [16]:
pd.read_sql("record_label_link", engine).tail()
pd.read_sql("artist_label_link", engine).tail()
pd.read_sql("artist_genre_link", engine).tail()
pd.read_sql("genre_label_link", engine).tail()

Unnamed: 0,record_id,label_id
303,304,25
304,305,33
305,306,33
306,307,33
307,308,45


Unnamed: 0,artist_id,label_id
208,195,44
209,196,33
210,197,33
211,198,33
212,199,45


Unnamed: 0,artist_id,genre_id
198,195,1
199,196,1
200,197,1
201,198,1
202,199,1


Unnamed: 0,genre_id,label_id
49,5,41
50,1,42
51,1,43
52,1,44
53,1,45


In [17]:
pd.read_sql("labels", engine).sort_values("label_name")

Unnamed: 0,label_id,label_name,created_at,updated_at
14,15,20 Buck Spin,2021-02-17 09:02:35,NaT
23,24,6131 Records,2021-02-17 09:02:36,NaT
17,18,BBB Records,2021-02-17 09:02:35,NaT
22,23,BDHW Records,2021-02-17 09:02:36,NaT
19,20,Back on Black,2021-02-17 09:02:35,NaT
29,30,Blood Harvest,2021-02-17 09:02:36,NaT
15,16,Bridge 9 Records,2021-02-17 09:02:35,NaT
13,14,Carbonized Records,2021-02-17 09:02:35,NaT
5,6,Century Media,2021-02-17 09:02:31,NaT
37,38,Closed Casket Activities,2021-02-17 09:02:38,NaT


In [14]:
result = session.query(Record).join(Artist).filter(Artist.artist_name == "Entombed").all()
for record in result:
    print(record, record.purchase_date)

<Record(record_id=11, title=Uprising, artist_id=4)> 2013-07-01
<Record(record_id=12, title=Morning Star, artist_id=4)> 2013-07-01
<Record(record_id=23, title=Wolverine Blues, artist_id=4)> 2014-01-01
<Record(record_id=36, title=Clandestine, artist_id=4)> 2015-08-01
<Record(record_id=40, title=Left Hand Path, artist_id=4)> 2015-10-01


  util.warn(


In [15]:
result = session.query(Artist).filter(Artist.artist_name == "Entombed").one()
print(result.artist_country)

None


In [16]:
pd.read_sql("formats", engine)

Unnamed: 0,format_id,format_name,created_at,updated_at
0,1,"12""",2021-01-09 11:40:20,NaT
1,2,"7"" Pic",2021-01-09 11:40:20,NaT
2,3,LP,2021-01-09 11:40:20,NaT
3,4,2LP,2021-01-09 11:40:20,NaT
4,5,"10""",2021-01-09 11:40:20,NaT
5,6,"7""",2021-01-09 11:40:20,NaT
6,7,MLP,2021-01-09 11:40:21,NaT
7,8,2xLP,2021-01-09 11:40:22,NaT
8,9,LP,2021-01-09 11:40:25,NaT
9,10,Pic-LP,2021-01-09 11:40:26,NaT


## Delete A Record (Temporary Section)

In [17]:
# removal = {
#     "trx_type": "Removal",
#     "credit_value": 0,
#     "artist": "Furia",
#     "title": "Marzannie, Królowej Polski",
#     "year": 2012,
#     "date": dt.datetime.today().date()
# }

In [18]:
# db_functions.set_record_to_inactive(session, removal)

In [19]:
# pd.read_sql("credit_trx", engine).tail()

In [20]:
# to_delete = session.query(CreditTrx).filter(CreditTrx.credit_trx_id == 301).one()
# session.delete(to_delete)
# session.commit()

## Backfill Missing Record-Format-Relations in Records Table (Done)

In [21]:
# def load_albums_from_xlsx(
#     filepath: Union[Path, str], genres: Optional[List] = None
# ) -> pd.DataFrame:
#     """Load the original album collection file into a dataframe.
#     You can specify a list of genres you want to include
#     (defaults to None).
#     """
#     df = pd.read_excel(filepath, engine="openpyxl")
#     if genres:
#         df = df[df["Genre"].isin(genres)]
#     return df

In [22]:
# CONFIG_PATH = Path.cwd().parent / "config.cfg"
# path_to_collection = utils.read_config_return_str(CONFIG_PATH, "COLLECTION")
# collection_df = load_albums_from_xlsx(path_to_collection)
# collection_df = clean.prettify_column_names(collection_df)
# collection_df.dropna(thresh=8, inplace=True)
# collection_df.rename(columns={"format": "record_format"}, inplace=True)
# # collection_df["purchase_date"] = collection_df["purchase_date"].dt.date.astype(str)
# # collection_df["number"] = collection_df["number"].astype(str)
# # collection_df["digitized"] = collection_df["digitized"].map({True: 1, False: 0}).astype(int)
# collection_df["label"].fillna("NA", inplace=True)  # because it creates a foreign key

# # This is for the initial ingestion only
# collection_df["artist_country"] = None
# collection_df["credit_value"] = 0
# collection_df["trx_type"] = "Initial Load"

# assert len(collection_df) == 293

In [23]:
# # backfill

# for rec in collection_df.itertuples():
#     record = db_functions.fetch_a_record_from_the_shelf(session, rec.artist, rec.title)
#     r_format = rec.record_format
    
#     # Check if the format already exists or has to be created
#     record_format = (
#         session.query(RecordFormat)
#         .filter(RecordFormat.format_name.ilike(r_format))
#         .one_or_none()
#     )
#     if record_format is None:
#         record_format = RecordFormat(format_name=r_format)
#         session.add(record_format)
    
#     record.record_format = record_format

# session.commit()

In [24]:
# # Manual Update

# record = session.query(Record).filter(Record.record_id == 295).one()
# r_format = '12"'

# # Check if the format already exists or has to be created
# record_format = (
#     session.query(RecordFormat)
#     .filter(RecordFormat.format_name.ilike(r_format))
#     .one_or_none()
# )
# if record_format is None:
#     record_format = RecordFormat(format_name=r_format)
#     session.add(record_format)

# record.record_format = record_format

# session.commit()

## Manual Updates

In [25]:
# result = session.query(CreditTrx).filter(CreditTrx.credit_trx_type == "Removal").all()
# for r in result:
#     r.credit_trx_type = "Remove"
#     print(r)

In [29]:
# r = session.query(Artist).filter(Artist.artist_name == "CELESTIAL SANCTUARY").one()
# r.artist_name = "Celestial Sanctuary"

In [30]:
# session.commit()