In [1]:
import configparser
import datetime as dt
import sys
import collections
from pathlib import Path
from typing import Union, Optional, List, Tuple

import codebook.EDA as EDA
import codebook.clean as clean
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sqlalchemy

In [2]:
%load_ext autoreload
%autoreload 2

%matplotlib inline
plt.style.use('raph-base')

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

pd.options.display.float_format = '{:,.2f}'.format
pd.set_option('display.max_columns', 30)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', 800)

np.random.seed(666)

In [3]:
Path.cwd().parent / 'src'

WindowsPath('C:/Users/r2d4/OneDrive/code/projects/20-02_disco/src')

In [4]:
sys.path.append(str(Path.cwd().parent))

from src.db_declaration import (
    Base, Artist, CreditTrx, Genre, Label, Record, RecordFormat, CreditTrx, LabelRecordLink, ArtistLabelLink
)
from src import db_functions
from src import db_connect

In [5]:
print(sys.executable)
print(sys.version)
print(f'sqlalchemy {sqlalchemy.__version__}')

C:\Users\r2d4\miniconda3\envs\py3\python.exe
3.8.3 (default, May 19 2020, 06:50:17) [MSC v.1916 64 bit (AMD64)]
sqlalchemy 1.3.17


## Connect To DB

Because we connect from `dev`subfolder, I have to work with the sqlalchemy `create_engine` function directly.

In [39]:
# session.close()
prod_path = r"sqlite:///C:\Users\r2d4\OneDrive\code\projects\20-02_disco\db_aka_discobase\DiscoBase.db"
engine = sqlalchemy.create_engine(prod_path)

session = db_connect.create_session(engine)

# Check
session

<sqlalchemy.orm.session.Session at 0x21a1cacc2e0>

## Query DB

In [7]:
pd.read_sql("records", engine).tail(5)

Unnamed: 0,record_id,title,year,genre_id,format_id,vinyl_color,lim_edition,number,remarks,purchase_date,price,rating,is_digitized,is_active,created_at,updated_at
308,309,Demo 2020,2020,1,6,yellow,100.0,,,2020-10-23,10,9.0,1,1,2021-02-23 13:22:15,NaT
309,310,One Way Track,2020,5,11,,,,,2021-02-19,10,,0,1,2021-02-23 13:22:15,NaT
310,311,Most Dangerous Game,2020,5,6,clear with red splatter,400.0,,,2021-02-19,10,,1,1,2021-02-23 13:22:16,2021-02-24 12:52:10
311,312,Loyalty At All Costs,2021,5,6,black with red swirl,100.0,,Streets Of Hate exclusive,2021-02-19,10,,0,1,2021-02-23 13:22:16,NaT
312,313,Ultimate Aggression,2019,5,3,"half baby blue, half electric blue",300.0,,,2020-09-13,35,,1,1,2021-02-23 13:26:45,NaT


In [8]:
pd.read_sql("records", engine)["is_active"].sum()

293

In [9]:
pd.read_sql("artists", engine).tail

<bound method NDFrame.tail of      artist_id         artist_name artist_country          created_at updated_at
0            1           Dismember         Sweden 2021-02-23 13:22:07        NaT
1            2           Merciless             NA 2021-02-23 13:22:07        NaT
2            3            Overkill             NA 2021-02-23 13:22:07        NaT
3            4            Entombed         Sweden 2021-02-23 13:22:07        NaT
4            5           Metallica             NA 2021-02-23 13:22:07        NaT
..         ...                 ...            ...                 ...        ...
198        199  Heads For The Dead  International 2021-02-23 13:22:15        NaT
199        200            Fleshrot            USA 2021-02-23 13:22:15        NaT
200        201              End It            USA 2021-02-23 13:22:15        NaT
201        202              Kharma            USA 2021-02-23 13:22:16        NaT
202        203            Gridiron            USA 2021-02-23 13:22:16        Na

In [10]:
pd.read_sql("credit_trx", engine).tail(10)

Unnamed: 0,credit_trx_id,credit_trx_date,credit_trx_type,credit_value,credit_saldo,record_id,created_at,updated_at
313,314,2021-02-05,Purchase,-1.0,-4.0,306.0,2021-02-23 13:22:16,NaT
314,315,2021-02-05,Purchase,-1.0,-5.0,307.0,2021-02-23 13:22:16,NaT
315,316,2021-02-08,Addition,1.0,-4.0,,2021-02-23 13:22:16,NaT
316,317,2021-02-08,Purchase,-1.0,-5.0,308.0,2021-02-23 13:22:16,NaT
317,318,2021-02-18,Addition,1.0,-4.0,,2021-02-23 13:22:16,NaT
318,319,2020-10-23,Purchase,0.0,-4.0,309.0,2021-02-23 13:22:16,NaT
319,320,2021-02-19,Purchase,0.0,-4.0,311.0,2021-02-23 13:22:16,NaT
320,321,2021-02-19,Purchase,0.0,-4.0,312.0,2021-02-23 13:22:16,NaT
321,322,2021-02-19,Purchase,0.0,-4.0,310.0,2021-02-23 13:22:16,2021-02-23 13:24:22
322,323,2020-09-13,Purchase,0.0,-4.0,313.0,2021-02-23 13:26:45,NaT


In [11]:
pd.read_sql("record_label_link", engine).tail()
pd.read_sql("artist_label_link", engine).tail()
pd.read_sql("artist_genre_link", engine).tail()
pd.read_sql("genre_label_link", engine).tail()

Unnamed: 0,record_id,label_id
303,304,25
304,305,33
305,306,33
306,307,33
307,308,45


Unnamed: 0,artist_id,label_id
216,201,45
217,202,45
218,67,46
219,67,47
220,67,48


Unnamed: 0,artist_id,genre_id
201,199,1
202,200,1
203,201,5
204,202,5
205,203,5


Unnamed: 0,genre_id,label_id
52,1,44
53,5,45
54,7,46
55,7,47
56,7,48


In [12]:
pd.read_sql("labels", engine).sort_values("label_name")

Unnamed: 0,label_id,label_name,created_at,updated_at
14,15,20 Buck Spin,2021-02-23 13:22:13,NaT
24,25,6131,2021-02-23 13:22:13,NaT
46,47,Active Rebellion,2021-02-23 13:54:06,NaT
17,18,BBB,2021-02-23 13:22:13,NaT
23,24,BDHW,2021-02-23 13:22:13,NaT
20,21,Back on Black,2021-02-23 13:22:13,NaT
29,30,Blood Harvest,2021-02-23 13:22:14,NaT
15,16,Bridge 9,2021-02-23 13:22:13,NaT
13,14,Carbonized,2021-02-23 13:22:13,NaT
5,6,Century Media,2021-02-23 13:22:09,NaT


In [13]:
result = session.query(Record).filter(Record.artists.any(Artist.artist_name == "Entombed")).all()
for record in result:
    print(record, record.purchase_date)

<Record(record_id=11, title=Uprising, artist=[<Artist(artist_id=4, artist_name=Entombed)>])> 2013-07-01
<Record(record_id=12, title=Morning Star, artist=[<Artist(artist_id=4, artist_name=Entombed)>])> 2013-07-01
<Record(record_id=23, title=Wolverine Blues, artist=[<Artist(artist_id=4, artist_name=Entombed)>])> 2014-01-01
<Record(record_id=36, title=Clandestine, artist=[<Artist(artist_id=4, artist_name=Entombed)>])> 2015-08-01
<Record(record_id=40, title=Left Hand Path, artist=[<Artist(artist_id=4, artist_name=Entombed)>])> 2015-10-01


  util.warn(


In [14]:
result = session.query(Artist).filter(Artist.artist_name == "Entombed").one()
print(result.artist_country)

Sweden


In [15]:
pd.read_sql("formats", engine)

Unnamed: 0,format_id,format_name,created_at,updated_at
0,1,"12""",2021-02-23 13:22:07,NaT
1,2,"7"" Pic",2021-02-23 13:22:07,NaT
2,3,LP,2021-02-23 13:22:07,NaT
3,4,2LP,2021-02-23 13:22:07,NaT
4,5,"10""",2021-02-23 13:22:07,NaT
5,6,"7""",2021-02-23 13:22:07,NaT
6,7,MLP,2021-02-23 13:22:07,NaT
7,8,2xLP,2021-02-23 13:22:08,NaT
8,9,LP,2021-02-23 13:22:11,NaT
9,10,Pic-LP,2021-02-23 13:22:12,NaT


In [16]:
pd.read_sql("genres", engine)

Unnamed: 0,genre_id,genre_name,created_at,updated_at
0,1,Death Metal,2021-02-23 13:22:07,NaT
1,2,Thrash Metal,2021-02-23 13:22:07,NaT
2,3,Black Metal,2021-02-23 13:22:07,NaT
3,4,Speed Metal,2021-02-23 13:22:07,NaT
4,5,Hardcore,2021-02-23 13:22:08,NaT
5,6,Punk,2021-02-23 13:22:10,NaT
6,7,Crust,2021-02-23 13:22:10,NaT
7,8,Crossover,2021-02-23 13:22:14,NaT
8,9,Grindcore,2021-02-23 13:22:15,NaT


## Inactivate A Record (Temporary Section)

In [17]:
# removal = {
#     "trx_type": "Removal",
#     "credit_value": 0,
#     "artist": "Furia",
#     "title": "Marzannie, Królowej Polski",
#     "year": 2012,
#     "date": dt.datetime.today().date()
# }

In [18]:
# db_functions.set_record_to_inactive(session, removal)

In [19]:
# pd.read_sql("credit_trx", engine).tail()

In [20]:
# to_delete = session.query(CreditTrx).filter(CreditTrx.credit_trx_id == 301).one()
# session.delete(to_delete)
# session.commit()

## Backfill Missing Record-Format-Relations in Records Table (Done)

In [21]:
# def load_albums_from_xlsx(
#     filepath: Union[Path, str], genres: Optional[List] = None
# ) -> pd.DataFrame:
#     """Load the original album collection file into a dataframe.
#     You can specify a list of genres you want to include
#     (defaults to None).
#     """
#     df = pd.read_excel(filepath, engine="openpyxl")
#     if genres:
#         df = df[df["Genre"].isin(genres)]
#     return df

In [22]:
# CONFIG_PATH = Path.cwd().parent / "config.cfg"
# path_to_collection = utils.read_config_return_str(CONFIG_PATH, "COLLECTION")
# collection_df = load_albums_from_xlsx(path_to_collection)
# collection_df = clean.prettify_column_names(collection_df)
# collection_df.dropna(thresh=8, inplace=True)
# collection_df.rename(columns={"format": "record_format"}, inplace=True)
# # collection_df["purchase_date"] = collection_df["purchase_date"].dt.date.astype(str)
# # collection_df["number"] = collection_df["number"].astype(str)
# # collection_df["digitized"] = collection_df["digitized"].map({True: 1, False: 0}).astype(int)
# collection_df["label"].fillna("NA", inplace=True)  # because it creates a foreign key

# # This is for the initial ingestion only
# collection_df["artist_country"] = None
# collection_df["credit_value"] = 0
# collection_df["trx_type"] = "Initial Load"

# assert len(collection_df) == 293

In [23]:
# # backfill

# for rec in collection_df.itertuples():
#     record = db_functions.fetch_a_record_from_the_shelf(session, rec.artist, rec.title)
#     r_format = rec.record_format
    
#     # Check if the format already exists or has to be created
#     record_format = (
#         session.query(RecordFormat)
#         .filter(RecordFormat.format_name.ilike(r_format))
#         .one_or_none()
#     )
#     if record_format is None:
#         record_format = RecordFormat(format_name=r_format)
#         session.add(record_format)
    
#     record.record_format = record_format

# session.commit()

In [24]:
# # Manual Update

# record = session.query(Record).filter(Record.record_id == 295).one()
# r_format = '12"'

# # Check if the format already exists or has to be created
# record_format = (
#     session.query(RecordFormat)
#     .filter(RecordFormat.format_name.ilike(r_format))
#     .one_or_none()
# )
# if record_format is None:
#     record_format = RecordFormat(format_name=r_format)
#     session.add(record_format)

# record.record_format = record_format

# session.commit()

## Manual Updates

### Credit Trx

In [25]:
# result = session.query(CreditTrx).filter(CreditTrx.credit_trx_type == "Removal").all()
# for r in result:
#     r.credit_trx_type = "Remove"
#     print(r)

In [26]:
# r = session.query(Artist).filter(Artist.artist_name == "CELESTIAL SANCTUARY").one()
# r.artist_name = "Celestial Sanctuary"

In [27]:
# session.commit()

### Remove "Records" From Label-Names And Split Labels

In [28]:
# result = session.query(Label).filter(Label.label_name.like('% Records')).all()
# for x in result:
#     x.label_name = x.label_name.rpartition(" Records")[0]
# session.commit()

In [29]:
# session.query(Label).get(42)

In [30]:
# result = session.query(Label).filter(Label.label_name.contains(' / ')).all()
# for x in result:
#     print(x)

In [31]:
# result = session.query(Record).filter(Record.labels.any(Label.label_id.in_([19, 31, 32])))
# for x in result:
#     print(x)

In [32]:
# jack = session.query(Label).filter(Label.label_id.in_([19, 31, 32])).all()
# for x in jack:
#     print(x)
#     session.delete(x)
# session.commit()

In [33]:
# session.close()

### Update Stuff

In [54]:
result = (
    session.query(Record)
    .join(RecordFormat, RecordFormat.format_id == Record.format_id)
    .filter(RecordFormat.format_name == '2LP')
    .all()
)
for r in result:
    print(r.record_id, r.title)


6 Ironbound
9 Electric Age
263 Døden Læger Alle Sår
