In [1]:
import configparser
import datetime as dt
import sys
import collections
from pathlib import Path
from typing import Union, Optional, List, Tuple

import codebook.EDA as EDA
import codebook.clean as clean
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sqlalchemy

In [2]:
%load_ext autoreload
%autoreload 2

%matplotlib inline
plt.style.use('raph-base')

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

pd.options.display.float_format = '{:,.2f}'.format
pd.set_option('display.max_columns', 30)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', 800)

np.random.seed(666)

In [3]:
Path.cwd().parent / 'src'

WindowsPath('C:/Users/r2d4/OneDrive/code/projects/20-02_disco/src')

In [4]:
sys.path.append(str(Path.cwd().parent))

from src.db_declaration import (
    Base, Artist, CreditTrx, Genre, Label, Record, RecordFormat, CreditTrx, RecordLabelLink, ArtistLabelLink
)
from src import db_functions
from src import db_connect

In [5]:
print(sys.executable)
print(sys.version)
print(f'sqlalchemy {sqlalchemy.__version__}')

C:\Users\r2d4\miniconda3\envs\py3\python.exe
3.8.3 (default, May 19 2020, 06:50:17) [MSC v.1916 64 bit (AMD64)]
sqlalchemy 1.3.17


## Connect To DB

Because we connect from `dev`subfolder, I have to work with the sqlalchemy `create_engine` function directly.

In [6]:
prod_path = r"sqlite:///C:\Users\r2d4\OneDrive\code\projects\20-02_disco\db_aka_discobase\DiscoBase.db"
engine = sqlalchemy.create_engine(prod_path)

session = db_connect.create_session(engine)

# Check
session

<sqlalchemy.orm.session.Session at 0x1efd9a458b0>

## Query DB

In [7]:
pd.read_sql("records", engine).sample(3)

Unnamed: 0,record_id,artist_id,title,genre_id,label,year,format_id,vinyl_color,lim_edition,number,remarks,purchase_date,price,digitized,rating,active,created_at,updated_at
101,102,34,Scars,5,,2011,3,mint green,98,,,2017-03-01,30,1,,1,2021-01-09 11:40:23,2021-01-13 07:17:50
296,297,192,Anti Life Anti You,5,,2020,3,orange with black swirl,200,,,2021-01-09,20,1,,1,2021-01-09 12:50:27,2021-01-13 07:25:12
17,18,8,All Shall Fall,3,,2009,3,bi-coloured b/w,1000,827.0,,2013-09-01,25,0,,1,2021-01-09 11:40:21,2021-01-13 07:17:50


In [8]:
pd.read_sql("records", engine).tail()

Unnamed: 0,record_id,artist_id,title,genre_id,label,year,format_id,vinyl_color,lim_edition,number,remarks,purchase_date,price,digitized,rating,active,created_at,updated_at
298,299,193,The Real Fear,5,,2021,1,purple with black splatter,lim,,,2020-12-22,25,1,,1,2021-01-25 13:36:03,NaT
299,300,194,Tortured Whole,1,,2021,3,transparent light blue,200,,,2021-01-15,25,0,,1,2021-01-25 13:46:59,NaT
300,301,195,Split,1,,2021,3,white with black splatter,100,,,2021-01-20,20,1,,1,2021-01-25 14:04:00,NaT
301,302,196,Implements Of Excruciation,1,,2021,3,"clear with black, white, silver, light blue splatter",100,,,2021-01-20,65,1,,1,2021-01-25 14:12:25,NaT
302,303,197,Soul Diminished,1,,2021,3,clear with purple in orange,150,,,2021-01-25,20,1,,1,2021-01-25 15:10:14,2021-01-25 15:11:05


In [9]:
pd.read_sql("records", engine)["active"].sum()

284

In [31]:
pd.read_sql("artists", engine).tail

<bound method NDFrame.tail of      artist_id           artist_name   artist_country          created_at          updated_at
0            1             Dismember             None 2021-01-09 11:40:20                 NaT
1            2             Merciless             None 2021-01-09 11:40:20                 NaT
2            3              Overkill             None 2021-01-09 11:40:20                 NaT
3            4              Entombed             None 2021-01-09 11:40:20                 NaT
4            5             Metallica             None 2021-01-09 11:40:20                 NaT
..         ...                   ...              ...                 ...                 ...
192        193             Queensway              USA 2021-01-25 13:36:03                 NaT
193        194        Sanguisugabogg              USA 2021-01-25 13:46:59                 NaT
194        195  Coffins / Depression  Japan / Germany 2021-01-25 14:04:00                 NaT
195        196          Warp C

In [11]:
pd.read_sql("credit_trx", engine).tail(10)

Unnamed: 0,credit_trx_id,credit_trx_date,credit_trx_type,credit_value,credit_saldo,record_id,created_at,updated_at
300,301,2018-01-01,Purchase,0.0,-3.0,298.0,2021-01-25 13:30:38,NaT
301,302,2020-12-22,Purchase,0.0,-3.0,299.0,2021-01-25 13:36:03,NaT
302,303,2021-01-15,Purchase,-1.0,-4.0,300.0,2021-01-25 13:46:59,NaT
303,304,2021-01-19,Addition,1.0,-3.0,,2021-01-25 13:56:38,NaT
304,305,2021-01-20,Purchase,-1.0,-4.0,301.0,2021-01-25 14:04:00,NaT
305,306,2021-01-20,Purchase,-1.0,-5.0,302.0,2021-01-25 14:12:25,NaT
306,307,2021-01-22,Remove,1.0,-4.0,127.0,2021-01-25 14:15:18,2021-01-25 14:31:38
307,308,2021-01-22,Remove,0.0,-4.0,123.0,2021-01-25 14:16:50,2021-01-25 14:35:45
308,309,2021-01-22,Remove,1.0,-3.0,121.0,2021-01-25 14:17:40,2021-01-25 14:33:38
309,310,2021-01-25,Purchase,-1.0,-4.0,303.0,2021-01-25 15:10:14,NaT


In [12]:
pd.read_sql("record_label_link", engine).tail()
pd.read_sql("artist_label_link", engine).tail()
pd.read_sql("artist_genre_link", engine).tail()
pd.read_sql("genre_label_link", engine).tail()

Unnamed: 0,record_id,label_id
298,299,36
299,300,6
300,301,26
301,302,43
302,303,44


Unnamed: 0,artist_id,label_id
204,193,36
205,194,6
206,195,26
207,196,43
208,197,44


Unnamed: 0,artist_id,genre_id
194,193,5
195,194,1
196,195,1
197,196,1
198,197,1


Unnamed: 0,genre_id,label_id
48,5,40
49,5,41
50,1,42
51,1,43
52,1,44


In [13]:
pd.read_sql("labels", engine).sort_values("label_name")

Unnamed: 0,label_id,label_name,created_at,updated_at
14,15,20 Buck Spin,2021-01-09 11:40:27,NaT
23,24,6131 Records,2021-01-09 11:40:28,NaT
17,18,BBB Records,2021-01-09 11:40:27,NaT
22,23,BDHW Records,2021-01-09 11:40:28,NaT
19,20,Back on Black,2021-01-09 11:40:28,NaT
29,30,Blood Harvest,2021-01-09 11:40:29,NaT
15,16,Bridge 9 Records,2021-01-09 11:40:27,NaT
13,14,Carbonized Records,2021-01-09 11:40:27,NaT
5,6,Century Media,2021-01-09 11:40:23,NaT
37,38,Closed Casket Activities,2021-01-09 11:40:30,NaT


In [14]:
result = session.query(Record).join(Artist).filter(Artist.artist_name == "Entombed").all()
for record in result:
    print(record, record.purchase_date)

<Record(record_id=11, title=Uprising, artist_id=4)> 2013-07-01
<Record(record_id=12, title=Morning Star, artist_id=4)> 2013-07-01
<Record(record_id=23, title=Wolverine Blues, artist_id=4)> 2014-01-01
<Record(record_id=36, title=Clandestine, artist_id=4)> 2015-08-01
<Record(record_id=40, title=Left Hand Path, artist_id=4)> 2015-10-01


  util.warn(


In [15]:
result = session.query(Artist).filter(Artist.artist_name == "Entombed").one()
print(result.artist_country)

None


In [16]:
pd.read_sql("formats", engine)

Unnamed: 0,format_id,format_name,created_at,updated_at
0,1,"12""",2021-01-09 11:40:20,NaT
1,2,"7"" Pic",2021-01-09 11:40:20,NaT
2,3,LP,2021-01-09 11:40:20,NaT
3,4,2LP,2021-01-09 11:40:20,NaT
4,5,"10""",2021-01-09 11:40:20,NaT
5,6,"7""",2021-01-09 11:40:20,NaT
6,7,MLP,2021-01-09 11:40:21,NaT
7,8,2xLP,2021-01-09 11:40:22,NaT
8,9,LP,2021-01-09 11:40:25,NaT
9,10,Pic-LP,2021-01-09 11:40:26,NaT


## Delete A Record (Temporary Section)

In [17]:
# removal = {
#     "trx_type": "Removal",
#     "credit_value": 0,
#     "artist": "Furia",
#     "title": "Marzannie, Królowej Polski",
#     "year": 2012,
#     "date": dt.datetime.today().date()
# }

In [18]:
# db_functions.set_record_to_inactive(session, removal)

In [19]:
# pd.read_sql("credit_trx", engine).tail()

In [20]:
# to_delete = session.query(CreditTrx).filter(CreditTrx.credit_trx_id == 301).one()
# session.delete(to_delete)
# session.commit()

## Backfill Missing Record-Format-Relations in Records Table (Done)

In [21]:
# def load_albums_from_xlsx(
#     filepath: Union[Path, str], genres: Optional[List] = None
# ) -> pd.DataFrame:
#     """Load the original album collection file into a dataframe.
#     You can specify a list of genres you want to include
#     (defaults to None).
#     """
#     df = pd.read_excel(filepath, engine="openpyxl")
#     if genres:
#         df = df[df["Genre"].isin(genres)]
#     return df

In [22]:
# CONFIG_PATH = Path.cwd().parent / "config.cfg"
# path_to_collection = utils.read_config_return_str(CONFIG_PATH, "COLLECTION")
# collection_df = load_albums_from_xlsx(path_to_collection)
# collection_df = clean.prettify_column_names(collection_df)
# collection_df.dropna(thresh=8, inplace=True)
# collection_df.rename(columns={"format": "record_format"}, inplace=True)
# # collection_df["purchase_date"] = collection_df["purchase_date"].dt.date.astype(str)
# # collection_df["number"] = collection_df["number"].astype(str)
# # collection_df["digitized"] = collection_df["digitized"].map({True: 1, False: 0}).astype(int)
# collection_df["label"].fillna("NA", inplace=True)  # because it creates a foreign key

# # This is for the initial ingestion only
# collection_df["artist_country"] = None
# collection_df["credit_value"] = 0
# collection_df["trx_type"] = "Initial Load"

# assert len(collection_df) == 293

In [23]:
# # backfill

# for rec in collection_df.itertuples():
#     record = db_functions.fetch_a_record_from_the_shelf(session, rec.artist, rec.title)
#     r_format = rec.record_format
    
#     # Check if the format already exists or has to be created
#     record_format = (
#         session.query(RecordFormat)
#         .filter(RecordFormat.format_name.ilike(r_format))
#         .one_or_none()
#     )
#     if record_format is None:
#         record_format = RecordFormat(format_name=r_format)
#         session.add(record_format)
    
#     record.record_format = record_format

# session.commit()

In [24]:
# # Manual Update

# record = session.query(Record).filter(Record.record_id == 295).one()
# r_format = '12"'

# # Check if the format already exists or has to be created
# record_format = (
#     session.query(RecordFormat)
#     .filter(RecordFormat.format_name.ilike(r_format))
#     .one_or_none()
# )
# if record_format is None:
#     record_format = RecordFormat(format_name=r_format)
#     session.add(record_format)

# record.record_format = record_format

# session.commit()

## Manual Updates

In [25]:
# result = session.query(CreditTrx).filter(CreditTrx.credit_trx_type == "Removal").all()
# for r in result:
#     r.credit_trx_type = "Remove"
#     print(r)

In [29]:
# r = session.query(Artist).filter(Artist.artist_name == "CELESTIAL SANCTUARY").one()
# r.artist_name = "Celestial Sanctuary"

In [30]:
# session.commit()