In [1]:
import configparser
import datetime as dt
import sys
import collections
from pathlib import Path
from typing import Union, Optional, List, Tuple

import codebook.EDA as EDA
import codebook.clean as clean
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sqlalchemy
from sqlalchemy import func, distinct

In [2]:
# Reload imported modules automatically before each cell runs, so edits to the
# project sources are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

%matplotlib inline
# NOTE(review): 'raph-base' is not one of matplotlib's bundled styles — presumably a
# custom style sheet installed on the author's machine; confirm it is available.
plt.style.use('raph-base')

# Echo the value of every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

# pandas display options: two-decimal floats with thousands separators, up to
# 30 columns, no wrapping of wide frames, and up to 800 characters per cell.
pd.options.display.float_format = '{:,.2f}'.format
pd.set_option('display.max_columns', 30)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', 800)

# Seed NumPy's global random generator.
np.random.seed(666)

In [3]:
# Put the parent of the current working directory on the import path —
# presumably the project root that contains the `src` package imported below.
sys.path.append(str(Path.cwd().parent))

from src.db_declaration import Base, Artist, CreditTrx, Genre, Label, Record, RecordFormat
from src import db_functions
from src import utils

In [4]:
# Report the interpreter path, Python version and SQLAlchemy version in use.
print(
    sys.executable,
    sys.version,
    f'sqlalchemy {sqlalchemy.__version__}',
    sep="\n",
)

C:\Users\r2d4\miniconda3\envs\py3\python.exe
3.8.3 (default, May 19 2020, 06:50:17) [MSC v.1916 64 bit (AMD64)]
sqlalchemy 1.3.17


## Connect To And (Re-)Create DB

In [5]:
# Config file sits next to the notebook folder, one level above the working directory.
CONFIG_PATH = Path.cwd().parent.joinpath("config.cfg")

In [6]:
# Look up the database location under the "SQLITE_DEV" config entry, then build
# the engine and session through the project helpers.
path_to_db = utils.read_config_return_str(CONFIG_PATH, "SQLITE_DEV")
engine = utils.create_engine(path_to_db)
session = utils.create_session(engine)
# NOTE(review): judging by its name and the section title this (re-)creates the
# schema described by `Base` — presumably discarding existing data; confirm
# before pointing the config at anything other than the dev database.
utils.create_DB_anew(engine, Base)

C:\Users\r2d4\OneDrive\code\projects\20-02_disco\dev\DeafDiscoBase.db


In [7]:
def load_some_albums_from_xlsx(
    filepath: Union[Path, str],
    n: int=3,
    random_state: int=5,
) -> pd.DataFrame:
    """Load a reproducible random sample of albums from the collection workbook.

    Args:
        filepath: Location of the Excel file holding the album collection.
        n: Number of rows (albums) to draw.
        random_state: Seed handed to ``DataFrame.sample`` so that repeated
            calls return the same rows.

    Returns:
        A dataframe with ``n`` randomly chosen rows; the row index of the
        source sheet is kept.

    Raises:
        ValueError: Propagated from ``DataFrame.sample`` when ``n`` is larger
            than the number of rows in the file.
    """
    albums = pd.read_excel(filepath, engine="openpyxl")
    return albums.sample(n=n, random_state=random_state)


def clean_collection(collection_df):
    """Prepare raw collection rows for loading into the database.

    Steps, in order: prettify the column names via
    ``clean.prettify_column_names``, drop rows with fewer than 8 non-missing
    values, rename ``format`` to ``record_format``, replace missing labels
    with the string ``"NA"``, blank out ``vinyl_color``, and add the constant
    columns ``artist_country`` (None), ``credit_value`` (0) and ``trx_type``
    ("Initial Load").

    The result is built as a new dataframe. No ``inplace`` operation is
    applied to the frame being cleaned, and the label fill no longer relies
    on a chained ``df[col].fillna(..., inplace=True)``, which silently does
    nothing when pandas Copy-on-Write is active.

    Args:
        collection_df: Dataframe as read from the collection workbook. After
            column prettification it must contain a ``label`` column.

    Returns:
        The cleaned dataframe.
    """
    prettified = clean.prettify_column_names(collection_df)
    return (
        prettified
        .dropna(thresh=8)
        .rename(columns={"format": "record_format"})
        .assign(
            label=lambda frame: frame["label"].fillna("NA"),  # because it creates a foreign key
            vinyl_color=np.nan,  # just for aesthetics ...
            artist_country=None,
            credit_value=0,
            trx_type="Initial Load",
        )
    )

In [8]:
# Sample albums from the workbook named under the "COLLECTION" config entry
# and run them through the cleaning step in one go.
path_to_collection = utils.read_config_return_str(CONFIG_PATH, "COLLECTION")
collection_df = clean_collection(load_some_albums_from_xlsx(path_to_collection))

In [9]:
# Show the cleaned sample that is loaded into the database below.
collection_df

Unnamed: 0,active,artist,title,record_format,year,genre,price,purchase_date,vinyl_color,lim_edition,number,label,digitized,remarks,rating,artist_country,credit_value,trx_type
28,1.0,Emperor,s/t,MLP,1993.0,Black Metal,190.0,2014-08-01,,,,,False,,,,0,Initial Load
244,1.0,Gatecreeper,Deserted,LP,2019.0,Death Metal,70.0,2020-01-01,,100.0,,Relapse Records,True,,8.0,,0,Initial Load
123,1.0,Agnosy,Traits of The Past,LP,2014.0,Crust,15.0,2017-08-01,,,,,True,,,,0,Initial Load


In [10]:
# Create an additional test_record

# One sampled album (different seed than the initial load), cleaned and
# flattened to a plain dict, then re-labelled as a purchase worth one credit.
test_record = clean_collection(
    load_some_albums_from_xlsx(path_to_collection, 1, 1)
).to_dict(orient="records")[0]
test_record.update(trx_type="Purchase", credit_value=1)
test_record

{'active': 1.0,
 'artist': 'Witch Vomit',
 'title': 'Poisoned Blood',
 'record_format': 'MLP',
 'year': 2017.0,
 'genre': 'Death Metal',
 'price': 15.0,
 'purchase_date': Timestamp('2020-06-01 00:00:00'),
 'vinyl_color': nan,
 'lim_edition': nan,
 'number': nan,
 'label': '20 Buck Spin',
 'digitized': True,
 'remarks': nan,
 'rating': 8.0,
 'artist_country': None,
 'credit_value': 1,
 'trx_type': 'Purchase'}

### Initial Load of 3 Records

In [11]:
def insert_df_with_sqlalchemy_orm(session, df):
    """Add every row of ``df`` to the database, one record at a time.

    Each row is converted to a plain dict (column name -> value) and handed
    to ``db_functions.add_new_record`` together with ``session``.
    """
    row_dicts = df.to_dict(orient="records")
    for row_dict in row_dicts:
        db_functions.add_new_record(session, row_dict)

In [12]:
# Load the cleaned sample into the database row by row.
insert_df_with_sqlalchemy_orm(session, collection_df)

# Sanity check: exactly the three sampled albums are stored as records.
assert session.query(Record).count() == 3

  util.warn(


### Insertion of 2 Credit Addition Trx

In [13]:
# to_delete = session.query(CreditTrx).filter(CreditTrx.credit_trx_id == 8).one()
# session.delete(to_delete)
# session.commit()

In [14]:
# Initial trx, 11 days ago

addition_trx = CreditTrx(
    credit_trx_date=dt.date.today() - dt.timedelta(days=11),
    credit_trx_type="Addition",
    credit_value=1,
    credit_saldo=1,
    # An addition is not tied to any record, so store an explicit NULL.
    # `np.nan` is a float, not a missing foreign key; it only shows up as
    # None in the stored row after the database round-trip.
    record_id=None,
)
session.add(addition_trx)

# And a regular interval addition
db_functions.add_regular_credits(session)

session.commit()

Creating 'Addition' Trx for: 2021-01-17


In [15]:
# List every credit transaction currently stored.
session.query(CreditTrx).all()

[<CreditTrx(credit_trx_id=1, credit_trx_date=2014-08-01, credit_trx_type=Initial Load, credit_value=0.0, credit_saldo=0.0, record_id=1)>,
 <CreditTrx(credit_trx_id=2, credit_trx_date=2020-01-01, credit_trx_type=Initial Load, credit_value=0.0, credit_saldo=0.0, record_id=2)>,
 <CreditTrx(credit_trx_id=3, credit_trx_date=2017-08-01, credit_trx_type=Initial Load, credit_value=0.0, credit_saldo=0.0, record_id=3)>,
 <CreditTrx(credit_trx_id=4, credit_trx_date=2021-01-07, credit_trx_type=Addition, credit_value=1.0, credit_saldo=1.0, record_id=None)>,
 <CreditTrx(credit_trx_id=5, credit_trx_date=2021-01-17, credit_trx_type=Addition, credit_value=1.0, credit_saldo=2.0, record_id=None)>]

### Insertion of new Record

In [16]:
# Insert the single "Purchase" test record built above.
db_functions.add_new_record(session, test_record)

# Sanity check: the record count grew from 3 to 4.
assert session.query(Record).count() == 4

In [17]:
# Last two rows returned for CreditTrx (the query applies no explicit ordering).
session.query(CreditTrx).all()[-2:]

[<CreditTrx(credit_trx_id=5, credit_trx_date=2021-01-17, credit_trx_type=Addition, credit_value=1.0, credit_saldo=2.0, record_id=None)>,
 <CreditTrx(credit_trx_id=6, credit_trx_date=2020-06-01, credit_trx_type=Purchase, credit_value=-1.0, credit_saldo=1.0, record_id=4)>]

### Removal of Existing Record

Necessary cols: trx_type, credit_value, title, artist, date

In [20]:
# Payload describing the record to deactivate; the year entry stays disabled.
test_removal = dict(
    trx_type="Remove",
    credit_value=1,
    artist="Emperor",
    title="s/t",
    # year=1993,
    date=dt.datetime.today().date(),
)

In [21]:
# Deactivate the record described by `test_removal` (looked up by the project
# helper — presumably via artist and title; confirm in db_functions).
db_functions.set_record_to_inactive(session, test_removal)

Record set to inactive.


In [22]:
# Number of distinct values present in Record.active.
session.query(func.count(distinct(Record.active))).all()
# Record count per Record.active value. The grouping key itself is not
# selected, so the result does not show which count belongs to which value.
session.query(func.count(Record.record_id)).group_by(Record.active).all()

[(2)]

[(1), (3)]

In [33]:
# Last three rows returned for CreditTrx (the query applies no explicit ordering).
# NOTE(review): the saved output lists removals for several records although
# only one removal call is visible in this notebook — presumably left over from
# re-running cells out of order; re-run top to bottom to confirm.
session.query(CreditTrx).all()[-3:]

[<CreditTrx(credit_trx_id=7, credit_trx_date=2021-01-18, credit_trx_type=Remove, credit_value=1.0, credit_saldo=2.0, record_id=1)>,
 <CreditTrx(credit_trx_id=8, credit_trx_date=2021-01-18, credit_trx_type=Remove, credit_value=1.0, credit_saldo=3.0, record_id=2)>,
 <CreditTrx(credit_trx_id=9, credit_trx_date=2021-01-18, credit_trx_type=Remove, credit_value=0.0, credit_saldo=3.0, record_id=4)>]

### Reactivation of inactive Record [OPEN]

In [24]:
# ATTENTION: It has to be possible to re-add inactive records (and to pay for it in credits)!

## Query DB

In [32]:
# Load the whole "records" table into a dataframe for inspection.
pd.read_sql("records", engine)

Unnamed: 0,record_id,artist_id,title,genre_id,label,year,format_id,vinyl_color,lim_edition,number,remarks,purchase_date,price,digitized,rating,active,created_at,updated_at
0,1,1,s/t,1,,1993,1,,,,,2014-08-01,190,0,,0,2021-01-18 12:41:20,2021-01-18 12:42:06
1,2,2,Deserted,2,,2019,2,,100.0,,,2020-01-01,70,1,8.0,0,2021-01-18 12:41:20,2021-01-18 12:52:43
2,3,3,Traits of The Past,3,,2014,2,,,,,2017-08-01,15,1,,1,2021-01-18 12:41:20,NaT
3,4,4,Poisoned Blood,2,,2017,1,,,,,2020-06-01,15,1,8.0,0,2021-01-18 12:41:21,2021-01-18 13:06:14


In [26]:
# Load the whole "formats" table into a dataframe for inspection.
pd.read_sql("formats", engine)

Unnamed: 0,format_id,format_name,created_at,updated_at
0,1,MLP,2021-01-18 12:41:20,NaT
1,2,LP,2021-01-18 12:41:20,NaT


In [31]:
# Load the whole "artists" table into a dataframe for inspection.
pd.read_sql("artists", engine)

Unnamed: 0,artist_id,artist_name,artist_country,created_at,updated_at
0,1,Emperor,,2021-01-18 12:41:20,NaT
1,2,Gatecreeper,,2021-01-18 12:41:20,NaT
2,3,Agnosy,,2021-01-18 12:41:20,NaT
3,4,Witch Vomit,,2021-01-18 12:41:21,NaT


In [28]:
# Print the label relationship of every record titled "Deserted".
deserted_records = session.query(Record).filter(Record.title == "Deserted").all()
for record in deserted_records:
    print(record.labels)

[<Label(label_id=2, label_name=Relapse Records)>]


In [None]:
# TODO: On updates, the old values are not overwritten in many-to-many relationships (--> Labels) ...