<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Inititial-Record-Ingestion" data-toc-modified-id="Inititial-Record-Ingestion-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Inititial Record Ingestion</a></span><ul class="toc-item"><li><span><a href="#Connect-To-And-Re-Create-DB" data-toc-modified-id="Connect-To-And-Re-Create-DB-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Connect To And Re-Create DB</a></span></li><li><span><a href="#Read-Back-Up-Files" data-toc-modified-id="Read-Back-Up-Files-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Read Back-Up Files</a></span></li><li><span><a href="#Write-Records-To-DB" data-toc-modified-id="Write-Records-To-DB-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Write Records To DB</a></span></li><li><span><a href="#Check-The-Results" data-toc-modified-id="Check-The-Results-1.4"><span class="toc-item-num">1.4&nbsp;&nbsp;</span>Check The Results</a></span></li></ul></li><li><span><a href="#Initial-Credit-Addition" data-toc-modified-id="Initial-Credit-Addition-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Initial Credit Addition</a></span></li></ul></div>

In [1]:
import configparser
import datetime as dt
import sys
import collections
from pathlib import Path
from typing import Union, Optional, List, Tuple

import codebook.EDA as EDA
import codebook.clean as clean
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sqlalchemy
from sqlalchemy import func

In [2]:
%load_ext autoreload
%autoreload 2

%matplotlib inline
plt.style.use('raph-base')

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

pd.options.display.float_format = '{:,.2f}'.format
pd.set_option('display.max_columns', 30)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', 800)

np.random.seed(666)

In [3]:
sys.path.append(str(Path.cwd().parent))

from src.db_declaration2 import Base, Artist, CreditTrx, Genre, Label, Rating, Record
from src import db_functions
from src import db_connect

In [4]:
print(sys.executable)
print(sys.version)
print(f'sqlalchemy {sqlalchemy.__version__}')

C:\Users\r2d4\miniconda3\envs\py3\python.exe
3.8.3 (default, May 19 2020, 06:50:17) [MSC v.1916 64 bit (AMD64)]
sqlalchemy 1.3.17


## Inititial Record Ingestion

In [13]:
CONFIG_PATH = Path.cwd().parent / "config.yaml"

### Connect To And Re-Create DB

<div class="alert alert-block alert-danger">
Attention This Will Delete All Entries In The DB!
</div>

In [6]:
prod_path = r"sqlite:///C:\Users\r2d4\OneDrive\code\projects\20-02_disco\db_aka_discobase\DiscoBase2.db"


engine = sqlalchemy.create_engine(prod_path)
session = db_connect.create_session(engine)

# Check
engine
session

Engine(sqlite:///C:\Users\r2d4\OneDrive\code\projects\20-02_disco\db_aka_discobase\DiscoBase2.db)

<sqlalchemy.orm.session.Session at 0x223108c6f40>

In [7]:
# Re-Create
# utils.create_DB_anew(engine, Base)

### Read Back-Up Files

In [54]:
def load_backup_data_from_parquet(
    config_path: Union[Path, str], 
    record_data_name: Union[Path, str],
    trx_data_name: Union[Path, str]
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the two back-up files with the record-related and 
    the credit_trx data into Pandas DataFrames.
    """
    
    back_up_params = db_connect.read_yaml(config_path, "BACK_UP")
    rel_path = back_up_params["REL_PATH"]
    target_folder = Path.cwd().parent / rel_path  # TODO CHANGE FOR PROD
    
    df_list = []
    for file in [record_data_name, trx_data_name]:
        full_path = target_folder / file
    
        df = pd.read_parquet(full_path)
        df_list.append(df)
        
    return df_list[0], df_list[1]

In [66]:
record_data, trx_data = load_backup_data_from_parquet(
    CONFIG_PATH, 
    "record_data_2021-02-14-15-58-43.parquet",
    "trx_data_2021-02-14-15-58-44.parquet"
)

In [67]:
record_data.info()
record_data.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 308 entries, 1 to 308
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   artist          308 non-null    object        
 1   artist_country  13 non-null     object        
 2   title           308 non-null    object        
 3   genre           308 non-null    object        
 4   label           308 non-null    object        
 5   year            308 non-null    int64         
 6   record_format   308 non-null    object        
 7   vinyl_color     212 non-null    object        
 8   lim_edition     222 non-null    object        
 9   number          24 non-null     object        
 10  remarks         78 non-null     object        
 11  price           308 non-null    float64       
 12  purchase_date   308 non-null    datetime64[ns]
 13  rating          103 non-null    float64       
 14  is_digitized    308 non-null    bool          
 15  is_act

Unnamed: 0_level_0,artist,artist_country,title,genre,label,year,record_format,vinyl_color,lim_edition,number,remarks,price,purchase_date,rating,is_digitized,is_active
record_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1,Dismember,,Pieces,Death Metal,[NA],1992,"12""",,,,,35.0,1992-01-01,,False,True
2,Dismember,,Skin Her Alive,Death Metal,[NA],1991,"7"" Pic",,,,,15.0,1992-01-01,,False,True
3,Dismember,,Like An Everflowing Stream,Death Metal,[NA],1991,LP,,,,,155.0,1992-01-01,,False,True
4,Dismember,,Where Ironcrosses Flow,Death Metal,[NA],2004,LP,,,,,15.0,1992-01-01,,True,True
5,Merciless,,The Awakening,Death Metal,[NA],1989,LP,,,,"New, Wert geschätzt / 1. der beiden Originalpressungen auf DSP",250.0,1992-01-01,,True,True


In [68]:
EDA.display_nan(record_data)

Unnamed: 0,total,prop,dtype
artist_country,295,95.8%,object
number,284,92.2%,object
remarks,230,74.7%,object
rating,205,66.6%,float64
vinyl_color,96,31.2%,object
lim_edition,86,27.9%,object


In [69]:
# splits_plus_one = record_data[record_data["artist"].str.contains("/")].index
# splits_plus_one

Int64Index([20, 170, 278, 288, 301], dtype='int64', name='record_id')

In [73]:
# TEMPORARY: Bring Artists To List

def split_strings(x):
    if isinstance(x, str):
        x = x.split(" / ")
    return x

record_data["artist"] = record_data["artist"].apply(split_strings)
record_data["artist_country"] = record_data["artist_country"].apply(split_strings)
record_data[record_data.index.isin(splits_plus_one)]

Unnamed: 0_level_0,artist,artist_country,title,genre,label,year,record_format,vinyl_color,lim_edition,number,remarks,price,purchase_date,rating,is_digitized,is_active
record_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
20,"[Hypocrisy, Immortal]",,Valley Of The Damned / Hordes Of War,Black Metal,[NA],2009,"7"" Pic",,lim,,,25.0,2013-09-01,,False,True
170,[Harm/Shelter],,Paycheck,Hardcore,[NA],2016,LP,clear-black marbled,150,,,15.0,2018-04-01,,False,True
278,"[Iron Reagan, Gatecreeper]",,s/t,Crossover,[Relapse Records],2018,"12""",bone white / blood red merge with black splatter,100,,,45.0,2020-09-01,7.0,False,True
288,"[Mortal Wound, Gutless]",,s/t,Death Metal,[Me Saco Un Ojo Records],2020,"12""",neon pink,,,,15.0,2020-10-01,9.0,False,True
301,"[Coffins, Depression]","[Japan, Germany]",Split,Death Metal,[Hells Headbangers],2021,LP,white with black splatter,100,,,20.0,2021-01-20,,True,True


In [74]:
record_data["artist_country"].value_counts()

TypeError: unhashable type: 'list'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas\_libs\hashtable_class_helper.pxi", line 1709, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'list'


[USA]               6
[Germany]           2
[Canada]            1
[Japan, Germany]    1
[New Zealand]       1
[International]     1
[England]           1
Name: artist_country, dtype: int64

**TEMPORARY TO DOS:**

- fillna in Ratings col because it will generate an FK in the future

### Write Records To DB

**TODO:** Can I apply()?

In [None]:
def insert_df_with_sqlalchemy_orm(session, df):
    for x in df.to_dict("records"):
        db_functions.add_new_record2(session, x)

In [None]:
insert_df_with_sqlalchemy_orm(session, collection_df)

### Check The Results

In [None]:
# Total Number Of Records In The DB
session.query(Record).count()

In [None]:
session.query(Artist).count()
session.query(Artist).all()[-5:]

In [None]:
session.query(Genre).count()
session.query(Genre).all()

In [None]:
session.query(func.sum(CreditTrx.credit_value)).all()[0][0]
session.query(func.min(CreditTrx.credit_value)).all()[0][0]
session.query(func.avg(CreditTrx.credit_value)).all()[0][0]

## Initial Credit Addition

In [None]:
# Add initial Addition trx, check the date an credit_saldo

fake_trx = CreditTrx(
    credit_trx_date=dt.datetime(year=2020, month=12, day=30),
    credit_trx_type="Addition",
    credit_value=1,
    credit_saldo=-1,
    record_id=np.nan
)
session.add(fake_trx)

In [None]:
# Check
session.commit()
session.query(CreditTrx).all()[-3:]

In [None]:
db_functions.add_regular_credits(session)

In [None]:
session.query(CreditTrx).all()[-3:]

In [None]:
# session.rollback()

In [None]:
session.close()