# Database

In [None]:
%config Completer.use_jedi = False

In [None]:
import pandas as pd
import re

In [None]:
datas = pd.read_json('./tests/Processor_tests.json').T

In [None]:
datas.to_csv('./database.csv', sep=';', encoding='latin-1')

# Converter

## CBR

In [None]:
from comicsreader.converter import cbr2cbz

In [None]:
cbr2cbz('./tests/01 Wasteland.cbr')

## Pdf

In [None]:
from comicsreader.converter import pdf2cbz

In [None]:
pdf2cbz('./tests/Invisible Republic - T01.pdf')

# Dev Meta data

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import pandas as pd
from comicsreader.metaprocessor import MetaProcessor as Meta
from comicsreader.metaprocessor import TitleProcessor as Proc
import re

In [None]:
path = 'D:/Mes bds/'

In [None]:
for root, dirs, files in os.walk(path, topdown=False):
    pass

In [None]:
root

In [None]:
files

In [None]:
file = files[3]
file

In [None]:
pattern = re.compile(r"""
                                (?!{)  # ignore when starting with {
                                ([^A-Za-z][0-9]{1,3})
                                (-[^A-Za-z][0-9]{1,3})?
                                (?!})  # ignore when finishing with }

                                """, re.VERBOSE
                                 )
# pattern = '[0-9]{1,3}'
match = re.search(pattern, file)
match

In [None]:
match.groups()

In [None]:
processor = Proc()
processor(file)

In [None]:
file

In [None]:
datas = []
for file in files:
    datas.append(Meta.from_file(file))

processed = pd.DataFrame([data.as_dict() for data in datas])

In [None]:
from ipywidgets import widgets, interactive, interact

pd.set_option("max_colwidth", 100)

@interact(n=list(range(processed.index.stop // 5)))
def sample(n):
    return processed.loc[5*n:5*(n+1), ['file', 'extension', 'date', 'volumes', 'chapters', 'title']]

In [None]:
import json

In [None]:
processed.T.to_json('./tests/Processor_tests.json', indent=4)

In [None]:
processed.to_csv('./database.csv', sep=';', encoding='latin-1')

# Database

In [1]:
import pandas as pd
import re, os
import shutil
import ast
from comicsreader.converter import cbr2cbz, pdf2cbz

%load_ext autoreload
%autoreload 2

In [2]:
def format_list(x, prefix='(', suffix=')'):
    if len(x) == 0:
        result = ''
    elif len(x) < 3:
        result = prefix + '-'.join(map(str, x)) + suffix
    else:
        result = prefix + ', '.join(map(str, x)) + suffix
    return result

def format_title(x):
    dates = format_list(x.dates, '(', ')')
    volumes = format_list(x.volumes, ', T', '')
#     chapters = format_list(x.chapters, ', ', '')
    result = f'{x.title} {dates}{volumes}'
    result = re.sub(' +', ' ', result.strip())
    return result

In [3]:
datas = pd.read_csv('./database_corrected.csv', sep=';', encoding='latin-1', index_col=0)
datas[['dates', 'volumes']] = datas[['dates', 'volumes']].applymap(ast.literal_eval)

In [4]:
datas['formatted_title'] = datas.apply(format_title, axis=1)

In [None]:
# convert

export_path = 'E:/comics_database/'
input_path = 'D:/Mes bds'

for series, group in datas.groupby('title'):
    path = os.path.join(export_path, series)
    if not os.path.exists(path):
        os.makedirs(path)
    
    for _, row in group.iterrows():
        # convert
        file = row.file
        ext = file.split('.')[-1]
        if ext == 'cbz':
            shutil.copy(os.path.join(input_path, file), os.path.join(path, file))
        elif ext == 'cbr':
            cbr2cbz(os.path.join(input_path, file), path)
        elif ext == 'pdf':
            pdf2cbz(os.path.join(input_path, file), path)
            
        # rename
        input_file = file.split('.')[0] + '.cbz'
        output_file = row.formatted_title + '.cbz'
        os.rename(os.path.join(path, input_file), os.path.join(path, output_file))

In [5]:
series_table = pd.DataFrame(datas.title.unique(), columns=['Serie'])
series_table['ID'] = series_table.Serie.apply(hash)
series_map = series_table.set_index('Serie').ID

books_table = datas.copy()
books_table['serie_ID'] = series_map[books_table['title']].values
books_table.drop(columns=['tokenized_file', 'extension', 'title'], inplace=True)
books_table.rename({'formatted_title': 'name'}, axis=1, inplace=True)
books_table['ID']= books_table.name.apply(hash)
books_table[['dates', 'volumes']] = books_table[['dates', 'volumes']].astype(str)

series_table.rename(str.lower, axis=1, inplace=True)
books_table.rename(str.lower, axis=1, inplace=True)

# Sqlalchemy

In [22]:
import sqlalchemy as db
from sqlalchemy import Column, Integer, String, ForeignKey, Table, MetaData
from sqlalchemy.orm import relationship, backref
from sqlalchemy.ext.declarative import declarative_base
from functools import partial

In [7]:
from sqlalchemy import create_engine, text, insert
from sqlalchemy.orm import sessionmaker

In [8]:
engine = create_engine('sqlite+pysqlite:///comics_db.sqlite', echo=True, future=True)
Session = sessionmaker(bind=engine)
session = Session()

In [18]:
Base = declarative_base()
class AsDict:
    __attr__ = []
    
    def as_dict(self):
        return {key: self.__getattribute__(key) for key in self.__attr__}
    
    @classmethod
    def from_series(cls, x: pd.Series):
        return cls(**x[cls.__attr__].to_dict())
    
    def __repr__(self):
        return self.as_dict().__str__()

class Serie(Base, AsDict):
    __tablename__ = 'series'
    __attr__ = ['id', 'serie']
    id = Column(Integer, primary_key=True)
    serie = Column(String)
    books = relationship('Book', backref=backref('series'))    
    
class Book(Base, AsDict):
    __tablename__ = 'books'
    __attr__ = ['id', 'serie_id', 'name', 'dates','volumes']
    id = Column(Integer, primary_key=True)
    serie_id = Column(Integer, ForeignKey('series.id'))
    name = Column(String)
    dates = Column(String)
    volumes = Column(String)
    
series = Table('series', Base.metadata, autoload=True, autoload_with=engine)
books = Table('books', Base.metadata, autoload=True, autoload_with=engine)

In [19]:
meta = MetaData(bind=engine)
meta.reflect(engine)
if len(meta.tables) == 0:
    Base.metadata.create_all(engine)

2021-08-13 18:50:09,807 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-08-13 18:50:09,819 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='table' ORDER BY name
2021-08-13 18:50:09,821 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-08-13 18:50:09,825 INFO sqlalchemy.engine.Engine ROLLBACK
2021-08-13 18:50:09,830 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-08-13 18:50:09,834 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("series")
2021-08-13 18:50:09,834 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-08-13 18:50:09,834 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("series")
2021-08-13 18:50:09,840 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-08-13 18:50:09,843 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("books")
2021-08-13 18:50:09,843 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-08-13 18:50:09,843 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("books")
2021-08-13 18:50:09,851 INFO sqlalchemy.engine.Engine [raw sql]

In [25]:
def execute_query(engine, query, **args):
    with engine.connect() as conn:
        conn.execute(query, *args)
        conn.commit()

In [20]:
values_to_insert = [Serie.from_series(row) for _, row in series_table.iterrows()]
session.add_all(values_to_insert)
session.commit()

2021-08-13 18:50:26,584 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-08-13 18:50:26,594 INFO sqlalchemy.engine.Engine INSERT INTO series (id, serie) VALUES (?, ?)
2021-08-13 18:50:26,594 INFO sqlalchemy.engine.Engine [generated in 0.00308s] ((-7818973386405687002, 'Authority - Human on the Inside'), (-8809719025296134653, 'Blackgas'), (5377669183114557087, 'Caliban'), (8543239933917734762, 'Critical Role - Vox Machina Origins Series II'), (279211294762406937, 'Ether'), (-7828051170934429068, 'Ether - The Disappearance of Violet Bell'), (-5706947596546639003, 'Evil Empire'), (318029154217467305, 'Fatale Book')  ... displaying 10 of 64 total bound parameter sets ...  (-7049867519448743398, 'Wizard Beach'), (-3412539509003522277, 'Wyrd'))
2021-08-13 18:50:26,606 INFO sqlalchemy.engine.Engine COMMIT


In [21]:
values_to_insert = [Book.from_series(row) for _, row in books_table.iterrows()]
session.add_all(values_to_insert)
session.commit()

2021-08-13 18:50:28,113 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-08-13 18:50:28,121 INFO sqlalchemy.engine.Engine INSERT INTO books (id, serie_id, name, dates, volumes) VALUES (?, ?, ?, ?, ?)
2021-08-13 18:50:28,121 INFO sqlalchemy.engine.Engine [generated in 0.00383s] ((-7818973386405687002, -7818973386405687002, 'Authority - Human on the Inside', '[]', '[]'), (104524395293276692, -8809719025296134653, 'Blackgas (2006)', '[2006]', '[]'), (-3552946770667500403, 5377669183114557087, 'Caliban (2014)', '[2014]', '[]'), (767945623645037230, 8543239933917734762, 'Critical Role - Vox Machina Origins Series II (2020), T5', '[2020]', '[5]'), (1408640591926604528, 8543239933917734762, 'Critical Role - Vox Machina Origins Series II (2019), T1', '[2019]', '[1]'), (-8474208945246161316, 8543239933917734762, 'Critical Role - Vox Machina Origins Series II (2019), T2', '[2019]', '[2]'), (5561919319244272431, 8543239933917734762, 'Critical Role - Vox Machina Origins Series II (2019), T3', '

In [45]:
db.select([series.columns.serie])

<sqlalchemy.sql.selectable.Select object at 0x0000020FF028BB88>

In [41]:
session.query(Serie)

2021-08-13 19:17:07,050 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-08-13 19:17:07,058 INFO sqlalchemy.engine.Engine SELECT series.id AS series_id, series.serie AS series_serie 
FROM series
2021-08-13 19:17:07,061 INFO sqlalchemy.engine.Engine [generated in 0.00257s] ()


[{'id': -8833933816637478755, 'serie': 'Warren Ellis Ignition City'},
 {'id': -8809719025296134653, 'serie': 'Blackgas'},
 {'id': -8513709633002576919, 'serie': 'Undying Love'},
 {'id': -7828051170934429068, 'serie': 'Ether - The Disappearance of Violet Bell'},
 {'id': -7818973386405687002, 'serie': 'Authority - Human on the Inside'},
 {'id': -7444446579655107128, 'serie': 'Rasputin - The Voice of the Dragon'},
 {'id': -7049867519448743398, 'serie': 'Wizard Beach'},
 {'id': -6765444887463260425, 'serie': 'Grendel'},
 {'id': -5706947596546639003, 'serie': 'Evil Empire'},
 {'id': -5630409175883020425, 'serie': 'Warlord of Mars - Dejah Thoris'},
 {'id': -5379907742672526916, 'serie': 'The Sandman Universe'},
 {'id': -5043920036447030823, 'serie': 'Warlord of Mars'},
 {'id': -4721164017087473998, 'serie': 'The Dark North'},
 {'id': -3995961064549379875, 'serie': 'iZombie'},
 {'id': -3826274488361420833, 'serie': 'Norse Mythology'},
 {'id': -3452348545095113292, 'serie': 'The Last One'},
 {

In [None]:
session.close()

# pyunpack

In [None]:
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False

from pyunpack import Archive
from zipfile import ZipFile
import os

# Rarfile

In [None]:
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False

import rarfile.rarfile as rarfile

# rarfile.UNAR_TOOL = './unar/unar.exe'
# rarfile.UNAR_TOOL = './unrar/UNRAR.exe'
rarfile.UNAR_TOOL = './libarchive/bin/bsdtar.exe'

In [None]:
file_path = './01 Wasteland.cbr'
with rarfile.RarFile(file_path) as rf:
    for f in rf.infolist():
        print(f.filename, f.file_size)
        rf.extract(f.orig_filename, './')

In [None]:
f.orig_filename.endswith(b'/')