**Modules**

In [1]:
import pandas as pd

from configparser import ConfigParser
from mysql.connector import MySQLConnection,Error
from datetime import datetime

**Config reading and parsing**

In [2]:
# reading database credentials from config.ini file

def read(filename='config.ini',section='mysql'):
    parser=ConfigParser()
    parser.read(filename)
    
    db={}
    
    if parser.has_section(section):
        items=parser.items(section)
        for item in items:
            db[item[0]]=item[1]
    else:
        raise Exception(f'{section} not found in file {filename}')
    return db 


print(read(filename="config.ini",section="mysql"))

{'host': '127.0.0.1', 'port': '99966', 'database': 'books', 'user': 'root', 'password': 'Milla123!'}


**Connecting with MySQL/MariaDB database server and getting the connection and cursor object**

In [3]:
def connect(creds):
    con=None
    try:
        print('Connecting to MySQL database...')
        con=MySQLConnection(**creds)
        
        
        if con.is_connected():
            print('Connection established')
            cus = con.cursor(buffered=True)
        else:
            print('Connection failed')
            
    except Error as e:
        print(e)
    finally:
        return con,cus
    
cn,cs=connect(creds=read(filename="config.ini",section="mysql"))

Connecting to MySQL database...
Connection established


**Database column details**

```
gr_book_id bigint(20) 
book_title text 
book_title_mod text 
book_title_series text 
book_title_series_mod text 
book_description text 
book_ratings_count int(11) 
book_average_rating float 
book_num_pages int(11) 
book_publication_day tinyint(4) 
book_publication_month tinyint(4) 
book_publication_year int(11) 
book_isbn text 
book_isbn13 text 
book_publisher varchar(500) 
book_country_code varchar(100) 
book_language_code varchar(100) 
book_url text 
book_image_url text 
book_link text
```

**Reading the data**

In [4]:
items = pd.read_parquet('../../../../../Datasets/Processed/books_SE_v5.parquet')

In [5]:
items.shape

(2113033, 20)

In [6]:
items.head()

Unnamed: 0,isbn,country_code,language_code,average_rating,description,link,publisher,num_pages,publication_day,isbn13,publication_month,publication_year,url,image_url,book_id,ratings_count,title,title_without_series,mod_title,mod_title_without_series
0,312853122.0,US,,4.0,,https://www.goodreads.com/book/show/5333265-w-...,St. Martin's Press,256,1,9780312853129.0,9,1984,https://www.goodreads.com/book/show/5333265-w-...,https://images.gr-assets.com/books/1310220028m...,5333265,3,W.C. Fields: A Life on Film,W.C. Fields: A Life on Film,wc fields a life on film,wc fields a life on film
1,743509986.0,US,,3.23,"Anita Diamant's international bestseller ""The ...",https://www.goodreads.com/book/show/1333909.Go...,Simon & Schuster Audio,0,1,9780743509985.0,10,2001,https://www.goodreads.com/book/show/1333909.Go...,https://s.gr-assets.com/assets/nophoto/book/11...,1333909,10,Good Harbor,Good Harbor,good harbor,good harbor
2,,US,eng,4.03,Omnibus book club edition containing the Ladie...,https://www.goodreads.com/book/show/7327624-th...,"Nelson Doubleday, Inc.",600,0,,0,1987,https://www.goodreads.com/book/show/7327624-th...,https://images.gr-assets.com/books/1304100136m...,7327624,140,"The Unschooled Wizard (Sun Wolf and Starhawk, ...","The Unschooled Wizard (Sun Wolf and Starhawk, ...",the unschooled wizard sun wolf and starhawk 12,the unschooled wizard sun wolf and starhawk 12
3,743294297.0,US,eng,3.49,Addie Downs and Valerie Adler were eight when ...,https://www.goodreads.com/book/show/6066819-be...,Atria Books,368,14,9780743294294.0,7,2009,https://www.goodreads.com/book/show/6066819-be...,https://s.gr-assets.com/assets/nophoto/book/11...,6066819,51184,Best Friends Forever,Best Friends Forever,best friends forever,best friends forever
4,850308712.0,US,,3.4,,https://www.goodreads.com/book/show/287140.Run...,,0,0,9780850308716.0,0,0,https://www.goodreads.com/book/show/287140.Run...,https://images.gr-assets.com/books/1413219371m...,287140,15,Runic Astrology: Starcraft and Timekeeping in ...,Runic Astrology: Starcraft and Timekeeping in ...,runic astrology starcraft and timekeeping in t...,runic astrology starcraft and timekeeping in t...


**Dataframe column names and corresponding database column names**
- book_id -> gr_book_id 
- title_without_series -> book_title
- mod_title_without_series -> book_title_mod
- title -> book_title_series
- mod_title -> book_title_series_mod
- description -> book_description
- ratings_count -> book_ratings_count
- average_rating -> book_average_rating
- num_pages -> book_num_pages
- publication_day -> book_publication_day
- publication_month -> book_publication_month
- publication_year -> book_publication_year
- isbn -> book_isbn
- isbn13 -> book_isbn13
- publisher -> book_publisher
- country_code -> book_country_code
- language_code -> book_language_code
- url -> book_url
- link -> book_link

**Database column names list**

In [7]:
database_columns = [
"gr_book_id",
"book_title",
"book_title_mod",
"book_title_series",
"book_title_series_mod",
"book_description",
"book_ratings_count",
"book_average_rating",
"book_num_pages",
"book_publication_day",
"book_publication_month",
"book_publication_year",
"book_isbn",
"book_isbn13",
"book_publisher",
"book_country_code",
"book_language_code",
"book_url",
"book_image_url",
"book_link",
]

len(database_columns)

20

**Dataframe columns**

In [8]:
print(items.columns)
len(items.columns)

Index(['isbn', 'country_code', 'language_code', 'average_rating',
       'description', 'link', 'publisher', 'num_pages', 'publication_day',
       'isbn13', 'publication_month', 'publication_year', 'url', 'image_url',
       'book_id', 'ratings_count', 'title', 'title_without_series',
       'mod_title', 'mod_title_without_series'],
      dtype='object')


20

**Properly arranging the dataframe column names as per database columns**

In [9]:
dataframe_columns = [
"book_id",
"title_without_series",
"mod_title",
"title",
"mod_title_without_series",
"description",
"ratings_count",
"average_rating",
"num_pages",
"publication_day",
"publication_month",
"publication_year",
"isbn",
"isbn13",
"publisher",
"country_code",
"language_code",
"url",
"image_url",
"link"
]

len(dataframe_columns)

20

In [25]:
items.loc[0, dataframe_columns]

book_id                                                               5333265
title_without_series                              W.C. Fields: A Life on Film
mod_title                                            wc fields a life on film
title                                             W.C. Fields: A Life on Film
mod_title_without_series                             wc fields a life on film
description                                                                  
ratings_count                                                               3
average_rating                                                            4.0
num_pages                                                                 256
publication_day                                                             1
publication_month                                                           9
publication_year                                                         1984
isbn                                                            

In [26]:
items.loc[0, dataframe_columns].values

array(['5333265', 'W.C. Fields: A Life on Film',
       'wc fields a life on film', 'W.C. Fields: A Life on Film',
       'wc fields a life on film', '', 3, 4.0, 256, 1, 9, 1984,
       '0312853122', '9780312853129', "St. Martin's Press", 'US', '',
       'https://www.goodreads.com/book/show/5333265-w-c-fields',
       'https://images.gr-assets.com/books/1310220028m/5333265.jpg',
       'https://www.goodreads.com/book/show/5333265-w-c-fields'],
      dtype=object)

**Checking the column datatypes before storing into the database**

In [27]:
for i in dataframe_columns:
    print(f"{i} ->\t {type(items.loc[0, i])}")

book_id ->	 <class 'str'>
title_without_series ->	 <class 'str'>
mod_title ->	 <class 'str'>
title ->	 <class 'str'>
mod_title_without_series ->	 <class 'str'>
description ->	 <class 'str'>
ratings_count ->	 <class 'numpy.int64'>
average_rating ->	 <class 'numpy.float64'>
num_pages ->	 <class 'numpy.int64'>
publication_day ->	 <class 'numpy.int64'>
publication_month ->	 <class 'numpy.int64'>
publication_year ->	 <class 'numpy.int64'>
isbn ->	 <class 'str'>
isbn13 ->	 <class 'str'>
publisher ->	 <class 'str'>
country_code ->	 <class 'str'>
language_code ->	 <class 'str'>
url ->	 <class 'str'>
image_url ->	 <class 'str'>
link ->	 <class 'str'>


In [57]:
proc_name = "sp_create_book"
proc_args_initial = [0,1]
proc_args_end = [0]

procs_args_complete = []
procs_args_complete.extend(proc_args_initial)
procs_args_complete.extend(items.loc[0, dataframe_columns].values)
procs_args_complete.extend(proc_args_end)

In [58]:
procs_args_complete

[0,
 1,
 '5333265',
 'W.C. Fields: A Life on Film',
 'wc fields a life on film',
 'W.C. Fields: A Life on Film',
 'wc fields a life on film',
 '',
 3,
 4.0,
 256,
 1,
 9,
 1984,
 '0312853122',
 '9780312853129',
 "St. Martin's Press",
 'US',
 '',
 'https://www.goodreads.com/book/show/5333265-w-c-fields',
 'https://images.gr-assets.com/books/1310220028m/5333265.jpg',
 'https://www.goodreads.com/book/show/5333265-w-c-fields',
 0]

**Python 'numpy.int64' or 'numpy.float64' cannot be converted to a MySQL type**
- We will convert `int64` and `float64` into `int` and `float`

In [59]:
for i, val in enumerate(procs_args_complete):
    print(f"{i}->\t{val}->\t\t{type(val)}")

0->	0->		<class 'int'>
1->	1->		<class 'int'>
2->	5333265->		<class 'str'>
3->	W.C. Fields: A Life on Film->		<class 'str'>
4->	wc fields a life on film->		<class 'str'>
5->	W.C. Fields: A Life on Film->		<class 'str'>
6->	wc fields a life on film->		<class 'str'>
7->	->		<class 'str'>
8->	3->		<class 'numpy.int64'>
9->	4.0->		<class 'numpy.float64'>
10->	256->		<class 'numpy.int64'>
11->	1->		<class 'numpy.int64'>
12->	9->		<class 'numpy.int64'>
13->	1984->		<class 'numpy.int64'>
14->	0312853122->		<class 'str'>
15->	9780312853129->		<class 'str'>
16->	St. Martin's Press->		<class 'str'>
17->	US->		<class 'str'>
18->	->		<class 'str'>
19->	https://www.goodreads.com/book/show/5333265-w-c-fields->		<class 'str'>
20->	https://images.gr-assets.com/books/1310220028m/5333265.jpg->		<class 'str'>
21->	https://www.goodreads.com/book/show/5333265-w-c-fields->		<class 'str'>
22->	0->		<class 'int'>


In [60]:
def tointfloat(data):
    data[8] = int(data[8])
    data[9] = float(data[9])
    data[10] = int(data[10])
    data[11] = int(data[11])
    data[12] = int(data[12])
    data[13] = int(data[13])

    return data

In [61]:
procs_args_complete = tointfloat(procs_args_complete)

In [62]:
for i, val in enumerate(procs_args_complete):
    print(f"{i}->\t{val}->\t\t{type(val)}")

0->	0->		<class 'int'>
1->	1->		<class 'int'>
2->	5333265->		<class 'str'>
3->	W.C. Fields: A Life on Film->		<class 'str'>
4->	wc fields a life on film->		<class 'str'>
5->	W.C. Fields: A Life on Film->		<class 'str'>
6->	wc fields a life on film->		<class 'str'>
7->	->		<class 'str'>
8->	3->		<class 'int'>
9->	4.0->		<class 'float'>
10->	256->		<class 'int'>
11->	1->		<class 'int'>
12->	9->		<class 'int'>
13->	1984->		<class 'int'>
14->	0312853122->		<class 'str'>
15->	9780312853129->		<class 'str'>
16->	St. Martin's Press->		<class 'str'>
17->	US->		<class 'str'>
18->	->		<class 'str'>
19->	https://www.goodreads.com/book/show/5333265-w-c-fields->		<class 'str'>
20->	https://images.gr-assets.com/books/1310220028m/5333265.jpg->		<class 'str'>
21->	https://www.goodreads.com/book/show/5333265-w-c-fields->		<class 'str'>
22->	0->		<class 'int'>


In [63]:
book_details = cs.callproc(procname=proc_name, args=tointfloat(procs_args_complete))

In [64]:
book_details

(0,
 1,
 '5333265',
 'W.C. Fields: A Life on Film',
 'wc fields a life on film',
 'W.C. Fields: A Life on Film',
 'wc fields a life on film',
 '',
 3,
 Decimal('4.0'),
 256,
 1,
 9,
 1984,
 '0312853122',
 '9780312853129',
 "St. Martin's Press",
 'US',
 '',
 'https://www.goodreads.com/book/show/5333265-w-c-fields',
 'https://images.gr-assets.com/books/1310220028m/5333265.jpg',
 'https://www.goodreads.com/book/show/5333265-w-c-fields',
 4)

In [65]:
cn.commit()

- Note: 
  - Although `book_id` is `string` in here but it gets converted into `int` internally in the database

**Fetching the books data using proc - `sp_get_book`**

In [66]:
proc_name = "sp_get_book"

proc_args_initial = [0]
proc_args_rest = [
    None,
    None,
    None,
    None,
    None,
    None,
    ]

procs_args_complete = []
procs_args_complete.extend(proc_args_initial)
procs_args_complete.extend(proc_args_rest)

In [67]:
procs_args_complete

[0, None, None, None, None, None, None]

In [68]:
cs.callproc(proc_name, procs_args_complete)

(0, None, None, None, None, None, None)

In [69]:
books = [r.fetchone() for r in cs.stored_results()]

In [70]:
books

[(4,
  5333265,
  'W.C. Fields: A Life on Film',
  'wc fields a life on film',
  'W.C. Fields: A Life on Film',
  'wc fields a life on film',
  '',
  3,
  4.0,
  256,
  1,
  9,
  1984,
  '0312853122',
  '9780312853129',
  "St. Martin's Press",
  'US',
  '',
  'https://www.goodreads.com/book/show/5333265-w-c-fields',
  'https://images.gr-assets.com/books/1310220028m/5333265.jpg',
  'https://www.goodreads.com/book/show/5333265-w-c-fields',
  1,
  1,
  datetime.datetime(2023, 12, 22, 12, 35, 9),
  1,
  datetime.datetime(2023, 12, 22, 12, 35, 9))]