# Desert Rock bands analysis with JSON, GraphQL, and SQL/PGQ

This cell simply loads the libraries the notebook needs to work. It is mandatory but has nothing to do with the lab itself.

In [2]:
import oracledb
import cx_Oracle
from pyvis.network import Network
from prettytable import PrettyTable
from IPython.display import HTML, display
import pandas as pd
import json
import MWMusicalArtist
import sys
import time
import json
import logging
from HelperFunctions import execute_plsql_and_dbmsoutput, execute_plsql, render_graph, render_query, compare_performance
from Artist import NoMusicalInfoboxException
from MongoFactory import mongo_db

FORMAT = '%(asctime)s - %(levelname)-8s - %(funcName)-15s - %(message)s'
logging.basicConfig(format=FORMAT, level=logging.ERROR)

%reload_ext sql
%config SqlMagic.autopandas = True
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False
pd.set_option('display.max_colwidth', None)

pd.set_option('display.max_colwidth', None)

# The following is just a helper function to get the curren connection
def get_notebook_oracle_connection() -> object:
    # Ask ipython-sql for the current connection and assuming we want the first one
    connections = %sql l / --connections
    alchemy_connection = connections[next(iter(connections))]
    return alchemy_connection.internal_connection.connection.dbapi_connection


# Get the JSON from the Kyuss page

In [3]:
# Build the artist object for the "Kyuss" page and keep its dictionary form
kyuss = MWMusicalArtist.MWMusicalArtist("Kyuss").getDict()

In [4]:
# Pretty-print the dictionary as JSON text, indented by 4 spaces
print(json.dumps(kyuss, indent=4))

{
    "name": "Kyuss",
    "image": "OldKyuss.jpg",
    "caption": "Kyuss {{circa|1992}}. Left to right: [[Josh Homme]], [[Brant Bjork]], [[John Garcia (singer)|John Garcia]], [[Nick Oliveri]].",
    "landscape": "Yes",
    "background": "group_or_band",
    "origin": "[[Palm Desert, California]], U.S.",
    "alias": [
        {
            "name": "Katzenjammer (1987\u20131989)"
        },
        {
            "name": "Sons of Kyuss (1989\u20131991)"
        }
    ],
    "discography": "[[Kyuss discography]]",
    "genre": [
        {
            "link": "Stoner rock",
            "name": "Stoner rock"
        },
        {
            "link": "Palm Desert Scene",
            "name": "desert rock"
        },
        {
            "link": "Heavy metal music",
            "name": "heavy metal"
        }
    ],
    "years_active": "1987\u20131995",
    "label": [
        {
            "name": "Dali"
        },
        {
            "link": "Chameleon Records",
            "name": "Chamel

# Get the JSON from the Brant Bjork page

In [5]:
# Build the artist object for the "Brant Bjork" page and keep its dictionary form
brant = MWMusicalArtist.MWMusicalArtist("Brant Bjork").getDict()

In [6]:
# Pretty-print the dictionary as JSON text, indented by 4 spaces
print(json.dumps(brant, indent=4))

{
    "name": "Brant Bjork",
    "image": "Brant Bjork-Kyuss-IMG 5771.jpg",
    "caption": "Brant Bjork at the Eurock\u00e9ennes de [[Belfort]], 2011",
    "birth_date": "{{Birth date and age|1973|03|19}}",
    "birth_place": "[[Redlands, California]], U.S.",
    "origin": "[[Palm Springs, California]], U.S.",
    "genre": [
        {
            "link": "Stoner rock",
            "name": "Stoner rock"
        },
        {
            "link": "Palm Desert Scene",
            "name": "desert rock"
        },
        {
            "link": "Heavy metal music",
            "name": "heavy metal"
        },
        {
            "link": "hardcore punk",
            "name": "hardcore punk"
        }
    ],
    "discography": "[[Brant Bjork discography]]",
    "occupation": [
        {
            "name": "Musician"
        },
        {
            "name": "singer"
        },
        {
            "name": "songwriter"
        },
        {
            "name": "record producer"
        }
    ],


# Insert the raw JSON into MongoDB

In [7]:
# Start from an empty "mongo_artist" collection so the cell can be re-run safely.
mongo_artist = mongo_db["mongo_artist"]
mongo_artist.drop()
# Insert shallow copies: assuming mongo_db is a pymongo database, insert_one()
# adds a generated "_id" (ObjectId) to the dict it is given, which would make
# the original kyuss / brant dicts non JSON-serialisable when the
# json.dumps() cells above are re-run.
res = mongo_artist.insert_one(dict(kyuss))
res = mongo_artist.insert_one(dict(brant))

In [8]:
# Print the name of every document stored in the collection.
# The cursor returned by find() is iterated directly; there is no need to
# materialise it into a list first.
for document in mongo_artist.find():
    print(document["name"])

Kyuss
Brant Bjork


# Problems of the RAW approach

* Semi-structured data requires schema validation (possible in MongoDB).

# Problems of the JSON approach

* JSON documents are cool as long as you keep accessing the data through the same path (e.g. Band -> Member).
  
  Don't believe me? Check out this: https://dev.to/mongodb/mongodb-design-reviews-how-applying-schema-design-best-practices-resulted-in-a-60x-performance-improvement-56m5
  
# Better approach: use the relational model... Artists (or bands + musicians), members, spinoffs, genres, labels...

* Easier to evolve
  * declarative language abstracts how data is accessed
  * The impedance mismatch between the application and the database isn't the same across applications accessing the same data

# But... 

* **I would like to avoid rewriting my code to change the model**

# I cheated... we are on an Oracle Database already!

In [9]:
%sql oracle+oracledb://bands:BandsBands##123@bands0.dbbands/?service_name=pbands_rw

In [17]:
%%sql
SELECT column_name,
       data_type
  FROM user_tab_columns
 WHERE table_name = 'MONGO_ARTIST'

Unnamed: 0,column_name,data_type
0,ID,RAW
1,CREATED_ON,TIMESTAMP(6)
2,LAST_MODIFIED,TIMESTAMP(6)
3,VERSION,VARCHAR2
4,DATA,JSON


In [10]:
%%sql
SELECT a.data.name
  FROM mongo_artist a

Unnamed: 0,name
0,Kyuss
1,Brant Bjork


In [11]:
%%sql
SELECT a.data.name,
       COALESCE(a.data.past_members[*].name,
                a.data.past_member_of[*].name) AS "member or member_of"
  FROM mongo_Artist a

Unnamed: 0,name,member or member_of
0,Kyuss,"[Josh Homme, John Garcia, Scott Reeder, Alfredo Hernández, Chris Cockrell, Brant Bjork, Nick Oliveri]"
1,Brant Bjork,"[Kyuss, Fu Manchu, Vista Chino, Brant Bjork and the Bros, Brant Bjork and the Low Desert Punk Band, Ché, Mondo Generator, The Desert Sessions, De-Con, LAB, Ten East, Fatso Jetson]"


# Build the relational schema

In [20]:
%%sql
SELECT table_name,
       column_name,
       data_type
  FROM user_tab_columns
 ORDER BY table_name, column_id

Unnamed: 0,table_name,column_name,data_type
0,ARTIST,DATA,JSON
1,ARTISTS,ID,VARCHAR2
2,ARTISTS,NAME,VARCHAR2
3,ARTISTS,LINK,VARCHAR2
4,ARTISTS,TYPE,VARCHAR2
5,ARTISTS,DISCOVERED,BOOLEAN
6,ARTISTS,ERROR,VARCHAR2
7,ARTIST_GENRES,ID,NUMBER
8,ARTIST_GENRES,GENRE_ID,VARCHAR2
9,ARTIST_GENRES,ARTIST_ID,VARCHAR2


# Oracle Database 23c solves the impedance mismatch with JSON Relational Duality Views

In [21]:
%%sql
CREATE OR REPLACE JSON RELATIONAL DUALITY VIEW artist_short AS
  artists @insert @update @delete @nocheck
  {
    _id        : id
    name       : name
    link       : link
    type       : type
    discovered : discovered
    error      : error
  }