# Desert Rock bands analysis with JSON, GraphQL, and SQL/PGQ

The following cell loads the libraries the notebook needs. It must be run first, but it is not part of the lab itself.

In [1]:
import oracledb
import cx_Oracle
from pyvis.network import Network
from prettytable import PrettyTable
from IPython.display import HTML, display
import pandas as pd
import json
import MWMusicalArtist
import sys
import time
import json
import logging
from HelperFunctions import execute_plsql_and_dbmsoutput, execute_plsql, render_graph, render_query, compare_performance
from Artist import NoMusicalInfoboxException
from MongoFactory import mongo_db

FORMAT = '%(asctime)s - %(levelname)-8s - %(funcName)-15s - %(message)s'
logging.basicConfig(format=FORMAT, level=logging.ERROR)

%reload_ext sql
%config SqlMagic.autopandas = True
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False
pd.set_option('display.max_colwidth', None)

pd.set_option('display.max_colwidth', None)

# The following is just a helper function to get the curren connection
def get_notebook_oracle_connection() -> object:
    # Ask ipython-sql for the current connection and assuming we want the first one
    connections = %sql l / --connections
    alchemy_connection = connections[next(iter(connections))]
    return alchemy_connection.internal_connection.connection.dbapi_connection


# Get the JSON from the Kyuss page

In [4]:
# Build the MWMusicalArtist wrapper for "Kyuss" and keep its dict form for the cells below.
kyuss = MWMusicalArtist.MWMusicalArtist("Kyuss").getDict()

In [5]:
# Pretty-print the dict as JSON with a 4-space indent.
# json.dumps escapes non-ASCII characters by default, hence the \u2013 sequences in the output.
print(json.dumps(kyuss, indent=4))

{
    "name": "Kyuss",
    "image": "OldKyuss.jpg",
    "caption": "Kyuss {{circa|1992}}. Left to right: [[Josh Homme]], [[Brant Bjork]], [[John Garcia (singer)|John Garcia]], [[Nick Oliveri]].",
    "landscape": "Yes",
    "background": "group_or_band",
    "origin": "[[Palm Desert, California]], U.S.",
    "alias": [
        {
            "name": "Katzenjammer (1987\u20131989)"
        },
        {
            "name": "Sons of Kyuss (1989\u20131991)"
        }
    ],
    "discography": "[[Kyuss discography]]",
    "genre": [
        {
            "link": "Stoner rock",
            "name": "Stoner rock"
        },
        {
            "link": "Palm Desert Scene",
            "name": "desert rock"
        },
        {
            "link": "Heavy metal music",
            "name": "heavy metal"
        }
    ],
    "years_active": "1987\u20131995",
    "label": [
        {
            "name": "Dali"
        },
        {
            "link": "Chameleon Records",
            "name": "Chamel

# Get the JSON from the Brant Bjork page

In [6]:
# Build the MWMusicalArtist wrapper for "Brant Bjork" and keep its dict form for the cells below.
brant = MWMusicalArtist.MWMusicalArtist("Brant Bjork").getDict()

In [7]:
# Pretty-print the dict as JSON with a 4-space indent.
# json.dumps escapes non-ASCII characters by default, hence the \u00e9 sequence in the output.
print(json.dumps(brant, indent=4))

{
    "name": "Brant Bjork",
    "image": "Brant Bjork-Kyuss-IMG 5771.jpg",
    "caption": "Brant Bjork at the Eurock\u00e9ennes de [[Belfort]], 2011",
    "birth_date": "{{Birth date and age|1973|03|19}}",
    "birth_place": "[[Redlands, California]], U.S.",
    "origin": "[[Palm Springs, California]], U.S.",
    "genre": [
        {
            "link": "Stoner rock",
            "name": "Stoner rock"
        },
        {
            "link": "Palm Desert Scene",
            "name": "desert rock"
        },
        {
            "link": "Heavy metal music",
            "name": "heavy metal"
        },
        {
            "link": "hardcore punk",
            "name": "hardcore punk"
        }
    ],
    "discography": "[[Brant Bjork discography]]",
    "occupation": [
        {
            "name": "Musician"
        },
        {
            "name": "singer"
        },
        {
            "name": "songwriter"
        },
        {
            "name": "record producer"
        }
    ],


# Insert the raw JSON into MongoDB

In [8]:
# Handle to the "mongo_artist" collection (mongo_db comes from MongoFactory; presumably a pymongo database -- confirm).
mongo_artist = mongo_db["mongo_artist"]
# Drop the collection first so that every run of this cell starts from an empty collection.
mongo_artist.drop()
# Store both artist dicts as-is; `res` is overwritten, so only the second insert's result is kept.
res = mongo_artist.insert_one(kyuss)
res = mongo_artist.insert_one(brant)


In [9]:
# Read every stored document back and print just the artist name.
# The cursor is iterated directly; building an intermediate list first is unnecessary.
for document in mongo_artist.find():
    print(document["name"])

Kyuss
Brant Bjork


# Problems of the RAW approach

* Unstructured data requires schema validation (possible in MongoDB)

# Problems of the JSON approach

* JSON documents are cool as long as you keep accessing the data using the same path

A better approach would be to convert our model to relational... Artists, members, genres, labels...

**I would like to avoid rewriting my code to change the model**

In [10]:
# Open the ipython-sql connection used by the %%sql cells below.
# NOTE(review): the user name and password are embedded in this URL -- load them from the environment (or prompt for them) before sharing or committing this notebook.
%sql oracle+oracledb://bands:BandsBands##123@bands0.dbbands/?service_name=pbands_rw

In [20]:
%%sql
-- Dot notation reaches the name field of the JSON held in the data column
SELECT a.data.name
FROM mongo_artist a

Unnamed: 0,name
0,Kyuss
1,Brant Bjork


In [23]:
%%sql
-- The band document carries past_members while the person document carries past_member_of,
-- so COALESCE picks whichever array of names is present
SELECT a.data.name,
       COALESCE(a.data.past_members[*].name,
                a.data.past_member_of[*].name) AS "member or member_of"
FROM mongo_artist a

Unnamed: 0,name,member or member_of
0,Kyuss,"[Josh Homme, John Garcia, Scott Reeder, Alfredo Hernández, Chris Cockrell, Brant Bjork, Nick Oliveri]"
1,Brant Bjork,"[Kyuss, Fu Manchu, Vista Chino, Brant Bjork and the Bros, Brant Bjork and the Low Desert Punk Band, Ché, Mondo Generator, The Desert Sessions, De-Con, LAB, Ten East, Fatso Jetson]"


last keep me

In [40]:
%%sql
-- Flag every customer that has an order worth more than 1000 (UPDATE joined directly to new_orders)
UPDATE new_customers c
SET c.gold_customer = TRUE
FROM new_orders o
WHERE o.customer_id = c.id
  AND o.total_value > 1000


Querying the database, we can now see that "John Smith" has `True` for `gold_customer`.

In [41]:
%%sql
-- One row per customer with the summed value of all of their orders
SELECT c.id, c.first_name, c.last_name, c.email, c.address, c.zip, c.gold_customer,
       SUM(o.total_value) AS "Total Orders Value"
FROM new_customers c
JOIN new_orders o ON o.customer_id = c.id
GROUP BY c.id, c.first_name, c.last_name, c.email, c.address, c.zip, c.gold_customer

Unnamed: 0,id,first_name,last_name,email,address,zip,gold_customer,Total Orders Value
0,100001,Dom,Giles,dg7889@gmail.com,10 smith street,34454-1667,False,10.23
1,223223,John,Smith,jsmith@hotmail.com,the grove,28902,True,1200.0
2,238121,janet,white,jw123@gmail.com,"apartment 256, 120 east street",18092-7980,False,110.1
3,78993,SUE,GRAY,sue_gray@gmail.com,2345 main street,34454,False,20.5


In [42]:
%%sql
-- New in Oracle 23c
-- AVG is applied directly to the warranty interval column

SELECT TO_CHAR(AVG(warranty)) AS "Average Product Warranty"
FROM new_orders

Unnamed: 0,Average Product Warranty
0,+000000002-03


The result above shows that the average warranty was 2 years and 3 months.

The simpler syntax above is in stark contrast to the hard-to-maintain syntax required prior to Oracle Database 23c.

In [43]:
%%sql
-- Prior to Oracle 23c
-- Turn each warranty into a month count, average the counts, then convert the result back into an interval

SELECT TO_CHAR(
         NUMTOYMINTERVAL(
           AVG((EXTRACT(YEAR FROM warranty) * 12) + (EXTRACT(MONTH FROM warranty))),
           'month'
         )
       ) AS "Average Product Warranty"
FROM new_orders


Unnamed: 0,Average Product Warranty
0,+000000002-03
