### Database Project

Importing the tools that we need:

In [1]:
import sqlite3
import pandas as pd
import numpy as np
import os
from random import choice
from sql import *


Importing the new dataframe from a csv file:

In [2]:
df_new = pd.read_csv("https://raw.githubusercontent.com/mahkaila/songnames/master/SongCSV.csv")
df_new.head()

Unnamed: 0,SongNumber,SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,Title,Year
0,1,b'SOVLGJY12A8C13FBED',223563,b'Call of the Mastodon',b'ARMQHX71187B9890D3',,b'Atlanta GA',,b'Mastodon',0.0,280.21506,5,0.555,173.205,5,0.12,b'Deep Sea Creature',2001
1,2,b'SOMZWCG12A8C13C480',300848,b'Fear Itself',b'ARD7TVE1187B99BFB1',,b'California - LA',,b'Casual',0.0,218.93179,1,0.736,92.198,4,0.778,"bI Didn't Mean To""""",0
2,3,b'SOCIWDW12A8C13D406',300822,b'Dimensions',b'ARMJAGH1187FB546F3',35.14968,b'Memphis TN',-90.04892,b'The Box Tops',0.0,148.03546,6,0.169,121.274,4,0.384,b'Soul Deep',1969
3,4,b'SOXVLOJ12AB0189215',514953,b'Las Numero 1 De La Sonora Santanera',b'ARKRRTF1187B9984DA',,b'',,b'Sonora Santanera',0.0,177.47546,8,0.643,100.07,1,0.0,b'Amor De Cabaret',0
4,5,b'SONHOTT12A8C13493C',287650,b'Friend Or Foe',b'AR7G5I41187FB4CE6C',,b'London England',,b'Adam Ant',0.0,233.40363,0,0.751,119.293,4,0.0,b'Something Girls',1982


Selecting only the necessary Variables to add

In [3]:
# add Artist method:
# ArtistId FK added automatically
df_new["ArtistName"] = df_new["ArtistName"].str[2:-1] # Name in artists table

# add Album 
# AlbumId FK added automatically
# ArtistId FK as input
df_new["AlbumName"] = df_new["AlbumName"].str[2:-1]  # Title in albums table

# add Track
# TrackId FK added automatically
# AlbumId FK as input
df_new["Title"] = df_new["Title"].str[2:-1]  # Title in tracks table

df_new = df_new[["ArtistName", "AlbumName", "Title"]]
df_new.head()

Unnamed: 0,ArtistName,AlbumName,Title
0,Mastodon,Call of the Mastodon,Deep Sea Creature
1,Casual,Fear Itself,"Didn't Mean To"""
2,The Box Tops,Dimensions,Soul Deep
3,Sonora Santanera,Las Numero 1 De La Sonora Santanera,Amor De Cabaret
4,Adam Ant,Friend Or Foe,Something Girls


In [8]:
len(df_new["ArtistName"].unique())
len(df_new["ArtistName"])

10001

In [9]:
pd.concat(g for _, g in df_new.groupby("ArtistName") if len(g) > 1)

Unnamed: 0,ArtistName,AlbumName,Title
4223,"'Naan""",The Dusty Foot On The Road,Is It A Myth?
7122,"'Naan""",The Dusty Foot On The Road,Smile
1676,"'s Choice""",Paradise In Me,My Record Company
2159,"'s Choice""",Live,Another Year
4236,"'s Choice""",Paradise In Me,A Sound That Only You Can Hear
...,...,...,...
1782,"uns N' Roses""",Use Your Illusion II,Locomotive (Complicity)
6729,"uns N' Roses""",Chinese Democracy,Catcher In The Rye
7347,"uns N' Roses""","ive Era '87-'93""","ou're Crazy"""
8846,"uns N' Roses""","ive Era '87-'93""","weet Child O' Mine"""


First Method: slower but more neat!

In [5]:
os.remove("chinook.db")

In [6]:
db = Chinook("chinook.db")

In [8]:
artists = df_new["ArtistName"].unique()

for artist in artists:
    artistId = db.addArtist(name = artist) # add all unique artists to the database
    albums = df_new[df_new["ArtistName"] == artist]["AlbumName"].unique()
    
    for album in albums:
        albumId = db.addAlbum(title = album, artistId = artistId)
        tracks = df_new[(df_new["ArtistName"] == artist) & (df_new["AlbumName"] == album)]["Title"].unique()
        
        for track in tracks:
            db.addTrack(name = track, albumId = albumId)

Access to the database created:

In [9]:
q = '''
    SELECT
        t.Name AS TrackName,
        a.Title AS AlbumTitle, 
        ar.Name AS ArtistName

    FROM tracks t 
    INNER JOIN albums a ON t.AlbumId = a.AlbumId
    INNER JOIN artists ar ON a.ArtistId = ar.ArtistId
'''

con = sqlite3.connect("chinook.db")  # enable the connection with the database
df = pd.read_sql_query(q, con)  # apply the query and import in Pandas
df.head()

Unnamed: 0,TrackName,AlbumTitle,ArtistName
0,Deep Sea Creature,Call of the Mastodon,Mastodon
1,Shadows That Move,Call of the Mastodon,Mastodon
2,The Czar: Usurper/Escape/Martyr/Spiral (Album ...,Crack The Skye,Mastodon
3,Megalodon,Leviathan,Mastodon
4,"Didn't Mean To""",Fear Itself,Casual


In [3]:
os.remove("chinook2.db") # the way to remove the database quickly

In [11]:
df.shape

(9984, 3)

Second Method for updatiting a database:

In [56]:
artists = df_new["ArtistName"].unique()
tot_albums = np.array(["Fottiti"])
ar_albums = np.array(["Fottiti"])
tot_tracks = np.array(["Fottiti"])
al_tracks = np.array(["Fottiti"])

for artist in artists:
    albums = df_new[df_new["ArtistName"] == artist]["AlbumName"].unique()
    tot_albums = np.append(tot_albums, albums)
    ar_albums = np.append(ar_albums, np.repeat(artist, len(albums)))

    for album in albums:
        tracks = df_new[(df_new["ArtistName"] == artist) & (df_new["AlbumName"] == album)]["Title"].unique()
        tot_tracks = np.append(tot_tracks, tracks)
        al_tracks = np.append(al_tracks, np.repeat(album, len(tracks)))


tot_albums = tot_albums[1:]
ar_albums = ar_albums[1:]
tot_tracks = tot_tracks[1:]
al_tracks = al_tracks[1:]

In [67]:
artists_df = pd.DataFrame()
artists_df["artists"] = artists
artists_df["artistId"] = np.arange(len(artists)) + 1
artists_df.head()

Unnamed: 0,artists,artistId
0,Mastodon,1
1,Casual,2
2,The Box Tops,3
3,Sonora Santanera,4
4,Adam Ant,5


In [63]:
db = Chinook("chinook2.db")

In [69]:
for i in range(len(artists_df)):
    db.addArtist(name = artists_df["artists"][i])  

In [79]:
albums_df = pd.DataFrame()
albums_df['albums'] = tot_albums
albums_df['artists'] = ar_albums
albums_df['artistId'] = None

for i in range(len(albums_df['artists'])):
    artist = albums_df['artists'][i]
    albums_df['artistId'][i] = int(artists_df[artists_df["artists"] == artist]["artistId"])
albums_df.head(n = 10)

Unnamed: 0,albums,artists,artistId
0,Call of the Mastodon,Mastodon,1
1,Crack The Skye,Mastodon,1
2,Leviathan,Mastodon,1
3,Fear Itself,Casual,2
4,The Building,Casual,2
5,Casual,Casual,2
6,Truck Driver,Casual,2
7,Il\xc2\xb7luminacions,Casual,2
8,Dimensions,The Box Tops,3
9,The Letter/Neon Rainbow,The Box Tops,3


In [80]:
for i in range(len(albums_df["albums"])):
    db.addAlbum(title  = albums_df["albums"][i], artistId = albums_df["artistId"][i])