In [1]:
# Load IPython's autoreload extension and switch it to mode 2, so imported
# modules are reloaded before each cell executes. This lets edits to the local
# `Script` package take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from Script import Album, Artist, Music, Genre, MusicArtist, AlbumArtist, GenreArtist, Play, TopMusic, Favorite
from Script.SQLConn import SQLConn
from Script.RAMData import RAMData
from Script.SQLData import SQLData

import json
import csv
from difflib import SequenceMatcher

In [3]:
# Connect to Database
print('Connecting to the Database...')
SQLConn.instance()

# Drop every table (if present) before the schema is recreated below
SQLConn.instance().cursor.execute("DROP TABLE IF EXISTS album,artist,music,genre,music_artist,album_artist,genre_artist,play,top_music,favorite CASCADE;")

# Create tables. The schema script is read inside a context manager so the
# file handle is closed as soon as its contents are in memory.
with open('create_tables.sql', 'r', encoding="utf-8") as sql_file:
    sql_createtables_string = sql_file.read()
SQLConn.instance().cursor.execute(sql_createtables_string)

# Obtain the RAMData instance (kept in `ramData` for the rest of the notebook)
ramData = RAMData.instance()

# Obtain the SQLData instance; later cells call `sqlData.save()` on it
sqlData = SQLData.instance()

# Commit the DROP/CREATE statements issued above
SQLConn.instance().conn.commit()
print('Connected!')

Connecting to the Database...
Connected!


## 1. Listening History

In [4]:
def getTrackId(trackName, artist):
    """Build a lowercase identifier for a track.

    The track name and the artist name are concatenated (track first, no
    separator) and the combined string is lower-cased.
    """
    combined = trackName + artist
    return combined.lower()

def formatName(name):
    """Return `name` with every single quote (') doubled ('').

    NOTE(review): this looks like escaping for SQL string literals -- confirm
    against how the Script package builds its statements.
    """
    return name.replace("'", "''")

def similar(a, b):
    """Return the difflib similarity ratio between sequences `a` and `b`.

    The result is `SequenceMatcher.ratio()`: a float in [0, 1], where 1.0
    means the two sequences are identical.
    """
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()

In [5]:
# Load the streaming-history export. The context manager closes the file as
# soon as the JSON has been parsed (the original handle was never closed).
with open('../../Datasets/4. My Spotify Data/MyData/StreamingHistory0.json', encoding="utf8") as f:
    data = json.load(f)

# Each history entry yields one music/artist link and one play record.
for line in data:
    artist = {
        'title': formatName(line['artistName'])
    }
    music = {
        'title': formatName(line['trackName'])
    }
    # The music id is resolved before the artist id, as in the original cell.
    music_artist = {
        'music_id': Music.getMusicId(music),
        'artist_id': Artist.getArtistId(artist)
    }
    MusicArtist.saveMusicArtist(music_artist)
    play = {
        'music_id': music_artist['music_id'],
        'end_time': line['endTime'],
        'ms_played': line['msPlayed']
    }
    Play.savePlay(play)

sqlData.save()

## 2. IDs

In [6]:
# Load the two ID mapping files; `with` guarantees both handles are closed.
with open('../../Datasets/ids_json_updated.json', encoding="utf8") as ids_file:
    ids = json.load(ids_file)
with open('../../Datasets/ids_artists_json.json', encoding="utf8") as ids_artist_file:
    ids_artist = json.load(ids_artist_file)

# Spotify artist ID -> artist name. Every artist entry is required to carry a
# 'spotifyID' (a missing key raises KeyError, exactly as before).
ids_artist_inv = {
    info['spotifyID']: name
    for name, info in ids_artist.items()
}

# Genius artist ID -> artist name, only for artists that have a 'geniusID'.
ids_artist_inv_ge = {
    info['geniusID']: name
    for name, info in ids_artist.items()
    if 'geniusID' in info
}

# Spotify track ID -> the track's full entry from `ids`.
ids_inv = {
    info['spotifyID']: info
    for info in ids.values()
    if 'spotifyID' in info
}

# Genius song ID -> the track's full entry from `ids`.
ids_inv_ge = {
    info['geniusID']: info
    for info in ids.values()
    if 'geniusID' in info
}
        

## 3. Spotify Artists

In [7]:
# Load the Spotify artist payloads; the file is closed once parsed.
with open('../../Datasets/spotify_artists_json.json', encoding="utf8") as spotify_artists_file:
    spotify_artists = json.load(spotify_artists_file)

for artist_raw in spotify_artists:
    # The artist title comes from the ID mapping rather than from the payload
    # itself; an ID absent from `ids_artist_inv` raises KeyError.
    artist = {
        'title': formatName(ids_artist_inv[artist_raw['id']]),
        'artist_spotify_id': artist_raw['id']
    }
    artist_id = Artist.getArtistId(artist)
    artist['artist_id'] = artist_id
    Artist.updateArtist(artist)

    # Link the artist to each of its genres.
    for genre_raw in artist_raw['genres']:
        genre = {
            'genre': formatName(genre_raw)
        }
        genre_id = Genre.getGenreId(genre)
        GenreArtist.saveGenreArtist({'artist_id': artist_id, 'genre_id': genre_id})

sqlData.save()

## 4. Spotify Songs

In [8]:
# Load the Spotify track payloads; the file is closed once parsed.
with open('../../Datasets/spotify_music_json.json', encoding="utf8") as spotify_songs_file:
    spotify_songs = json.load(spotify_songs_file)

# First pass: pass every album of a known track to Album.getAlbumId, then save.
for song_raw in spotify_songs:
    if song_raw['id'] in ids_inv:
        album = {
            'title': formatName(song_raw['album']['name']),
            'release_date': song_raw['album']['release_date'],
            'total_tracks': song_raw['album']['total_tracks'],
            'album_spotify_id': song_raw['album']['id'],
        }
        # Return value is not needed here; the call is kept because the
        # original cell made it for every matching track.
        Album.getAlbumId(album)
sqlData.save()

# Second pass: attach the Spotify metadata and the album id to each known track.
for song_raw in spotify_songs:
    if song_raw['id'] in ids_inv:
        music = {
            'title': formatName(ids_inv[song_raw['id']]['trackName']),
            'duration_ms': song_raw['duration_ms'],
            'popularity': song_raw['popularity'],
            'music_spotify_id': song_raw['id'],
            # NOTE(review): the album id is passed through formatName here but
            # not in the first pass -- confirm both produce the same lookup key.
            'album_id': Album.getAlbumId({'album_spotify_id': formatName(song_raw['album']['id'])})
        }
        music_id = Music.getMusicId(music)
        music['music_id'] = music_id
        Music.updateMusic(music)


## 5. Spotify Features

In [9]:
# Load the Spotify audio-feature payloads; the file is closed once parsed.
with open('../../Datasets/spotify_features_json.json', encoding="utf8") as spotify_features_file:
    spotify_features = json.load(spotify_features_file)

for song_raw in spotify_features:
    # Only tracks present in the Spotify ID mapping are updated.
    if song_raw['id'] in ids_inv:
        music = {
            'title': formatName(ids_inv[song_raw['id']]['trackName']),
            'danceability': song_raw['danceability'],
            'energy': song_raw['energy'],
            # Stored under 'music_key' although the payload field is 'key'.
            'music_key': song_raw['key'],
            'loudness': song_raw['loudness'],
            'mode': song_raw['mode'],
            'speechiness': song_raw['speechiness'],
            'acousticness': song_raw['acousticness'],
            'instrumentalness': song_raw['instrumentalness'],
            'liveness': song_raw['liveness'],
            'valence': song_raw['valence'],
            'tempo': song_raw['tempo'],
            'time_signature': song_raw['time_signature'],
        }
        music_id = Music.getMusicId(music)
        music['music_id'] = music_id
        Music.updateMusic(music)

## 6. Genius Artists

In [10]:
# Load the Genius artist payloads; the file is closed once parsed.
with open('../../Datasets/genius_artists_json.json', encoding="utf8") as genius_artists_file:
    genius_artists = json.load(genius_artists_file)

# A Genius record is accepted only if its name is more similar than this to
# the name stored in the ID mapping (case-insensitive SequenceMatcher ratio).
ARTIST_NAME_MIN_SIMILARITY = 0.6

for artist_raw in genius_artists:
    genius_artist = artist_raw['artist']
    if genius_artist['id'] in ids_artist_inv_ge:
        known_name = ids_artist_inv_ge[genius_artist['id']]
        # Records whose names differ too much are skipped silently.
        if similar(known_name.lower(), genius_artist['name'].lower()) > ARTIST_NAME_MIN_SIMILARITY:
            artist = {
                'title': formatName(known_name),
                'artist_genius_id': genius_artist['id'],
                'instagram_name': genius_artist['instagram_name'],
                'twitter_name': genius_artist['twitter_name']
            }
            artist_id = Artist.getArtistId(artist)
            artist['artist_id'] = artist_id
            Artist.updateArtist(artist)

sqlData.save()

## 7. Genius Songs

In [11]:
# Load the Genius song payloads; the file is closed once parsed.
with open('../../Datasets/genius_json.json', encoding="utf8") as genius_songs_file:
    genius_songs = json.load(genius_songs_file)

# A Genius record is accepted only if its title is more similar than this to
# the track name stored in the ID mapping (case-insensitive SequenceMatcher ratio).
SONG_TITLE_MIN_SIMILARITY = 0.5

for song_raw in genius_songs:
    if song_raw['id'] in ids_inv_ge:
        known_title = ids_inv_ge[song_raw['id']]['trackName']
        # Records whose titles differ too much are skipped silently.
        if similar(known_title.lower(), song_raw['title'].lower()) > SONG_TITLE_MIN_SIMILARITY:
            music = {
                'title': formatName(known_title),
                'music_genius_id': song_raw['id'],
                'lyrics': formatName(song_raw['lyrics']),
            }
            music_id = Music.getMusicId(music)
            music['music_id'] = music_id
            Music.updateMusic(music)
sqlData.save()

In [12]:
# Persist anything that earlier cells have not saved yet
sqlData.save()

# Commit the transaction on the shared database connection
db_connection = SQLConn.instance().conn
db_connection.commit()
print('Done!')

Done!
