In [8]:
import numpy as np
import pandas as pd
import os
import sys
from collections import defaultdict
from importlib import reload
from bs4 import BeautifulSoup
import requests

import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [1]:
from pymongo import MongoClient
client = MongoClient()
# Access/Initiate Database
db = client['test_database']
# Access/Initiate Table
tab = db['test_table']

In [4]:
collection = db.tab

In [5]:
idx = collection.insert_one( {'track':'Across the Universe',
                              'artist': 'The Beatles'})

In [6]:
collection.find_one()

{'_id': ObjectId('5c9bd85d991f5208a0897a16'),
 'track': 'Across the Universe',
 'artist': 'The Beatles'}

# Try putting spotify data in a mongodb

In [9]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

client_id = os.environ['SPOTIFY_CLIENT_ID']
client_secret = os.environ['SPOTIFY_CLIENT_SECRET']
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [15]:
track = 'She Loves You'
artist = 'The Beatles'
query = 'track:{} artist:{}'.format(track,artist)
result = sp.search(q=query, type='track')
song_id = result['tracks']['items'][0]['id']
song_info = sp.track(song_id)
song_analysis = sp.audio_analysis(song_id)
song_features = sp.audio_features(song_id)

In [18]:
new_entry = {'track':track,
             'artist':artist,
             'spotify_id':song_id,
             'track_info':song_info,
             'audio_analysis':song_analysis,
             'audio_features':song_features}

collection.insert_one(new_entry)

<pymongo.results.InsertOneResult at 0x1224dab08>

In [22]:
collection.find_one({'track':track})

{'_id': ObjectId('5c9bdb38991f5208a0897a17'),
 'track': 'She Loves You',
 'artist': 'The Beatles',
 'spotify_id': '6nEkxYIEnrbYH7h1hJ8Xn6',
 'track_info': {'album': {'album_type': 'album',
   'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/3WrFJ7ztbogyGnTHbHJFl2'},
     'href': 'https://api.spotify.com/v1/artists/3WrFJ7ztbogyGnTHbHJFl2',
     'id': '3WrFJ7ztbogyGnTHbHJFl2',
     'name': 'The Beatles',
     'type': 'artist',
     'uri': 'spotify:artist:3WrFJ7ztbogyGnTHbHJFl2'}],
   'available_markets': ['AD',
    'AE',
    'AR',
    'AT',
    'AU',
    'BE',
    'BG',
    'BH',
    'BO',
    'BR',
    'CA',
    'CH',
    'CL',
    'CO',
    'CR',
    'CY',
    'CZ',
    'DE',
    'DK',
    'DO',
    'DZ',
    'EC',
    'EE',
    'EG',
    'ES',
    'FI',
    'FR',
    'GB',
    'GR',
    'GT',
    'HK',
    'HN',
    'HU',
    'ID',
    'IE',
    'IL',
    'IN',
    'IS',
    'IT',
    'JO',
    'JP',
    'KW',
    'LB',
    'LI',
    'LT',
    'LU',
    'LV',

## Creating an entry for a Mongo DB

We need to create an entry for a Mongo DB with the following features:

    track: The title of the track
    artist: The name of the artist
    album: The name of the album
    producer: Then name of the producer (OUR TARGET)
    audio_analysis: Spotify Audio Analysis
    audio_features: Spotify Audio Features

In [24]:
gm_table = db['george_martin_test']
gm_collection = db.gm_table
gm_collection

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'test_database'), 'gm_table')

In [25]:
producer = 'George Martin'
cat_url = 'https://en.wikipedia.org/wiki/Category:Song_recordings_produced_by_George_Martin'

In [26]:
import src.wiki_scraping as wiki_scraping

In [33]:
reload(wiki_scraping)

<module 'src.wiki_scraping' from '/Users/Maxwell/galvanize/production-value/src/wiki_scraping.py'>

In [34]:
# Initialize Spotify Credentials

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

client_id = os.environ['SPOTIFY_CLIENT_ID']
client_secret = os.environ['SPOTIFY_CLIENT_SECRET']
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [35]:
producer = 'George Martin'
cat_url = 'https://en.wikipedia.org/wiki/Category:Song_recordings_produced_by_George_Martin'


print('Scraping Wikipedia')
spotify_info = wiki_scraping.get_spotify_info_from_wiki(cat_url, sp)

print('Example data:')
print(spotify_info[:5])
print()

print('Extracting Audio Analysis')
print()

idx_list = []


for track, artist, album, song_id, spotify_track, spotify_artist in spotify_info:
    print('Importing {} by {}...'.format(track,artist))
    query = 'track:{} artist:{}'.format(track,artist)
    result = sp.search(q=query, type='track')
    song_id = result['tracks']['items'][0]['id']
    song_info = sp.track(song_id)
    song_analysis = sp.audio_analysis(song_id)
    song_features = sp.audio_features(song_id)
    
    
    new_entry = {'track':track,
                 'artist':artist,
                 'album':album,
                 'producer':producer,
                 'spotify_id':song_id,
                 'track_info':song_info,
                 'audio_analysis':song_analysis,
                 'audio_features':song_features}

    idx = gm_collection.insert_one(new_entry)
    idx_list.append(idx)
    
    print('Import Complete.')
    print()

Scraping Wikipedia
Example data:
[('12-Bar Original', 'the Beatles', 'Anthology 2', '2HvTGx5fzFGpHSyRNvXd9T', '12 Bar Original - Anthology 2 Version', 'The Beatles'), ('Across the Universe', 'the Beatles', "No One's Gonna Change Our World", '4dkoqJrP0L8FXftrMZongF', 'Across The Universe - Remastered 2009', 'The Beatles'), ('Act Naturally', 'Buck Owens and the Buckaroos', 'The Best of Buck Owens', '2LClPTK0FNl4AnOfKUJBQw', 'Act Naturally (Live)', 'Buck Owens & The Buckaroos'), ('Alfie', 'Cilla Black', 'Cilla Black singles chronology', '2IqtBxwRgNOt7YWMmulrUZ', 'Alfie - 2003 Remaster', 'Cilla Black'), ("All I've Got to Do", 'the Beatles', 'With the Beatles', '5tztLBvTlNC15Np2tnQ5Ll', "All I've Got To Do - Remastered 2009", 'The Beatles')]

Extracting Audio Analysis

Importing 12-Bar Original by the Beatles...
Import Complete.

Importing Across the Universe by the Beatles...
Import Complete.

Importing Act Naturally by Buck Owens and the Buckaroos...
Import Complete.

Importing Alfie by C

In [31]:
spotify_info[:5]

[]

In [40]:
gm_collection.find_one({'track':'Nowhere Man'})['audio_features']

[{'danceability': 0.534,
  'energy': 0.624,
  'key': 4,
  'loudness': -9.589,
  'mode': 1,
  'speechiness': 0.0462,
  'acousticness': 0.00797,
  'instrumentalness': 0,
  'liveness': 0.128,
  'valence': 0.687,
  'tempo': 121.402,
  'type': 'audio_features',
  'id': '5SUlhldQJtOhUr2GzH5RI7',
  'uri': 'spotify:track:5SUlhldQJtOhUr2GzH5RI7',
  'track_href': 'https://api.spotify.com/v1/tracks/5SUlhldQJtOhUr2GzH5RI7',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/5SUlhldQJtOhUr2GzH5RI7',
  'duration_ms': 163693,
  'time_signature': 4}]