In [257]:
from selenium import webdriver
from selenium.common.exceptions import ElementNotVisibleException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup as bs
import time
import re
from datetime import datetime as dt
from dateutil.relativedelta import relativedelta as time_ago
from dateutil.parser import parser
import pandas as pd
import numpy as np
import requests
import string
import random
import os

In [255]:
class PlayMusicScraper():
    """Collect song metadata from the playlists of the Google Play Music web player.

    Constructing the scraper opens a Chrome window through Selenium. The
    notebook expects the user to log in manually in that window before calling
    ``scrape_all_playlists``.
    """

    # Column order of the scraped table and of the CSV written from it.
    COLUMNS = ['title', 'duration', 'artist', 'album', 'playlist']

    def __init__(self, path_to_chromedriver=None):
        """Start Chrome and open the Google Play Music landing page.

        Args:
            path_to_chromedriver: Path to the chromedriver executable. When
                omitted, a file named ``chromedriver`` in the current working
                directory is used.
        """
        if not path_to_chromedriver:
            path_to_chromedriver = os.path.abspath('chromedriver')
        self.browser = webdriver.Chrome(path_to_chromedriver)
        # One dict per scraped song, keyed by the names in COLUMNS.
        self.playlist_data = []
        # DataFrame built from playlist_data by scrape_all_playlists().
        self.playlist_df = None
        self.browser.get('https://play.google.com/music')

    def go_to_playlist_page(self, playlist_page_url='https://play.google.com/music/listen#/wmp'):
        """Navigate the browser to the page that lists the playlists."""
        self.browser.get(playlist_page_url)

    def scrape_next_playlist(self, playlist_idx):
        """Open the playlist card at ``playlist_idx`` and record its songs.

        Every ``song-row`` element found after clicking the card is split on
        newlines; lines 1-4 of that text are stored as title, duration, artist
        and album (line 0 is ignored). Rows that do not provide all four
        fields are skipped instead of aborting the whole scrape. Afterwards the
        browser is sent back to the playlist overview page.

        Args:
            playlist_idx: Zero-based position of the playlist card on the
                overview page.

        Raises:
            IndexError: If there is no card at ``playlist_idx``.
        """
        playlist_box = self.browser.find_element_by_class_name('material-card-grid')
        playlist = playlist_box.find_elements_by_class_name('material-card')[playlist_idx]
        # The first line of the card text is used as the playlist name.
        playlist_name = playlist.text.split('\n')[0]
        print('---> Scraping (#%s) "%s"' % (playlist_idx + 1, playlist_name))
        playlist.click()
        time.sleep(1)  # give the playlist page time to render its rows

        skipped_rows = 0
        for song in self.browser.find_elements_by_class_name('song-row'):
            fields = song.text.split('\n')[1:5]
            if len(fields) < 4:
                # Unexpected row layout: unpacking would raise ValueError.
                skipped_rows += 1
                continue
            title, duration, artist, album = fields
            self.playlist_data.append({
                'title': title,
                'duration': duration,
                'artist': artist,
                'album': album,
                'playlist': playlist_name
            })
        if skipped_rows:
            print('     Skipped %s song row(s) with missing fields' % skipped_rows)
        self.go_to_playlist_page()

    def scrape_all_playlists(self, save_successful_scrape=True, output_path='google-playlists.csv'):
        """Scrape every playlist card on the overview page into ``playlist_df``.

        Previously collected rows are discarded first, so running this method
        again does not duplicate songs.

        Args:
            save_successful_scrape: When true, write the result to
                ``output_path`` as CSV (UTF-8, no index column).
            output_path: Destination of the CSV file.
        """
        self.go_to_playlist_page()
        time.sleep(1)
        playlist_box = self.browser.find_element_by_class_name('material-card-grid')
        num_playlists = len(playlist_box.find_elements_by_class_name('material-card'))
        print('Found %s playlists to scrape...' % num_playlists)

        # Start from a clean slate so a re-run replaces rather than appends.
        self.playlist_data = []
        for playlist_idx in range(num_playlists):
            self.scrape_next_playlist(playlist_idx)
            time.sleep(1)

        # Explicit columns keep the header present even when nothing was scraped.
        self.playlist_df = pd.DataFrame(self.playlist_data, columns=self.COLUMNS)
        if save_successful_scrape:
            # Playlist names may contain non-ASCII characters; pin the encoding.
            self.playlist_df.to_csv(output_path, index=False, encoding='utf-8')

In [262]:
class SpotifyController():
    """Recreate scraped playlists in the Spotify web player via Selenium-driven Chrome.

    Constructing the controller opens a Chrome window and loads the CSV of
    scraped songs. The notebook expects the user to log in manually in that
    window before calling the ``create_*`` / ``populate_*`` methods.
    """

    # Absolute XPaths into the web player's DOM, kept exactly as recorded.
    NEW_PLAYLIST_BUTTON_XPATH = '//*[@id="main"]/div/div[4]/div[2]/div[1]/section/div[1]/div/div'
    SEARCH_BUTTON_XPATH = '//*[@id="main"]/div/div[4]/div[1]/nav/div[1]/ul/li[1]/div/a'
    TRACKS_TAB_XPATH = '//*[@id="main"]/div/div[4]/div[2]/div[1]/section/nav/ul/li[2]/div/a'
    ADD_TO_PLAYLIST_XPATH = '//*[@id="main"]/div/nav[1]/div[4]'
    PLAYLIST_PICKER_XPATH = '//*[@id="main"]/div/div[6]/div/div/div/div[2]/div'

    def __init__(self, path_to_chromedriver=None, playlist_csv='google-playlists.csv'):
        """Start Chrome, load the scraped songs and open the Spotify playlist page.

        Args:
            path_to_chromedriver: Path to the chromedriver executable. When
                omitted, a file named ``chromedriver`` in the current working
                directory is used.
            playlist_csv: CSV file with ``title``, ``duration``, ``artist``,
                ``album`` and ``playlist`` columns.
        """
        if not path_to_chromedriver:
            path_to_chromedriver = os.path.abspath('chromedriver')
        self.browser = webdriver.Chrome(path_to_chromedriver)
        self.playlist_df = self._load_playlists(playlist_csv)
        self.browser.get('https://open.spotify.com/collection/playlists')

    @staticmethod
    def _load_playlists(csv_path):
        """Read the scraped-songs CSV, preferring UTF-8 over ISO-8859-1.

        pandas writes CSV as UTF-8 by default, so decoding it as ISO-8859-1
        garbles non-ASCII playlist names. ISO-8859-1 is kept only as a
        fallback for files that are not valid UTF-8.
        """
        try:
            return pd.read_csv(csv_path, encoding='utf-8')
        except UnicodeDecodeError:
            return pd.read_csv(csv_path, encoding='ISO-8859-1')

    @staticmethod
    def _as_text(value):
        """Return ``value`` as a string, mapping missing CSV cells (NaN/None) to ''."""
        return '' if pd.isna(value) else str(value)

    def go_to_playlist_page(self, playlist_page_url='https://open.spotify.com/collection/playlists'):
        """Navigate the browser to the page that lists the user's playlists."""
        self.browser.get(playlist_page_url)

    def create_new_playlist(self, playlist_name):
        """Create an empty playlist called ``playlist_name`` through the web UI.

        Clicks the new-playlist button, types the name into the input box,
        confirms with ENTER and returns to the playlist page.
        """
        new_playlist_button = self.browser.find_element_by_xpath(self.NEW_PLAYLIST_BUTTON_XPATH)
        new_playlist_button.click()
        time.sleep(2)
        name_input = self.browser.find_element_by_class_name("inputBox-input")
        name_input.click()
        name_input.send_keys(playlist_name)
        name_input.send_keys(Keys.ENTER)
        time.sleep(2)
        self.go_to_playlist_page()
        time.sleep(2)

    def create_all_new_playlists(self):
        """Create one playlist per distinct, non-missing name in the ``playlist`` column."""
        self.go_to_playlist_page()
        time.sleep(3)
        playlists_to_create = self.playlist_df['playlist'].dropna().unique()
        for playlist_name in playlists_to_create:
            self.create_new_playlist(playlist_name)

    def search_song(self, title, artist, album, duration):
        """Search for a song and return the first sufficiently similar result row.

        A result row is accepted when at least two of title, duration and
        album equal the requested values (title and album case-insensitively).
        Missing values (NaN/None) are treated as empty strings and never count
        as a match. Result rows whose text cannot be parsed are skipped.

        Args:
            title, artist, album, duration: Song details as scraped.

        Returns:
            The matching ``tracklist-row`` element, or ``None`` when the
            tracks tab cannot be opened or no row matches.
        """
        title, artist, album, duration = (
            self._as_text(value) for value in (title, artist, album, duration)
        )
        query_parts = [title, artist.replace('&', ''), album]
        search_string = ' '.join(part for part in query_parts if part)

        search_button = self.browser.find_element_by_xpath(self.SEARCH_BUTTON_XPATH)
        search_button.click()
        search_box = self.browser.find_element_by_class_name('inputBox-input')
        # A previous search that found nothing leaves its text in the box.
        search_box.clear()
        search_box.send_keys(search_string)
        time.sleep(2)

        try:
            tracks_button = self.browser.find_element_by_xpath(self.TRACKS_TAB_XPATH)
            tracks_button.click()
            time.sleep(2)
            results = self.browser.find_elements_by_class_name('tracklist-row')
        except Exception:
            # Not a bare except: KeyboardInterrupt must still stop the run.
            print('Could not find "%s"' % title)
            return None

        for track in results:
            track_lines = track.text.split('\n')
            if len(track_lines) < 3:
                continue
            result_title = track_lines[0].strip()
            result_duration = track_lines[2].strip()
            # Line 1 is expected to read "<artist> • <album>".
            _result_artist, separator, result_album = track_lines[1].partition('•')
            if not separator:
                continue
            result_album = result_album.strip()

            evidence = 0
            if title and title.lower() == result_title.lower():
                evidence += 1
            if duration and duration == result_duration:
                evidence += 1
            if album and album.lower() == result_album.lower():
                evidence += 1
            if evidence > 1:
                return track
        return None

    def add_song(self, track, playlist_name, max_attempts=5):
        """Add a search-result row to the playlist called ``playlist_name``.

        Hovers over the row, opens its options menu and clicks the
        add-to-playlist entry, retrying up to ``max_attempts`` times. Then
        clicks the picker entry whose second text line equals
        ``playlist_name`` and returns to the playlist page.

        Args:
            track: Result row element as returned by ``search_song``.
            playlist_name: Name of the target playlist.
            max_attempts: How often to retry opening the menu before giving up.

        Returns:
            True when a playlist entry was clicked, False otherwise.
        """
        last_error = None
        for _attempt in range(max_attempts):
            try:
                track_html = track.find_element_by_class_name('track-name-wrapper')
                ActionChains(self.browser).move_to_element(track_html).perform()
                time.sleep(2)
                track_options = track.find_element_by_class_name('spoticon-ellipsis-16')
                track_options.click()
                time.sleep(1)
                add_to_playlist_button = self.browser.find_element_by_xpath(self.ADD_TO_PLAYLIST_XPATH)
                add_to_playlist_button.click()
                break
            except Exception as error:
                # Bounded retry; a bare except in an endless loop could not be interrupted.
                last_error = error
                time.sleep(1)
        else:
            print('Could not open the add-to-playlist menu: %r' % (last_error,))
            self.go_to_playlist_page()
            time.sleep(1)
            return False

        playlist_box = self.browser.find_element_by_xpath(self.PLAYLIST_PICKER_XPATH)
        playlists = playlist_box.find_elements_by_class_name('GlueDropTarget')
        added = False
        for spotify_playlist in playlists:
            entry_lines = spotify_playlist.text.split('\n')
            if len(entry_lines) > 1 and entry_lines[1] == playlist_name:
                spotify_playlist.click()
                added = True
                break
        if not added:
            print('Playlist "%s" was not offered by Spotify' % playlist_name)
        time.sleep(1)
        self.go_to_playlist_page()
        time.sleep(1)
        return added

    def populate_playlist(self, single_playlist_df, playlist_name):
        """Search for every song in ``single_playlist_df`` and add the matches.

        Args:
            single_playlist_df: Rows of one playlist with ``title``,
                ``artist``, ``album`` and ``duration`` columns.
            playlist_name: Name of the Spotify playlist to add the songs to.
        """
        for title, artist, album, duration in single_playlist_df[['title', 'artist', 'album', 'duration']].values:
            correct_track = self.search_song(title, artist, album, duration)
            if correct_track is None:
                print(title, 'didnt work')
                continue
            if self.add_song(correct_track, playlist_name):
                print('---> Successfully added: %s' % title)
            else:
                print(title, 'could not be added')

    def populate_all_playlists(self):
        """Populate every playlist found in the ``playlist`` column of the loaded CSV."""
        self.go_to_playlist_page()
        grouped_playlists = self.playlist_df.groupby('playlist')
        for playlist_name, playlist_df in grouped_playlists:
            self.populate_playlist(playlist_df, playlist_name)
            print('Successfully created %s' % playlist_name)

## 1. Run the cell below

In [259]:
# Opens a Chrome window on https://play.google.com/music (see PlayMusicScraper.__init__).
pms = PlayMusicScraper()

## 2. Log in to Google Play Music
## 3. Run the cell below

In [261]:
# Visits every playlist card and, by default, writes the songs to google-playlists.csv.
pms.scrape_all_playlists()

Found 24 playlists to scrape...
---> Scraping (#1) "*whimpers softly into pillow*"
---> Scraping (#2) "B"
---> Scraping (#3) "Blz"
---> Scraping (#4) "Buyk"
---> Scraping (#5) "chel illectronic"
---> Scraping (#6) "CPT"
---> Scraping (#7) "darty"
---> Scraping (#8) "downbeet"
---> Scraping (#9) "EarE"
---> Scraping (#10) "flI"
---> Scraping (#11) "haters gon hate"
---> Scraping (#12) "haus step"
---> Scraping (#13) "heaux'd up"
---> Scraping (#14) "I'm an 11 year old girl"
---> Scraping (#15) "lectro hop"
---> Scraping (#16) "metal jåms"
---> Scraping (#17) "parT"
---> Scraping (#18) "partayyyyy"
---> Scraping (#19) "rap hop and b"
---> Scraping (#20) "rdo"
---> Scraping (#21) "sk7"
---> Scraping (#22) "TRAPT"
---> Scraping (#23) "upbeet"
---> Scraping (#24) "ye olde jáms"


## 4. Run the cell below

In [263]:
# Opens another Chrome window on the Spotify playlists page and loads google-playlists.csv.
sc = SpotifyController()

## 5. Log in to Spotify, then run the two cells below in order

In [117]:
# Creates one Spotify playlist per distinct name in the CSV's `playlist` column.
sc.create_all_new_playlists()

In [None]:
# Searches Spotify for every scraped song and adds each match to its playlist.
sc.populate_all_playlists()

---> Successfully added: Open Arms
---> Successfully added: Do You Go Up (Original Mix)
---> Successfully added: Gita
---> Successfully added: Surrender
---> Successfully added: Watch My Back
---> Successfully added: Take (Original Mix)
Successfully created *whimpers softly into pillow*
---> Successfully added: Lorde 2Pac Beck Mashup
---> Successfully added: ICY GRL
---> Successfully added: Never Gonna Give You Up
---> Successfully added: OKRA
---> Successfully added: Angggry Byrdz
---> Successfully added: Werkin' Girls
---> Successfully added: Come On! Come On!
---> Successfully added: Heregoesnothin
