In [5]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from datetime import datetime, timedelta
import pandas as pd
import time
import requests
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from base64 import b64encode
import discogs_client
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import csv

In [2]:
class Scraper:
    """Headless-Chrome scraper that collects band names and concert dates from Bandsintown.

    The scraper geocodes a city through the Nominatim search API, walks a
    date range in three-day steps, loads the Bandsintown listing page for
    each step, and writes one CSV row per band with the list of dates seen.
    """

    # CSS class names used by the Bandsintown listing page for the elements we read.
    BAND_CLASS = '_5CQoAbgUFZI3p33kRVk'
    DATE_CLASS = 'r593Wuo4miYix9siDdTP'
    VIEW_ALL_CLASS = 'uM7snV9wLa0DzLoMy9Q1'

    # How many times to scroll to the bottom of the page, and how long to
    # pause after each scroll before reading the listing elements.
    SCROLL_ROUNDS = 10
    SCROLL_PAUSE_SECONDS = 5

    def __init__(self, driver_path=r"C:\webdrivers\chromedriver.exe"):
        """Start a headless Chrome session.

        Args:
            driver_path: Filesystem path of the chromedriver executable.
        """
        options = Options()
        options.add_argument("--headless")
        service = Service(executable_path=driver_path)
        self.driver = webdriver.Chrome(service=service, options=options)
        self.driver.set_page_load_timeout(120)

    def tearDown(self):
        """Quit the browser session started in ``__init__``."""
        self.driver.quit()

    def get_coordinates(self, city_name):
        """Look up a city with the Nominatim search API.

        Args:
            city_name: Free-text place name sent as the ``q`` parameter.

        Returns:
            ``(latitude, longitude)`` taken from the first search hit, exactly
            as they appear in the JSON response, or ``(None, None)`` when the
            response status is not 200 or the result list is empty.
        """
        base_url = 'https://nominatim.openstreetmap.org/search'
        params = {
            'q': city_name,
            'format': 'json'
        }
        headers = {
            'User-Agent': 'MyApp/1.0'
        }
        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(base_url, params=params, headers=headers, timeout=30)

        if response.status_code != 200:
            return None, None

        data = response.json()
        if not data:
            return None, None
        return data[0]['lat'], data[0]['lon']

    @staticmethod
    def _clean_band_name(raw_text):
        """Normalise the text of a band element into a band name.

        Text that is UTF-8 mis-decoded as Latin-1 is repaired; text that is
        already correct is left untouched, so accented and non-Latin names
        keep all their characters. Anything after the first hyphen is dropped.
        """
        try:
            # Only succeeds when the text really is mis-decoded UTF-8.
            text = raw_text.encode('latin-1').decode('utf-8')
        except (UnicodeEncodeError, UnicodeDecodeError):
            text = raw_text

        # NOTE(review): this also truncates names that legitimately contain a
        # hyphen; presumably it is meant to strip a trailing tour title.
        if '-' in text:
            text = text.split('-')[0].strip()
        return text

    def scrape_website(self, start_date, end_date, city_name='the city you want',
                       output_path='bands_and_dates.csv'):
        """Scrape listings between two dates and write them to a CSV file.

        Args:
            start_date: ``datetime`` of the first window to query.
            end_date: ``datetime``; windows are queried while the window start
                is not after this value.
            city_name: Place name to geocode for the search location.
            output_path: Destination CSV with columns ``Band Name`` and ``Date``
                (``Date`` holds the list of date strings seen for the band).

        Raises:
            ValueError: If ``city_name`` cannot be geocoded.
        """
        latitude, longitude = self.get_coordinates(city_name)
        if latitude is None or longitude is None:
            # Without coordinates the listing URL would contain the text "None".
            raise ValueError(f"Could not resolve coordinates for {city_name!r}")

        # Imported here so the rest of the class does not depend on it.
        from selenium.common.exceptions import TimeoutException

        # band name -> ordered list of distinct date strings seen so far.
        dates_by_band = {}
        wait = WebDriverWait(self.driver, 10)

        while start_date <= end_date:
            formatted_start_date = start_date.strftime('%Y-%m-%dT%H:%M:%S')
            formatted_end_date = (start_date + timedelta(days=2)).strftime('%Y-%m-%dT%H:%M:%S')

            url = f"https://www.bandsintown.com/choose-dates/genre/all-genres?latitude={latitude}&longitude={longitude}&calendarTrigger=false&date={formatted_start_date}%2C{formatted_end_date}"
            self.driver.get(url)

            try:
                wait.until(EC.visibility_of_any_elements_located((By.CLASS_NAME, self.BAND_CLASS)))
            except TimeoutException:
                # A window with no visible listings must not discard what was
                # already collected; move on to the next window instead.
                print(f"No listings became visible for window starting {formatted_start_date}; skipping")
                start_date += timedelta(days=3)
                continue

            try:
                view_all_button = self.driver.find_element(By.CLASS_NAME, self.VIEW_ALL_CLASS)
                view_all_button.click()
            except Exception as e:
                print(f"Error clicking 'View All' button: {e}")

            for _ in range(self.SCROLL_ROUNDS):
                self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(self.SCROLL_PAUSE_SECONDS)

                bands = self.driver.find_elements(By.CLASS_NAME, self.BAND_CLASS)
                dates = self.driver.find_elements(By.CLASS_NAME, self.DATE_CLASS)

                # Elements are re-read after every scroll, so the same pair is
                # seen repeatedly; the membership check keeps dates distinct.
                for band, date in zip(bands, dates):
                    band_name = self._clean_band_name(band.text)
                    concert_date = date.text

                    known_dates = dates_by_band.setdefault(band_name, [])
                    if concert_date not in known_dates:
                        known_dates.append(concert_date)

            start_date += timedelta(days=3)

        # Build the frame once from records instead of growing it row by row.
        records = [
            {"Band Name": name, "Date": seen_dates}
            for name, seen_dates in dates_by_band.items()
        ]
        df = pd.DataFrame(records, columns=["Band Name", "Date"])
        df.to_csv(output_path, index=False)

# Run the scrape for the chosen date range and always release the browser.
scraper = Scraper()
try:
    scraper.scrape_website(datetime(2024, 5, 1), datetime(2024, 5, 2))
finally:
    # tearDown must be called, not merely referenced, for Chrome to be quit.
    scraper.tearDown()


In [3]:
# Spotify application credentials (fill these in before running the cell).
client_id = ''
client_secret = ''

# The token endpoint expects "client_id:client_secret" base64-encoded in a Basic auth header.
credentials = b64encode(f'{client_id}:{client_secret}'.encode('utf-8')).decode('utf-8')

headers = {
    'Authorization': f'Basic {credentials}',
    'Content-Type': 'application/x-www-form-urlencoded'
}

data = {
    'grant_type': 'client_credentials'
}

response = requests.post('https://accounts.spotify.com/api/token', headers=headers, data=data, timeout=30)

# Fail here with the HTTP error (e.g. bad credentials) instead of a
# KeyError on 'access_token' further down.
response.raise_for_status()

access_token = response.json()['access_token']


In [6]:

def get_spotify_id(artist_name, access_token):
    """Return the Spotify artist ID whose name matches ``artist_name`` exactly.

    The Spotify search endpoint is queried for up to 50 artists and every
    returned item is compared case-insensitively against ``artist_name``;
    the first exact match wins.

    Args:
        artist_name: Artist or band name to search for.
        access_token: Bearer token sent in the ``Authorization`` header.

    Returns:
        The matching artist's ``id``, or ``None`` when the request does not
        return status 200, the body is not valid JSON, or no item has
        exactly the requested name.
    """
    base_url = 'https://api.spotify.com/v1/search'
    headers = {'Authorization': f'Bearer {access_token}'}
    query_params = {
        'q': artist_name,
        'type': 'artist',
        'limit': 50
    }

    response = requests.get(base_url, headers=headers, params=query_params, timeout=30)
    if response.status_code != 200:
        # An error payload (expired token, rate limit, ...) is not a search result.
        print(f"Spotify search for {artist_name!r} returned HTTP {response.status_code}")
        return None

    try:
        data = response.json()
    except ValueError:
        return None

    artists = data.get('artists') if isinstance(data, dict) else None
    items = artists.get('items') if isinstance(artists, dict) else None

    wanted_name = artist_name.lower()
    # Scan every returned item: the exact match is not always ranked first.
    for item in items or []:
        if not isinstance(item, dict):
            continue
        returned_artist_name = item.get('name')
        if isinstance(returned_artist_name, str) and returned_artist_name.lower() == wanted_name:
            return item.get('id')

    return None


# Copy the scraped CSV to a new file, adding a 'Spotify ID' column looked up per band.
csv_file_path = 'bands_and_dates.csv'

new_csv_file_path = 'band_names_with_spotify_ids.csv'
fieldnames = ['Band Name','Date', 'Spotify ID']

# The input is written by pandas and the output is read back by pandas, both
# of which default to UTF-8; opening with the platform default encoding would
# corrupt non-ASCII band names on systems where that default differs.
with open(csv_file_path, 'r', newline='', encoding='utf-8') as file, \
        open(new_csv_file_path, 'w', newline='', encoding='utf-8') as new_file:
    reader = csv.DictReader(file)
    writer = csv.DictWriter(new_file, fieldnames=fieldnames)

    writer.writeheader()

    for row in reader:
        band_name = row['Band Name']
        spotify_id = get_spotify_id(band_name, access_token)
        # Bands without an exact Spotify match get an empty ID cell.
        row['Spotify ID'] = spotify_id if spotify_id else ''

        writer.writerow(row)

print(f"Spotify IDs appended to {new_csv_file_path}.")


Spotify IDs appended to band_names_with_spotify_ids.csv.


In [7]:
# Look up each band on Discogs, record the genres/styles of its first listed
# release, and keep the bands whose styles mention both "Indie" and "Pop".
d = discogs_client.Client('ExampleApplication/0.1', user_token='')

df_bands = pd.read_csv('band_names_with_spotify_ids.csv')

bands = df_bands["Band Name"].tolist()
dates = df_bands["Date"].tolist()
spotifyid = df_bands["Spotify ID"]

data = []

# Iterate the Spotify IDs alongside the bands so each output row carries its
# own ID rather than the whole column.
for band, date, band_spotify_id in zip(bands, dates, spotifyid):
    try:
        print(f"Searching for {band}")
        results = d.search(band, type='artist')

        if not results:
            print(f"No results found for {band}")
            continue

        artist = results[0]

        if not artist.releases:
            print(f"No releases found for {band}")
            continue

        main_release = artist.releases[0]

        genres = ', '.join(main_release.genres) if isinstance(main_release.genres, list) and main_release.genres else 'N/A'
        styles = ', '.join(main_release.styles) if isinstance(main_release.styles, list) and main_release.styles else 'N/A'

        data.append({'Band': band, 'Date': date, 'Spotify ID': band_spotify_id, 'Main Release': main_release.title, 'Genres': genres, 'Styles': styles})

    except Exception as e:
        print(f"Error occurred for {band}: {e}")

    finally:
        # Pause after every lookup, including the "no results" and error
        # paths, which also issued API requests.
        time.sleep(6)


# Explicit columns keep the 'Styles' filter below valid even when no band
# produced a row.
df = pd.DataFrame(data, columns=['Band', 'Date', 'Spotify ID', 'Main Release', 'Genres', 'Styles'])

df.to_csv('bands_with_genres_and_styles.csv', index=False)

filtered_df = df[
    df['Styles'].str.contains('Indie', regex=False)
    & df['Styles'].str.contains('Pop', regex=False)
]

filtered_df.to_csv('indie_and_pop_bands.csv', index=False)

print(filtered_df)


Searching for Odinani
No results found for Odinani
Searching for Bill Hearne Music
Searching for Immortal Guardian
Searching for Bridging The Music Productions
Searching for Edge of Paradise
Searching for Caroline Rose
Searching for Flamy Grant
Searching for Ian Sweet


In [9]:

# --- Authenticate against Spotify with permission to edit public playlists ---
client_id = ''
client_secret = ''
redirect_uri = 'http://localhost:8888/callback'
scope = 'playlist-modify-public'
auth_manager = SpotifyOAuth(
    client_id=client_id,
    client_secret=client_secret,
    redirect_uri=redirect_uri,
    scope=scope,
)
sp = spotipy.Spotify(auth_manager=auth_manager)

current_user = sp.current_user()
user_id = current_user['id']

# --- Load the band table: first column is the name, third is the Spotify ID ---
excel_file = 'band_names_with_spotify_ids.csv'
df = pd.read_csv(excel_file)
band_names = df.iloc[:, 0].tolist()
band_ids = df.iloc[:, 2].tolist()

# --- Collect the first top track of every band that has a Spotify ID ---
track_uris = []
for band_id in band_ids:
    has_id = not (pd.isnull(band_id) or band_id == "")
    if has_id:
        try:
            artist_tracks = sp.artist_top_tracks(band_id)
            if artist_tracks['tracks']:
                top_track = artist_tracks['tracks'][0]
                track_uris.append(top_track['uri'])
        except Exception as e:
            print(f"Error processing band_id {band_id}: {e}")
    else:
        print("Skipping band_id as it's NaN or empty")

# --- Create the playlist ---
playlist_name = 'your playlist name'
new_playlist = sp.user_playlist_create(user=user_id, name=playlist_name, public=True)
playlist_id = new_playlist['id']

# --- Add the tracks one request at a time, remembering the ones that fail ---
failed_tracks = []
for uri in track_uris:
    if uri is None:
        continue

    try:
        sp.playlist_add_items(playlist_id, [uri])
    except Exception as e:
        if isinstance(e, (TypeError, ValueError)):
            print("Skipping track:", e)
        elif (
            isinstance(e, spotipy.SpotifyException)
            and e.http_status == 400
            and 'Reason: Playlist contains more than the maximum number of tracks' in e.msg
        ):
            # A full playlist is reported but not counted as a failed track.
            print("Skipping track: Maximum number of tracks reached in the playlist.")
            continue
        else:
            print("Error adding track to playlist:", e)

        print("Track URI:", uri)
        failed_tracks.append(uri)

# --- Summarise what could not be added ---
if failed_tracks:
    print("Failed to add the following tracks to the playlist:")
    for uri in failed_tracks:
        print("Track URI:", uri)


Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping band_id as it's NaN or empty
Skipping ban