In [79]:
import pandas as pd
import numpy as np
import wikipedia as wiki
from bs4 import BeautifulSoup
from optional import Optional
from geopy.geocoders import Nominatim

In [68]:
def getArtistOrigin(name: str):
    """Return the origin text for an artist, or "Not Found".

    Delegates the lookup to ``getOriginFromWikipedia`` and unwraps the
    Optional it returns. When the Optional is empty, a notice is printed
    and the string "Not Found" is returned in place of an origin.
    """
    lookup = getOriginFromWikipedia(name)

    # Happy path first: unwrap and hand back whatever the lookup produced.
    if lookup.is_present():
        return lookup.get()

    print("Still not found!")
    return "Not Found"

def getOriginFromWikipedia(name: str):
    """Look up the "Origin" infobox entry on an artist's Wikipedia page.

    Searches Wikipedia for ``name``, loads the top search hit, and scans the
    first table carrying the ``infobox`` CSS class for a row whose header
    cell reads exactly "Origin".

    Args:
        name: Artist name used as the Wikipedia search query.

    Returns:
        ``Optional.of(<origin text>)`` when an "Origin" row is found;
        ``Optional.empty()`` when the search has no hits, the top hit cannot
        be resolved to a single page, the page has no infobox, or the infobox
        has no "Origin" row.
    """
    hits = wiki.search(name)
    if not hits:
        # Nothing to index into; previously this raised IndexError.
        return Optional.empty()

    try:
        artistPage = wiki.page(hits[0], auto_suggest=False).html()
    except (wiki.exceptions.PageError, wiki.exceptions.DisambiguationError):
        # A missing or ambiguous page is just another "origin unknown" case.
        return Optional.empty()

    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed (and to avoid the guessed-parser warning).
    bs = BeautifulSoup(artistPage, "html.parser")

    # Only accept a table that really has the "infobox" class. The previous
    # loop fell through with the *last* table on the page when none matched.
    infobox = bs.find("table", class_="infobox")
    if infobox is None:
        return Optional.empty()

    # Pair each label with the value in the SAME row. Zipping every <th>
    # against every <td> drifts out of step as soon as a row lacks one of them.
    for row in infobox.find_all("tr"):
        header = row.find("th")
        cell = row.find("td")
        if header is None or cell is None:
            continue
        if header.text.strip() == "Origin":
            return Optional.of(cell.text.strip())

    return Optional.empty()

In [69]:
# simple test (queries live Wikipedia, so the expected text can drift with page edits)
assert getArtistOrigin("DaBaby") == 'Charlotte, North Carolina'

In [85]:
def createDataframe(artist_list):
    """Build a DataFrame of artists with their origin name and coordinates.

    Args:
        artist_list: Sequence of artist names to look up.

    Returns:
        A ``pandas.DataFrame`` with three columns:
        ``artist_name`` (the input names), ``location_name`` (the value
        returned by ``getArtistOrigin``) and ``location_coord`` (the value
        returned by ``getLocationByGeo``, or ``None`` when the origin lookup
        produced the "Not Found" placeholder).
    """
    location_name = [getArtistOrigin(artist) for artist in artist_list]

    # "Not Found" is getArtistOrigin's placeholder, not a place name; sending
    # it to the geocoder would either fail or yield unrelated coordinates.
    location_coord = [
        getLocationByGeo(loc) if loc != "Not Found" else None
        for loc in location_name
    ]

    df = pd.DataFrame({
        "artist_name": artist_list,
        "location_name": location_name,
        "location_coord": location_coord,
    })
    return df
                     
## https://stackoverflow.com/questions/56873950/convert-place-name-to-coordinates-by-geopy
def getLocationByGeo(name):
    """Geocode a place name to a ``(latitude, longitude)`` tuple.

    Args:
        name: Free-text place name passed to the Nominatim geocoder.

    Returns:
        ``(lat, lng)`` for the match returned by the geocoder, or ``None``
        when ``name`` is empty or the geocoder finds no match.
    """
    if not name:
        # Nothing to look up; skip the network round-trip.
        return None

    geolocator = Nominatim(user_agent='my-application')
    location = geolocator.geocode(name)

    # geocode() yields None when there is no match; reading .latitude on it
    # used to raise AttributeError.
    if location is None:
        return None

    return (location.latitude, location.longitude)

In [88]:
# Small hand-picked sample to exercise the full lookup + geocode pipeline.
sample_artists = [
    "vampire weekend",
    "ra ra riot",
    "saint motel",
    "daft punk",
    "jukebox the ghost",
    "smallpools",
]
createDataframe(sample_artists)

Unnamed: 0,artist_name,location_name,location_coord
0,vampire weekend,"New York City, U.S.","(40.6504178, -73.7971341)"
1,ra ra riot,"Syracuse, New York, U.S.","(43.0481221, -76.1474244)"
2,saint motel,"Los Angeles, California, US","(34.0536909, -118.2427666)"
3,daft punk,"Paris, France","(48.8566969, 2.3514616)"
4,jukebox the ghost,DC metro-area,"(14.63487765, 121.03419475000001)"
5,smallpools,"Los Angeles, California, United States","(34.0536909, -118.2427666)"
