In [2]:
# The %... is an iPython thing, and is not part of the Python language.
# In this case we're just telling the plotting library to draw things on
# the notebook, instead of on a separate window.
%matplotlib inline
# See all the "as ..." contructs? They're just aliasing the package names.
# That way we can call methods like plt.plot() instead of matplotlib.pyplot.plot().
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
import time
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")

In [3]:
from pyquery import PyQuery as pq
from bs4 import BeautifulSoup
# The "requests" library makes working with HTTP requests easier
# than the built-in urllib libraries.
import requests

In [4]:
end_year=requests.get("http://www.billboard.com/charts/year-end/2014/hot-100-songs").text

In [7]:
from datetime import date, timedelta

def allsats(year, fs):
    days=[]
    d = date(year, 1, fs)       
    while d.year == year:
        days.append(d.strftime("%Y-%m-%d"))
        d += timedelta(days = 7)
    return days

In [8]:
def allsats(syear, years, fs):
    days=[]
    d = date(syear, 1, fs)
    while d.year in years:
        days.append(d.strftime("%Y-%m-%d"))
        d += timedelta(days = 7)
    return days

In [9]:
years = [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014]
sats = allsats(2000, years, 1)

In [11]:
year2015 = [2015]
sats2015 = allsats(2015, year2015, 3)
satdict2015={}
for sat in sats2015:
    satdict2015[sat]=requests.get("http://www.billboard.com/charts/hot-100/"+sat).text
    time.sleep(1)

In [60]:
satdict={}
satdict['end_year']=end_year
for sat in sats:
    satdict[sat]=requests.get("http://www.billboard.com/charts/hot-100/"+sat).text
    time.sleep(1)

In [13]:
import json

In [62]:
with open("tempdata/satdict.json","w") as fd:
    json.dump(satdict, fd)
del satdict
with open("tempdata/satdict.json") as fd:
    satdict=json.load(fd)

In [14]:
def rank_scrape(articlerows):
    songdicts = []
    songdict = {}
    ranking = 1
    for article in articlerows:
        songdict["ranking"] = ranking
        title = article.find("div", attrs={"class": "row-title"}).find("h2").get_text()
        artist = article.find("div", attrs={"class": "row-title"}).find("h3").get_text()
        title = title.strip()
        sep1 = 'Featuring'
        sep2 = ','
        sep3 = '&'
        artist = artist.split(sep1, 1)[0]
        artist = artist.split(sep2, 2)[0]
        artist = artist.split(sep3, 3)[0]
        artist = artist.strip()
        songdict["title"] = title
        songdict["artist"] = artist
        songdicts.append(dict(songdict))
        ranking += 1
    return songdicts

def get_weekly(weekstring, indict):
    raw_chart = indict[weekstring]
    text = BeautifulSoup(raw_chart, "html.parser")
    rows = text.find_all("article", attrs={"class": "chart-row"})
    
    testdicts = rank_scrape(rows)
    for test in testdicts:
        test['date'] = weekstring
    return testdicts

In [None]:
weekinfo=[]
for k in sats:
    weekinfo.append(get_weekly(k, satdict))

In [None]:
datum = []
for week in weekinfo:
    datum.append(pd.DataFrame(week))
data2014 = pd.concat(datum, ignore_index=True)

In [15]:
weekinfo=[]
for k in sats2015:
    weekinfo.append(get_weekly(k, satdict2015))

In [16]:
datum = []
for week in weekinfo:
    datum.append(pd.DataFrame(week))
data2015 = pd.concat(datum, ignore_index=True)

In [95]:
newdata2014 = data2014.drop_duplicates(subset=['artist', 'title'])

In [18]:
newdata2015 = data2015.drop_duplicates(subset=['artist', 'title'])

In [96]:
newdata2014.to_csv('tempdata/rawdata.csv', encoding='utf-8')

In [44]:
newdata2014 = pd.read_csv('tempdata/rawdata.csv')

In [19]:
newdata2015.to_csv('tempdata/raw2015data.csv', encoding='utf-8')

In [20]:
newdata2015 = pd.read_csv('tempdata/raw2015data.csv')

In [103]:
lisa2014 = newdata2014[0:2888]
steven2014 = newdata2014[2888:]
lisa2014.to_csv('tempdata/lisa2014.csv')
steven2014.to_csv('tempdata/steven2014.csv')

Unnamed: 0,artist,date,ranking,title
0,Santana,2000-01-01,1,Smooth
1,Brian McKnight,2000-01-01,2,Back At One
2,Jessica Simpson,2000-01-01,3,I Wanna Love You Forever
3,Whitney Houston,2000-01-01,4,My Love Is Your Love
4,Savage Garden,2000-01-01,5,I Knew I Loved You


In [153]:
import urllib2
#all_songs = []
#featurings = []
for index, row in steven2014[780:].iterrows():
    song = urllib2.quote(row["title"].encode("utf-8"))
    artist = urllib2.quote(row["artist"].encode("utf-8"))
    link = requests.get('http://developer.echonest.com/api/v4/song/search?api_key=GT3HIFAMRWWCOWIAZ' + 
                        '&bucket=artist_discovery&bucket=artist_discovery_rank&bucket=artist_familiarity' +
                        '&bucket=artist_familiarity_rank&bucket=artist_hotttnesss&bucket=artist_hotttnesss_rank' +
                        '&bucket=artist_location&bucket=song_currency&bucket=song_currency_rank' +
                        '&bucket=song_discovery&bucket=song_discovery_rank&bucket=song_hotttnesss'+
                        '&bucket=song_hotttnesss_rank&bucket=song_type&bucket=audio_summary' +
                        '&results=1&title=%(song)s&artist=%(artist)s' % \
                        {"song": song, "artist": artist})
    result_json = json.loads(link.text)
    if not result_json["response"]["songs"]:
        featurings.append(index)
    all_songs.append(result_json)
    time.sleep(4)

In [24]:
import urllib2
all_songs2015 = []
featurings2015 = []
for index, row in newdata2015.iterrows():
    song = urllib2.quote(row["title"].encode("utf-8"))
    artist = urllib2.quote(row["artist"].encode("utf-8"))
    link = requests.get('http://developer.echonest.com/api/v4/song/search?api_key=GT3HIFAMRWWCOWIAZ' + 
                        '&bucket=artist_discovery&bucket=artist_discovery_rank&bucket=artist_familiarity' +
                        '&bucket=artist_familiarity_rank&bucket=artist_hotttnesss&bucket=artist_hotttnesss_rank' +
                        '&bucket=artist_location&bucket=song_currency&bucket=song_currency_rank' +
                        '&bucket=song_discovery&bucket=song_discovery_rank&bucket=song_hotttnesss'+
                        '&bucket=song_hotttnesss_rank&bucket=song_type&bucket=audio_summary' +
                        '&results=1&title=%(song)s&artist=%(artist)s' % \
                        {"song": song, "artist": artist})
    result_json = json.loads(link.text)
    if not result_json["response"]["songs"]:
        featurings2015.append(index)
    all_songs2015.append(result_json)
    time.sleep(4)

In [25]:
print len(all_songs2015)

498


In [27]:
print len(featurings2015)

31


In [31]:
fd=open("tempdata/all_songs2015.json","w")
json.dump(all_songs2015, fd)
fd.close()

In [155]:
newdata2014.ix[featurings2015]

176

In [156]:
fd=open("tempdata/steven_songs.json","w")
json.dump(all_songs, fd)
fd.close()

In [22]:
with open("tempdata/steven_songs.json") as json_file:
    test_songs = json.load(json_file)

In [354]:
link = requests.get('http://developer.echonest.com/api/v4/song/search?api_key=GT3HIFAMRWWCOWIAZ' + 
                        '&bucket=artist_discovery&bucket=artist_discovery_rank&bucket=artist_familiarity' +
                        '&bucket=artist_familiarity_rank&bucket=artist_hotttnesss&bucket=artist_hotttnesss_rank' +
                        '&bucket=artist_location&bucket=song_currency&bucket=song_currency_rank' +
                        '&bucket=song_discovery&bucket=song_discovery_rank&bucket=song_hotttnesss'+
                        '&bucket=song_hotttnesss_rank&bucket=song_type&bucket=audio_summary' +
                        '&results=1&title=%(song)s&artist=%(artist)s' % \
                        {"song": "come join", "artist": 'white buffalo'})
blah = json.loads(link.text)
blah

{u'response': {u'songs': [{u'artist_discovery': 0.4672030233883395,
    u'artist_discovery_rank': 17629,
    u'artist_familiarity': 0.523893,
    u'artist_familiarity_rank': 8074,
    u'artist_hotttnesss': 0.581075,
    u'artist_hotttnesss_rank': 3538,
    u'artist_id': u'ARRU4V71187B9993CA',
    u'artist_location': {u'latitude': 33.973951,
     u'location': u'Los Angeles, CA',
     u'longitude': -118.248405},
    u'artist_name': u'White Buffalo',
    u'audio_summary': {u'acousticness': 0.669595,
     u'analysis_url': u'http://echonest-analysis.s3.amazonaws.com/TR/vKxQVgUajJWafjbOT9XUzyZ1gBzCbDY7IZWR9vjgYOQRCdl1BPzie7qmZ_esE9ckf7RWt-I5PvTj-w4mA%3D/3/full.json?AWSAccessKeyId=AKIAJRDFEY23UEVW42BQ&Expires=1449727069&Signature=8UKGRRvgv2CjOYricHitnJnSeRM%3D',
     u'audio_md5': u'',
     u'danceability': 0.529344,
     u'duration': 448.09333,
     u'energy': 0.445162,
     u'instrumentalness': 0.031966,
     u'key': 5,
     u'liveness': 0.105663,
     u'loudness': -7.156,
     u'mode': 1,


In [358]:
newdata2014['artist'][2931] = "Usher"
newdata2014['title'][3020] = "Bartender Song"
newdata2014['artist'][3067] = "Kid Rock"
newdata2014['artist'][3105] = "Kid Rock"
newdata2014['title'][3249] = "Move"
newdata2014['artist'][3209] = "Kenny Chesney"
newdata2014['artist'][3131] = "Kenny Chesney"
newdata2014['artist'][3142] = "Brad Paisley"
newdata2014['title'][3028] = "Out Here Grindin'"
newdata2014['title'][3072] = "Somethin' Special"
newdata2014['artist'][3075] = "Estelle"
newdata2014['artist'][3273] = "Soulja Boy"
newdata2014['artist'][3229] = "Soulja Boy"
newdata2014['artist'][3302] = "Soulja Boy"
newdata2014['artist'][3895] = "Soulja Boy"
newdata2014['artist'][3278] = "A.R. Rahman"
newdata2014['title'][3278] = "Jai Ho"
newdata2014['title'][3357] = "3 a.m."
newdata2014['artist'][3265] = "Coldplay"
newdata2014['title'][3230] = "Jizz in My Pants"
newdata2014['title'][3268] = "Fuck You"
newdata2014['artist'][3373] = "Fast Life Yungstaz"
newdata2014['title'][3411] = "Wetter"
newdata2014['artist'][3436] = "Keyshia Cole"
newdata2014['title'][3439] = "Fallin' For You"
newdata2014['artist'][3443] = "Demi Lovato"
newdata2014['artist'][3454] = "OneRepublic"
newdata2014['title'][3491] = "Gettin' You Home"
newdata2014['artist'][3492] = "Kenny Chesney"
newdata2014['artist'][3517] = "Jay-Z"
newdata2014['title'][3524] = "I'm Goin In"
newdata2014['artist'][3539] = "P!nk"
newdata2014['artist'][3569] = "Thirty Seconds to Mars"
newdata2014['artist'][3687] = "Jay-Z"
newdata2014['title'][3695] = "Stranded"
newdata2014['title'][3702] = "My City of Ruins"
newdata2014['artist'][3707] = "P!nk"
newdata2014['title'][3712] = "Fuck Today"
newdata2014['artist'][3746] = "Three 6 Mafia"
newdata2014['artist'][3875] = "V. V. Brown"
newdata2014['title'][3891] = "9AM in Dallas"
newdata2014['artist'][3897] = "Dirty Heads"
newdata2014['title'][3949] = "2012"
newdata2014['title'][3977] = "Forget You"
newdata2014['title'][4084] = "The Best Thing About Me is You"
newdata2014['artist'][4087] = "Jason Aldean"
newdata2014['artist'][4109] = "Michael Jackson"
newdata2014['title'][4234] = "Did It On Em"
newdata2014['artist'][4327] = "Enrique Iglesias"
newdata2014['artist'][4377] = "Brad Paisley"
newdata2014['title'][4403] = "The Man Who"
newdata2014['artist'][4449] = "Jay-Z"
newdata2014['artist'][4458] = "Jay-Z"
newdata2014['artist'][4462] = "Jay-Z"
newdata2014['artist'][4475] = "New Boyz"
newdata2014['artist'][4505] = "Chris Brown"
newdata2014['title'][4562] = "Stronger"
newdata2014['title'][4597] = "HYFR"
newdata2014['artist'][4652] = "Jay-Z"
newdata2014['title'][4679] = "4AM"
newdata2014['title'][4744] = "I Don't Really Care"
newdata2014['artist'][4845] = "Big Time Rush"
newdata2014['artist'][4863] = "Pusha T"
newdata2014['artist'][4928] = "Lovers' Eyes"
newdata2014['artist'][4901] = "Kanye West"
newdata2014['artist'][4970] = "Swedish House Mafia"
newdata2014['artist'][4972] = "Jason Aldean"
newdata2014['title'][4976] = "A Thousand Years"
newdata2014['artist'][4977] = "Christina Aguilera"
newdata2014['title'][5029] = "We Still In This Bitch"
newdata2014['artist'][5080] = "Tim McGraw"
newdata2014['title'][5092] = "Rich as Fuck"
newdata2014['artist'][5129] = "T.I./B.O.B./Kendrick Lamar"
newdata2014['title'][5139] = "Getting Over You"
newdata2014['title'][5161] = "Grandpa"
newdata2014['artist'][5228] = "Rich Gang"
newdata2014['artist'][5268] = "August Alsina"
newdata2014['title'][5268] = "I Luv this Shit"
newdata2014['artist'][5471] = "Fitz & The Tantrums"
newdata2014['artist'][5273] = "Zedd"
newdata2014['artist'][5277] = "Keith Urban"
newdata2014['title'][5339] = "Let Me Be Lonely"
newdata2014['artist'][5368] = "August Alsina"
newdata2014['title'][5368] = "I Luv this Shit"
newdata2014['title'][5506] = "We are One"
newdata2014['artist'][5507] = "K Camp"
newdata2014['title'][5532] = "Hold on"
newdata2014['artist'][5556] = "Miranda Lambert"
newdata2014['title'][5560] = "Help Falling In Love"
newdata2014['artist'][5588] = "X Ambassadors"
newdata2014['artist'][5573] = "Florida Georgia Line"
newdata2014['title'][5679] = "I Don't Fuck with You"
newdata2014['title'][5715] = "GDFR"
newdata2014['title'][5736] = "Shut Up and Dance"
newdata2014['artist'][5746] = "Nicki Minaj"
newdata2014['artist'][5762]= "Lilly Wood"
newdata2014['artist'][5771] = "White Buffalo"
newdata2014['title'][3695] = "Stranded"

A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.

In [361]:
newdata2014['artist'][5771]

'White Buffalo'

In [364]:
featurings = []
for song in range(len(test_songs)):
    if not test_songs[song]["response"]["songs"]:
        featurings.append(song)
new_featurings = [x+2888 for x in featurings]
print test_songs[featurings[0]]
juliefixessteven2014 = newdata2014.ix[new_featurings]
juliefixessteven2014.head()

{u'response': {u'status': {u'message': u'Success', u'code': 0, u'version': u'4.2'}, u'songs': []}}


Unnamed: 0.1,Unnamed: 0,artist,date,ranking,title
2913,43426,Carrie Underwood,2008-04-26,27,Praying For Time
2914,43442,American Idol Top 8,2008-04-26,43,Shout To The Lord
2916,43479,Annie Lennox,2008-04-26,80,Many Rivers To Cross
2931,43678,Usher,2008-05-10,79,"Love In This Club, Part II"
2936,43695,Ashlee Simpson With Tom Higgenson,2008-05-10,96,Little Miss Obsessive


In [379]:
import urllib2
#juliefixes = []
#julie_leftovers = []
for index, row in juliefixessteven2014[164:].iterrows():
    song = urllib2.quote(row["title"].encode("utf-8"))
    artist = urllib2.quote(row["artist"].encode("utf-8"))
    link = requests.get('http://developer.echonest.com/api/v4/song/search?api_key=GT3HIFAMRWWCOWIAZ' + 
                        '&bucket=artist_discovery&bucket=artist_discovery_rank&bucket=artist_familiarity' +
                        '&bucket=artist_familiarity_rank&bucket=artist_hotttnesss&bucket=artist_hotttnesss_rank' +
                        '&bucket=artist_location&bucket=song_currency&bucket=song_currency_rank' +
                        '&bucket=song_discovery&bucket=song_discovery_rank&bucket=song_hotttnesss'+
                        '&bucket=song_hotttnesss_rank&bucket=song_type&bucket=audio_summary' +
                        '&results=1&title=%(song)s&artist=%(artist)s' % \
                        {"song": song, "artist": artist})
    result_json = json.loads(link.text)
    print result_json
    if not result_json["response"]["songs"]:
        julie_leftovers.append(index)
    juliefixes.append(result_json)
    time.sleep(4.5)

{u'response': {u'status': {u'code': 0, u'message': u'Success', u'version': u'4.2'}, u'songs': []}}
{u'response': {u'status': {u'code': 0, u'message': u'Success', u'version': u'4.2'}, u'songs': []}}
{u'response': {u'status': {u'code': 0, u'message': u'Success', u'version': u'4.2'}, u'songs': []}}
{u'response': {u'status': {u'code': 0, u'message': u'Success', u'version': u'4.2'}, u'songs': []}}
{u'response': {u'status': {u'code': 0, u'message': u'Success', u'version': u'4.2'}, u'songs': [{u'song_hotttnesss': 0.741112, u'song_discovery': 0.0, u'song_currency_rank': 4737, u'artist_discovery_rank': 8550, u'title': u'GDFR', u'artist_discovery': 0.4809879873557384, u'song_hotttnesss_rank': 41, u'artist_name': u'Flo Rida', u'song_type': [u'studio', u'electric', u'vocal'], u'artist_hotttnesss': 0.831246, u'artist_familiarity_rank': 578, u'audio_summary': {u'time_signature': 4, u'analysis_url': u'http://echonest-analysis.s3.amazonaws.com/TR/HLIM0g82Qf182SojQi-FxDEcomqoD3tskahG-MiERWxzL3IMJPqfYXX

In [384]:
juliefixessteven2014.to_csv('tempdata/juliefixessteven2014.csv', encoding='utf-8')
fd=open("tempdata/julie_fixes.json","w")
json.dump(juliefixes, fd)
fd.close()

In [520]:
lisa_featurings = pd.read_csv('tempdata/lisa_featurings.csv')
lisa_featurings.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,artist,date,ranking,title
0,6,6,"Missy ""Misdemeanor"" Elliott",2000-01-01,7,Hot Boyz
1,9,9,LFO,2000-01-01,10,Girl On TV
2,17,17,Christina Aguilera,2000-01-01,18,The Christmas Song (Chestnuts Roasting On An O...
3,43,43,Puff Daddy,2000-01-01,44,Satisfy You
4,52,52,98 Degrees,2000-01-01,53,This Gift


In [519]:
link = requests.get('http://developer.echonest.com/api/v4/song/search?api_key=GT3HIFAMRWWCOWIAZ' + 
                        '&bucket=artist_discovery&bucket=artist_discovery_rank&bucket=artist_familiarity' +
                        '&bucket=artist_familiarity_rank&bucket=artist_hotttnesss&bucket=artist_hotttnesss_rank' +
                        '&bucket=artist_location&bucket=song_currency&bucket=song_currency_rank' +
                        '&bucket=song_discovery&bucket=song_discovery_rank&bucket=song_hotttnesss'+
                        '&bucket=song_hotttnesss_rank&bucket=song_type&bucket=audio_summary' +
                        '&results=1&title=%(song)s&artist=%(artist)s' % \
                        {"song": "Wanna Be Startin' Somethin' 2008", "artist": "Michael Jackson"})
blah = json.loads(link.text)
blah

{u'response': {u'songs': [{u'artist_discovery': 0.28475279963945227,
    u'artist_discovery_rank': 520105,
    u'artist_familiarity': 0.86737,
    u'artist_familiarity_rank': 16,
    u'artist_hotttnesss': 0.754886,
    u'artist_hotttnesss_rank': 258,
    u'artist_id': u'ARXPPEY1187FB51DF4',
    u'artist_location': {u'latitude': 41.590686,
     u'location': u'Gary, IN',
     u'longitude': -87.319937},
    u'artist_name': u'Michael Jackson',
    u'audio_summary': {u'acousticness': 0.003895,
     u'analysis_url': u'http://echonest-analysis.s3.amazonaws.com/TR/0dl4jAuWxUzen_FUl2B0-NF3Yl8JBZoaSyh0aJtGCiunzq6Z9oAEAvM4Fs2rzUaf9AXA0omPCqp8OWDXw%3D/3/full.json?AWSAccessKeyId=AKIAJRDFEY23UEVW42BQ&Expires=1449742861&Signature=Q5hgjHMDS3YVL2ZeS4JBMOVxHkY%3D',
     u'audio_md5': u'8c2865b447cc9a19744edc24f8331051',
     u'danceability': 0.680087,
     u'duration': 216.01288,
     u'energy': 0.919597,
     u'instrumentalness': 0.005823,
     u'key': 0,
     u'liveness': 0.100488,
     u'loudness': -

In [522]:
lisa_featurings['artist'][0] = "Missy Elliott"
lisa_featurings['title'][2] = "The Christmas Song"
lisa_featurings['artist'][3] = "Diddy"
lisa_featurings['artist'][5] = "Deborah Cox"
lisa_featurings['artist'][6] = "Chad Brock"
lisa_featurings['title'][7] = "U Know What's Up"
lisa_featurings['title'][8] = "He Cant Love U"
lisa_featurings['title'][9] = "That's The Way It Is"
lisa_featurings['title'][10] = "Don't Say You Love Me"
lisa_featurings['artist'][11] = "Destiny's Child"
lisa_featurings['title'][12] = "He Didn't Have To Be"
lisa_featurings['artist'][13] = "Snoop Dogg"
lisa_featurings['title'][13] = "G'd Up"
lisa_featurings['artist'][14] = "Deborah Cox"
lisa_featurings['title'][14] = "We Can't Be Friends"
lisa_featurings['artist'][15] = "Destiny's Child"
lisa_featurings['artist'][16] = "D'Angelo"
lisa_featurings['title'][17] = "She Thinks My Tractor's Sexy"
lisa_featurings['title'][18] = "What's My Name"
lisa_featurings['artist'][19] = "D'Angelo"
lisa_featurings['artist'][20] = "*NSync"
lisa_featurings['title'][21] = "I Don't Wanna"
lisa_featurings['title'][22] = "Love's The Only House"
lisa_featurings['artist'][23] = "Diddy"
lisa_featurings['artist'][24] = "Hot Boy$"
lisa_featurings['artist'][25] = "Clint Black"
lisa_featurings['artist'][26] = "2Pac"
lisa_featurings['title'][26] = "Baby Don't Cry"
lisa_featurings['title'][27] = "That's What I'm Looking For"
lisa_featurings['title'][28] = "U Don't Love Me"
lisa_featurings['title'][29] = "Daddy Won't Sell The Farm"
lisa_featurings['artist'][30] = "Kenny Rogers"
lisa_featurings['title'][31] = "She's More"
lisa_featurings['title'][32] = "He Wasn't Man Enough"
lisa_featurings['title'][33] = "Couldn't Last A Moment"
lisa_featurings['title'][34] = "Can't Stay"
lisa_featurings['title'][35] = "If You Don't Wanna Love Me"
lisa_featurings['title'][36] = "I'm Outta Love"
lisa_featurings['title'][37] = "I Don't Wanna Kiss You Goodnight"
lisa_featurings['title'][38] = "It's So Hard"
lisa_featurings['title'][39] = "Let's Get Married"
lisa_featurings['title'][40] = "It's Gonna Be Me"
lisa_featurings['title'][41] = "Jumpin', Jumpin'"
lisa_featurings['artist'][41] = "Destiny's Child"
lisa_featurings['artist'][42] = "Lee Ann Womack"
lisa_featurings['title'][43] = "I'll Be"
lisa_featurings['title'][44] = "You'll Always Be Loved By Me"
lisa_featurings['title'][45] = "Doesn't Really Matter"
lisa_featurings['title'][46] = "That's The Way"
lisa_featurings['artist'][48] = "*NSync"
lisa_featurings['artist'][49] = "*NSync"
lisa_featurings['artist'][50] = "Snoop Dogg"
lisa_featurings['title'][51] = "I Think I'm In Love With You"
lisa_featurings['title'][52] = "Ruff Ryders"
lisa_featurings['title'][53] = "Don't Think I'm Not"
lisa_featurings['title'][54] = "It's My Life"
lisa_featurings['title'][55] = "It's Always Somethin'"
lisa_featurings['title'][56] = "Let's Make Love"
lisa_featurings['artist'][56] = "Faith Hill"
lisa_featurings['artist'][57] = "P!nk"
lisa_featurings['artist'][58] = "Bow Wow"
lisa_featurings['artist'][59] = "You're A God"
lisa_featurings['artist'][60] = "Aaron's Party (Come Get It)"
lisa_featurings['artist'][61] = "Can't Fight The Moonlight"
lisa_featurings['artist'][62] = "Can't Go For That"
lisa_featurings['artist'][63] = "That's the Kind Of Mood I'm In"
lisa_featurings['title'][64] = "Indepedent Women, Part I"
lisa_featurings['artist'][64] = "Destiny's Child"
lisa_featurings['title'][65] = "You Should've Told Me"
lisa_featurings['title'][66] = "You Won't Be Lonely Now"
lisa_featurings['artist'][67] = "*NSync"
lisa_featurings['title'][68] = "What's Your Fantasy"
lisa_featurings['artist'][69] = "Cam'Ron"
lisa_featurings['title'][70] = "If You're Gone"
lisa_featurings['title'][71] = "Toca's Miracle"
lisa_featurings['title'][72] = "It Wasn't Me"
lisa_featurings['title'][73] = "No More (Baby I'ma Do Right)"
lisa_featurings['artist'][74] = "Damizza"
lisa_featurings['title'][76] = "Love Don't Cost A Thing"
lisa_featurings['title'][78] = "Don't Tell Me"
lisa_featurings['title'][79] = "It's Over Now"
lisa_featurings['artist'][80] = "*NSync"
lisa_featurings['artist'][81] = "Bow Wow"
lisa_featurings['artist'][82] = "P!nk"
lisa_featurings['artist'][83] = "Shaggy"
lisa_featurings['artist'][84] = "Ricky Martin"
lisa_featurings['artist'][85] = "Aaron Lewis"
lisa_featurings['artist'][86] = "Baha Men"
lisa_featurings['title'][87] = "Drops Of Jupiter"
lisa_featurings['artist'][88] = "Missy Elliott"
lisa_featurings['artist'][89] = "Bow Wow"
lisa_featurings['title'][90] = "Baby, Come On Over"
lisa_featurings['artist'][91] = "G. Dep"
lisa_featurings['artist'][92] = "Jagged Edge"
lisa_featurings['artist'][93] = "J. Cozier"
lisa_featurings['artist'][94] = "D12"
lisa_featurings['artist'][95] = "Missy Elliott"
lisa_featurings['artist'][96] = "Bow Wow"
lisa_featurings['title'][97] = "Livin' It Up"
lisa_featurings['artist'][98] = "*NSync"
lisa_featurings['artist'][99] = "Robert L. Huggar"
lisa_featurings['title'][100] = "Rollout (My Business)"
lisa_featurings['title'][101] = "Mamma Got Ass"
lisa_featurings['title'][102] = "Young'n"
lisa_featurings['artist'][103] = "Jo Dee Messina"
lisa_featurings['artist'][104] = "Missy Elliott"
lisa_featurings['title'][105] = "Never Too Far/Hero"
lisa_featurings['artist'][106] = "Bow Wow"
lisa_featurings['artist'][107] = "*NSync"
lisa_featurings['artist'][108] = "P!nk"
lisa_featurings['artist'][110] = "Knoc-Turn'Al"
lisa_featurings['title'][111] = "What's It Gonna Be"
lisa_featurings['artist'][112] = "Goo Goo Dolls"
lisa_featurings['title'][113] = "I'm Gonna Miss Her"
lisa_featurings['title'][114] = "Gimme The Light"
lisa_featurings['title'][115] = "I Need a Girl, Pt. 2"
lisa_featurings['artist'][115] = "Diddy"
lisa_featurings['artist'][116] = "Irv Gotti"
lisa_featurings['artist'][117] = "P!nk"
lisa_featurings['artist'][118] = "Elivs vs JXL"
lisa_featurings['title'][121] = "Out Of My Heart"
lisa_featurings['artist'][122] = "Yasmeen"
lisa_featurings['artist'][123] = "Missy Elliott"
lisa_featurings['artist'][124] = "P!nk"
lisa_featurings['artist'][125] = "Missy Elliott"
lisa_featurings['artist'][126] = "Toby Keith"
lisa_featurings['artist'][127] = "Missy Elliott"
lisa_featurings['title'][127] = "Pussycat"
lisa_featurings['title'][128] = "Never Leave You"
lisa_featurings['artist'][129] = "My a"
lisa_featurings['title'][130] = "Love @ 1st Sight"
lisa_featurings['artist'][131] = "Maroon 5"
lisa_featurings['artist'][132] = "2Pac"
lisa_featurings['artist'][133] = "M.V.P."
lisa_featurings['title'][133] = "Roc Ya Body 'Mic Check 1, 2'"
lisa_featurings['title'][134] = "Another Postcard"
lisa_featurings['artist'][135] = "2Pac"
lisa_featurings['artist'][136] = "Too $hort"
lisa_featurings['artist'][137] = "Scotty Emerick"
lisa_featurings['artist'][138] = "2Pac"
lisa_featurings['artist'][139] = "Maroon 5"
lisa_featurings['title'][140] = "Work It"
lisa_featurings['artist'][141] = "Jimmy Buffett"
lisa_featurings['artist'][143] = "Maroon 5"
lisa_featurings['title'][144] = "Shake Dat Shit"
lisa_featurings['title'][145] = "Let's Go"
lisa_featurings['artist'][146] = "Usher"
lisa_featurings['artist'][147] = "Shania Twain"
lisa_featurings['artist'][148] = "Jay-Z"
lisa_featurings['artist'][149] = "Maroon 5"
lisa_featurings['artist'][150] = "Daryl Hall"
lisa_featurings['title'][151] = "Honkytonk U"
lisa_featurings['title'][153] = "Bithces Ain't Shit"
lisa_featurings['artist'][154] = "Pretty Ricky"
lisa_featurings['artist'][155] = "The Used"
lisa_featurings['title'][156] = "Breathe (2 AM)"
lisa_featurings['title'][157] = "Trapped In The Closet Chapter 1"
lisa_featurings['artist'][158] = "Gorillaz"
lisa_featurings['title'][159] = "Helena"
lisa_featurings['artist'][161] = "Jeezy"
lisa_featurings['artist'][162] = "Damian Marley"
lisa_featurings['artist'][163] = "50 Cent"
lisa_featurings['title'][163] = "Outta Control"
lisa_featurings['title'][164] = "Heard 'Em Say"
lisa_featurings['artist'][165] = "Shakira"
lisa_featurings['title'][166] = "Kryptonite"
lisa_featurings['artist'][166] = "Cristian Castro"
lisa_featurings['artist'][167] = "Jeezy"
lisa_featurings['artist'][180] = "Angels & Airwaves"
lisa_featurings['title'][182] = "I Love My Bitch"
lisa_featurings['artist'][183] = "Mary J. Blige"
lisa_featurings['artist'][184] = "Janet"
lisa_featurings['artist'][185] = "Thirty Seconds To Mars"
lisa_featurings['artist'][186] = "Sean Paul"
lisa_featurings['title'][188] = "Love Me Or Hate Me"
lisa_featurings['artist'][191] = "Thirty Seconds To Mars"
lisa_featurings['title'][192] = "b.u.d.d.y."
lisa_featurings['artist'][193] = "Nake Brothers Band"
lisa_featurings['artist'][194] = "Bow Wow"
lisa_featurings['title'][196] = "Pop Lock & Drop It"
lisa_featurings['artist'][197] = "P!nk"
lisa_featurings['artist'][198] = "Maroon 5"
lisa_featurings['artist'][202] = "Down"
lisa_featurings['artist'][203] = "R. Kelly"
lisa_featurings['title'][204] = "Do You Know? (The Ping Pong Song)"
lisa_featurings['artist'][205] = "Tim McGraw"
lisa_featurings['artist'][228] = "Soulja Boy"
lisa_featurings['artist'][229] = "Billy Ray Cyrus"
lisa_featurings['artist'][236] = "Michael Jackson"
lisa_featurings['artist'][237] = "Soulja Boy"

A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.

In [529]:
import urllib2
#stevenfixes = []
#steven_leftovers = []
for index, row in lisa_featurings[237:].iterrows():
    song = urllib2.quote(row["title"].encode("utf-8"))
    artist = urllib2.quote(row["artist"].encode("utf-8"))
    link = requests.get('http://developer.echonest.com/api/v4/song/search?api_key=GT3HIFAMRWWCOWIAZ' + 
                        '&bucket=artist_discovery&bucket=artist_discovery_rank&bucket=artist_familiarity' +
                        '&bucket=artist_familiarity_rank&bucket=artist_hotttnesss&bucket=artist_hotttnesss_rank' +
                        '&bucket=artist_location&bucket=song_currency&bucket=song_currency_rank' +
                        '&bucket=song_discovery&bucket=song_discovery_rank&bucket=song_hotttnesss'+
                        '&bucket=song_hotttnesss_rank&bucket=song_type&bucket=audio_summary' +
                        '&results=1&title=%(song)s&artist=%(artist)s' % \
                        {"song": song, "artist": artist})
    result_json = json.loads(link.text)
    print result_json
    if not result_json["response"]["songs"]:
        steven_leftovers.append(index)
    stevenfixes.append(result_json)
    time.sleep(3.5)

{u'response': {u'status': {u'code': 0, u'message': u'Success', u'version': u'4.2'}, u'songs': [{u'song_discovery': 0.0, u'song_hotttnesss': 0.19146, u'title': u'YAHHH', u'artist_discovery': 0.3428039160603859, u'artist_name': u'Soulja Boy', u'song_type': [u'studio', u'electric', u'vocal'], u'artist_hotttnesss': 0.609224, u'audio_summary': {u'time_signature': 4, u'analysis_url': u'http://echonest-analysis.s3.amazonaws.com/TR/91kwQgo03rj5ypJEL3I6kTR1-D5lAyEhAaR2zj/3/full.json?AWSAccessKeyId=AKIAJRDFEY23UEVW42BQ&Expires=1449744252&Signature=/iBlQ6E7PhLQ7gHdVj6t4UVCpWY%3D', u'energy': 0.408828, u'liveness': 0.168016, u'tempo': 77.032, u'speechiness': 0.254351, u'acousticness': 0.374682, u'danceability': 0.762201, u'instrumentalness': 0.0, u'key': 3, u'duration': 215.43184, u'loudness': -8.106, u'audio_md5': u'31b0e7b3b9f17fa9f5d66e1a4cd3fb76', u'valence': 0.762356, u'mode': 0}, u'song_currency': 0.000651624548736462, u'artist_familiarity': 0.728608, u'artist_id': u'ARXHGWB1187FB557F5', u'i

In [532]:
lisa_featurings.to_csv('tempdata/stevenfixeslisa2014.csv', encoding='utf-8')
fd=open("tempdata/steven_fixes.json","w")
json.dump(stevenfixes, fd)
fd.close()

In [26]:
weekinfo=[]
for k in sats:
    weekinfo.append(get_weekly(k, satdict))
yearend=get_weekly('end_year', satdict)
weektuples=zip(sats, weekinfo)

In [16]:
def get_for_title(weektups):
    titles={}
    weekindex=1
    for weekdate, weeksrankings in weektups:
        for rankingtuple in weeksrankings:#iterate over the week's top-100
            ranking = rankingtuple[0]
            weekdict=rankingtuple[1]
            if not titles.has_key(weekdict['title']):
                titles[weekdict['title']]=[]
            titles[weekdict['title']].append((weekindex, ranking))
        weekindex=weekindex+1
    return titles

In [17]:
titles = get_for_title(weektuples)

In [None]:
tdict={}
for title in titles.keys():
    wtlist=titles[title]
    weeks=[e[0] for e in wtlist]
    ranks=[e[1] for e in wtlist]
    rankmean=np.mean(ranks)
    rankstd=np.std(ranks, ddof=1)#numpy standard deviation is population based, make it sample based
    ranklen=len(ranks)
    tdict[title.lower()]={'ranks':(rankmean, rankstd, ranklen)}