# Data Cleaning

In [None]:
%matplotlib inline
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
import time
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")
import re
import math
import urllib2
import json
import datetime

## Data from Echo Nest

In [3]:
with open("tempdata/steven_songs.json") as json_file:
    board1 = json.load(json_file)

In [384]:
with open("tempdata/lisa_songs.json") as json_file:
    board0 = json.load(json_file)

In [659]:
with open("tempdata/all_songs2015.json") as json_file:
    board2015 = json.load(json_file)

In [660]:
all_songs = board0+board1+board2015

In [661]:
len(all_songs), len(board0), len(board1), len(board2015)

(6273, 2888, 2887, 498)

In [662]:
fd=open("tempdata/all_songs_00_15.json","w")
json.dump(all_songs, fd)
fd.close()

## Data from Billboard Year-End Hot 100

In [381]:
flatframe=pd.read_csv("./tempdata/billboard100.csv")

## Data from Weekly Billboard 100

In [340]:
billboard0=pd.read_csv("./tempdata/lisa2014.csv")

In [339]:
billboard1=pd.read_csv("./tempdata/steven2014.csv")

In [663]:
## 2015 data to predict
billboard2015=pd.read_csv("./tempdata/raw2015data.csv")

#### Songs from Weekly Billboard 100 that did not get info from Echo Nest

In [177]:
exceptions1=pd.read_csv("./tempdata/stevenexceptions.csv")

In [190]:
exceptions0=pd.read_csv("./tempdata/lisaexceptions.csv")

## Transforming Echo Nest data into a Dataframe

In [151]:
flatframe["year"] = flatframe["year"].astype(int)

In [153]:
y00_14 = flatframe[(flatframe.year >= 2000) & (flatframe.year <= 2014)]

In [4]:
columns = ["title", "id", "artist_name", "artist_id", "audio_summary", "artist_discovery",
           "artist_discovery_rank", "artist_familiarity", "artist_familiarity_rank",
           "artist_hotttnesss", "artist_hotttnesss_rank", "artist_location", "song_currency", 
           "song_currency_rank", "song_discovery", "song_discovery_rank", "song_hotttnesss", 
           "song_hotttnesss_rank", "song_type"]

audio_summary = ["acousticness", "analysis_url","audio_md5", "audio_md5", "danceability", "duration", "energy", "key", 
                 "liveness", "loudness","mode", "speechiness", "tempo", "time_signature", "valence"]

In [668]:
# FUNCTION flatten

def flatten(all_songs, billboard):
    print "from echonest:", len(all_songs), "from billboard:", len(billboard) ## should be equal
    songdicts=[]
    fails = []
    exceptions=[]
    
    for a, i in zip(all_songs, range(len(all_songs))):
        d={}
        if a["response"]["status"]["message"] == "Success":

            if  not a["response"]["songs"]:
                exceptions.append(i)
            for b in a["response"]["songs"]:
                for var in set(b.keys()).intersection(columns):
                    if var == "audio_summary":
                        for v in set(b[var].keys()).intersection(audio_summary):
                            d[v] = b[var][v]
                    d[var] = b[var]
                    d["index"] = billboard.index[i]

                songdicts.append(d)  

        # if the requests failed
        else:
            fails.append(i)
            print i, a
    print "# of songs with no echonest info:", len(exceptions)
    print exceptions
    return pd.DataFrame(songdicts)




In [669]:
df1 = flatten(board1, billboard1)
# df1.head()

from echonest: 2887 from billboard: 2887
# of songs with no echonest info: 176
[25, 26, 28, 43, 48, 75, 76, 77, 80, 81, 82, 83, 84, 85, 86, 89, 92, 132, 140, 179, 184, 187, 190, 201, 217, 243, 254, 271, 274, 314, 321, 341, 342, 361, 377, 380, 385, 390, 414, 469, 485, 501, 505, 506, 510, 523, 548, 551, 555, 566, 603, 604, 629, 636, 651, 681, 715, 751, 799, 807, 811, 812, 814, 816, 819, 824, 837, 841, 846, 858, 871, 874, 909, 912, 962, 964, 967, 987, 1003, 1007, 1009, 1056, 1061, 1084, 1089, 1196, 1199, 1201, 1221, 1249, 1346, 1404, 1411, 1439, 1447, 1466, 1489, 1515, 1527, 1561, 1570, 1574, 1587, 1617, 1674, 1684, 1704, 1709, 1764, 1791, 1802, 1824, 1856, 1879, 1902, 1927, 1957, 1975, 2013, 2040, 2072, 2082, 2084, 2088, 2089, 2128, 2131, 2141, 2158, 2169, 2192, 2204, 2207, 2241, 2245, 2251, 2273, 2282, 2304, 2340, 2380, 2385, 2389, 2424, 2431, 2451, 2480, 2525, 2583, 2599, 2618, 2619, 2644, 2646, 2652, 2661, 2668, 2672, 2685, 2695, 2700, 2744, 2754, 2791, 2798, 2819, 2820, 2825, 2827, 2

In [375]:
df1_with_date=pd.merge(df1, billboard1[["date", "ranking"]], left_on=["index"], right_index=True)
df1_with_date.rename(columns={'ranking':'first_ranking'}, inplace=True)
df1_with_date.head()

Unnamed: 0,acousticness,analysis_url,artist_discovery,artist_discovery_rank,artist_familiarity,artist_familiarity_rank,artist_hotttnesss,artist_hotttnesss_rank,artist_id,artist_location,artist_name,audio_md5,audio_summary,danceability,duration,energy,id,index,key,liveness,loudness,mode,song_currency,song_currency_rank,song_discovery,song_discovery_rank,song_hotttnesss,song_hotttnesss_rank,song_type,speechiness,tempo,time_signature,title,valence,date,first_ranking
0,0.035576,http://echonest-analysis.s3.amazonaws.com/TR/v...,0.438732,51784.0,0.70614,660.0,0.743782,566.0,AR7ZFLN1187FB4830A,"{u'latitude': 40.100924, u'location': u'Columb...",Rascal Flatts,,"{u'key': 8, u'analysis_url': u'http://echonest...",0.352911,255.44,0.740418,SOZVKOK12AB018169D,0,8,0.087264,-5.443,1,0.000499,1413984.0,0.000422,3341858.0,0.197551,551933.0,"[studio, electric, vocal]",0.038931,169.553,3,Every Day,0.23214,2008-03-29,98
1,0.013486,http://echonest-analysis.s3.amazonaws.com/TR/J...,0.505778,,0.716241,,0.875501,,AR1IJ1Z11C8A41500D,"{u'latitude': 25.729, u'location': u'Miami, FL...",Flo Rida,,"{u'key': 4, u'analysis_url': u'http://echonest...",0.830666,226.41333,0.579507,SOUOYVQ146BCE8855E,1,4,0.774029,-8.546,0,0.0,,0.0,,0.0,,"[electric, vocal]",0.098374,127.975,4,Stick and Roll,0.639884,2008-04-05,61
2,0.014124,http://echonest-analysis.s3.amazonaws.com/TR/G...,0.334083,295429.0,0.834004,49.0,0.784144,279.0,ARBEOHF1187B9B044D,"{u'latitude': 40.702898, u'location': u'New Yo...",Madonna,,"{u'key': 2, u'analysis_url': u'http://echonest...",0.755065,244.4,0.785185,SOVDPIF15102E6F1D2,2,2,0.062658,-7.239,1,0.0,,0.0,,0.0,,"[studio, electric, vocal]",0.091,113.045,4,4 Minutes,0.682518,2008-04-05,68
3,0.26085,http://echonest-analysis.s3.amazonaws.com/TR/C...,0.476439,17307.0,0.390916,45257.0,0.497726,29812.0,ARL7JTU1187FB5CD41,"{u'latitude': 34.0535, u'location': u'Los Ange...",Ferras,b49da61d5414a50726d859a003ac0457,"{u'key': 5, u'analysis_url': u'http://echonest...",0.475168,203.71156,0.376112,SOFAZMU13E9076C02E,3,5,0.335079,-18.088,1,0.00095,572447.0,0.0,,0.297964,156190.0,"[studio, electric, vocal]",0.038533,77.85,4,Hollywood's Not America,0.571923,2008-04-05,84
4,0.002115,http://echonest-analysis.s3.amazonaws.com/TR/o...,0.389329,137527.0,0.670525,1143.0,0.615818,3053.0,ARQWJK41187FB47D80,"{u'latitude': 33.7483, u'location': u'Atlanta,...",Gnarls Barkley,,"{u'key': 1, u'analysis_url': u'http://echonest...",0.44255,174.04,0.897173,SOUEVRS135C2319C58,4,1,0.339856,-4.75,1,0.02295,30520.0,0.006579,544389.0,0.453361,11482.0,"[studio, electric, vocal]",0.054457,149.946,4,Going On,0.47882,2008-04-05,88


In [385]:
df0 = flatten(board0, billboard0)
# df0.head()

from echonest: 2888 from billboard: 2888
# of songs with no echonest info: 238


In [388]:
df0_with_date=pd.merge(df0, billboard0[["date", "ranking"]], left_on=["index"], right_index=True)
df0_with_date.rename(columns={'ranking':'first_ranking'}, inplace=True)
df0_with_date.head()

Unnamed: 0,acousticness,analysis_url,artist_discovery,artist_discovery_rank,artist_familiarity,artist_familiarity_rank,artist_hotttnesss,artist_hotttnesss_rank,artist_id,artist_location,artist_name,audio_md5,audio_summary,danceability,duration,energy,id,index,key,liveness,loudness,mode,song_currency,song_currency_rank,song_discovery,song_discovery_rank,song_hotttnesss,song_hotttnesss_rank,song_type,speechiness,tempo,time_signature,title,valence,date,first_ranking
0,0.011162,http://echonest-analysis.s3.amazonaws.com/TR/-...,0.327253,320619.0,0.773037,211.0,0.644993,1508.0,ARB054P1187B9AD32E,"{u'latitude': 37.784827, u'location': u'San Fr...",Santana,,"{u'key': 9, u'analysis_url': u'http://echonest...",0.608403,295.41333,0.766908,SOVCIBT14517F409D9,0,9,0.214902,-9.899,1,0.090871,4918.0,0.0,,0.521444,3272.0,"[studio, electric, vocal]",0.029436,115.994,4,Smooth,0.963628,2000-01-01,1
1,0.461383,http://echonest-analysis.s3.amazonaws.com/TR/o...,0.366447,,0.676915,,0.586628,,ARI10GS1187B99B2BD,"{u'latitude': 42.8854, u'location': u'Buffalo,...",Brian McKnight,,"{u'key': 8, u'analysis_url': u'http://echonest...",0.5125,251.06667,0.500628,SOHBLSS12A8C13CA98,1,8,0.08492,-11.744,0,0.000122,,0.000312,,0.088599,,"[studio, vocal]",0.272282,89.219,4,Come Back To Me,0.549607,2000-01-01,2
2,0.023664,http://echonest-analysis.s3.amazonaws.com/TR/P...,0.369704,,0.673539,,0.588086,,ARN7POH1187B9B301E,"{u'latitude': 32.576489, u'location': u'Abilen...",Jessica Simpson,510d03f7f7773ffe84bd6365e75dfc45,"{u'key': 4, u'analysis_url': u'http://echonest...",0.644058,232.59955,0.872039,SOZYMAL14B5D7A5698,2,4,0.033556,-9.151,0,0.006367,,0.0,,0.355779,,"[studio, electric, vocal]",0.04487,130.18,4,I Wanna Love You Forever,0.626382,2000-01-01,3
3,0.046606,http://echonest-analysis.s3.amazonaws.com/TR/7...,0.299033,437039.0,0.811461,83.0,0.654229,1358.0,AR30R5E1187B9AD78A,"{u'latitude': 40.736101, u'location': u'Newark...",Whitney Houston,24aa3d1259e3480900721571bc4fe8d6,"{u'key': 0, u'analysis_url': u'http://echonest...",0.660295,258.09288,0.84701,SOPBEQQ12B0B80B9BD,3,0,0.066933,-6.398,1,0.02866,24931.0,0.002068,1561470.0,0.461754,9856.0,"[studio, electric, vocal]",0.037339,134.98,4,My Love Is Your Love,0.510183,2000-01-01,4
4,0.303738,http://echonest-analysis.s3.amazonaws.com/TR/K...,0.404125,99659.0,0.62296,2220.0,0.587435,3171.0,ARBGWMW1187B9AEA3E,"{u'latitude': -27.5, u'location': u'Brisbane, ...",Savage Garden,e7d7383e265fc8edcc2ecdf2326c22a4,"{u'key': 9, u'analysis_url': u'http://echonest...",0.560336,227.21288,0.487274,SOBCEVK135CB470340,4,9,0.086738,-10.158,1,0.029817,24084.0,0.003956,902780.0,0.458545,10409.0,"[studio, electric, vocal]",0.027601,169.929,4,I Knew I Loved You,0.740737,2000-01-01,5


In [673]:
df2015 = flatten(board2015, billboard2015)
# df1.head()

from echonest: 498 from billboard: 498
# of songs with no echonest info: 31
[11, 24, 33, 45, 61, 68, 76, 111, 151, 152, 157, 203, 204, 215, 234, 301, 328, 335, 386, 394, 396, 428, 453, 458, 461, 466, 479, 490, 492, 495, 496]


In [665]:
df2015_with_date=pd.merge(df2015, billboard2015[["date", "ranking"]], left_on=["index"], right_index=True)
df2015_with_date.rename(columns={'ranking':'first_ranking'}, inplace=True)
df2015_with_date.head()

Unnamed: 0,acousticness,analysis_url,artist_discovery,artist_discovery_rank,artist_familiarity,artist_familiarity_rank,artist_hotttnesss,artist_hotttnesss_rank,artist_id,artist_location,artist_name,audio_md5,audio_summary,danceability,duration,energy,id,index,key,liveness,loudness,mode,song_currency,song_currency_rank,song_discovery,song_discovery_rank,song_hotttnesss,song_hotttnesss_rank,song_type,speechiness,tempo,time_signature,title,valence,date,first_ranking
0,0.085046,http://echonest-analysis.s3.amazonaws.com/TR/c...,0.337995,279733,0.869065,14,0.902243,5,ARS54I31187FB46721,"{u'latitude': 40.357242, u'location': u'Readin...",Taylor Swift,,"{u'key': 5, u'analysis_url': u'http://echonest...",0.752214,231.82667,0.678207,SOHWWBI14957DA760E,0,5,0.130076,-5.421,1,0.087056,5143.0,0.014776,194091.0,0.747001,32.0,"[studio, electric, vocal]",0.064559,96.009,4,Blank Space,0.566263,2015-01-03,1
1,0.336397,http://echonest-analysis.s3.amazonaws.com/TR/r...,0.569392,1015,0.620569,2283,0.824324,48,ARKLCET1407EC5357C,"{u'latitude': 53.2027778, u'location': u'Bray,...",Hozier,,"{u'key': 4, u'analysis_url': u'http://echonest...",0.588576,242.29279,0.577671,SOPUGMY14744C1D82D,1,4,0.115772,-5.143,0,0.068439,7137.0,0.02603,63067.0,0.801319,5.0,"[studio, electric, vocal]",0.052264,128.766,3,Take Me To Church,0.214146,2015-01-03,2
2,0.006586,http://echonest-analysis.s3.amazonaws.com/TR/8...,0.479941,9027,0.706855,658,0.814831,63,ARPM1O31187B9A0ECD,"{u'latitude': 51.5118, u'location': u'Notting ...",Mark Ronson,,"{u'key': 0, u'analysis_url': u'http://echonest...",0.870316,238.68771,0.529591,SOTNABC149A234356E,2,0,0.105547,-6.496,1,0.079952,5624.0,0.059078,13930.0,0.714691,75.0,"[studio, electric, vocal]",0.164026,115.476,4,Uptown Funk,0.892336,2015-01-03,3
3,0.452444,http://echonest-analysis.s3.amazonaws.com/TR/c...,0.489973,5036,0.741532,362,0.896003,10,ARSDWSZ122ECCB706A,"{u'latitude': 52.166667, u'location': u'Suffol...",Ed Sheeran,96db797e671142e6b76aa2ff346096b0,"{u'key': 2, u'analysis_url': u'http://echonest...",0.672997,222.61506,0.222477,SOSACAB14A63CF325C,3,2,0.109256,-17.357,1,0.072791,6387.0,0.0,,0.798272,6.0,"[studio, vocal]",0.04397,161.257,4,Thinking Out Loud,0.649649,2015-01-03,4
4,0.551067,http://echonest-analysis.s3.amazonaws.com/TR/W...,0.587302,743,0.623769,2182,0.854873,23,ARGUQXQ12D5CD78CB1,"{u'latitude': 41.2852, u'location': u'Nantucke...",Meghan Trainor,d19bd325e72776c73ae0e3ef83584e9b,"{u'key': 7, u'analysis_url': u'http://echonest...",0.7416,179.73288,0.38897,SOSVWPE150D28B93BC,4,7,0.275846,-10.45,1,0.0,,0.0,,0.0,,"[studio, vocal]",0.446262,141.374,4,Lips Are Movin,0.83028,2015-01-03,5


#### Second Round Requesting of Echo Nest

In [501]:
with open("tempdata/julie_fixes.json") as json_file:
    board1_add = json.load(json_file)

In [506]:
fixed1=pd.read_csv("./tempdata/juliefixessteven2014.csv")
# fixed1.head()

In [507]:
df1_fixes = flatten(board1_add, fixed1)

from echonest: 176 from billboard: 176
# of songs with no echonest info: 88


In [508]:
df1_fixes_with_date=pd.merge(df1_fixes, fixed1[["date", "ranking"]], left_on=["index"], right_index=True)
df1_fixes_with_date.rename(columns={'ranking':'first_ranking'}, inplace=True)
df1_fixes_with_date.head()

Unnamed: 0,acousticness,analysis_url,artist_discovery,artist_discovery_rank,artist_familiarity,artist_familiarity_rank,artist_hotttnesss,artist_hotttnesss_rank,artist_id,artist_location,artist_name,audio_md5,audio_summary,danceability,duration,energy,id,index,key,liveness,loudness,mode,song_currency,song_currency_rank,song_discovery,song_discovery_rank,song_hotttnesss,song_hotttnesss_rank,song_type,speechiness,tempo,time_signature,title,valence,date,first_ranking
0,0.025617,http://echonest-analysis.s3.amazonaws.com/TR/C...,0.342752,261704.0,0.789005,153.0,0.704079,677.0,ARPDVPJ1187B9ADBE9,"{u'latitude': 33.844371, u'location': u'Atlant...",Usher,,"{u'key': 1, u'analysis_url': u'http://echonest...",0.459909,309.02667,0.493473,SOESSNA135C4E48E81,3,1,0.139882,-7.926,1,0.00019,3474026.0,0.00014,3986033.0,0.158813,862619.0,"[studio, electric, vocal]",0.201134,138.67,4,Love In This Club Part II,0.510526,2008-05-10,79
1,0.107622,http://echonest-analysis.s3.amazonaws.com/TR/P...,0.398322,110677.0,0.534632,7058.0,0.503368,18748.0,AR0GKOG1187B990042,"{u'latitude': 32.6208, u'location': u'Warner R...",Rehab,adbe350f644f0e0929d040f7e270d92d,"{u'key': 11, u'analysis_url': u'http://echones...",0.688443,198.29506,0.454235,SOAZPKY144BF62807D,17,11,0.152648,-15.104,1,0.000121,4074741.0,0.0,,0.102861,1642885.0,"[studio, electric, vocal]",0.073504,80.115,4,Bartender Song (Sittin' At A Bar),0.65585,2008-07-12,94
2,0.010897,http://echonest-analysis.s3.amazonaws.com/TR/v...,0.361316,209468.0,0.710927,623.0,0.618575,2113.0,AR4JOEZ1187B9AE99A,"{u'latitude': 42.347021, u'location': u'Detroi...",Kid Rock,10655ac0129b5b128f8bdcd456bc5993,"{u'key': 7, u'analysis_url': u'http://echonest...",0.723779,215.66649,0.536754,SOOMDKY13B1C86B15D,19,7,0.335675,-15.488,1,0.10694,4174.0,0.0,,0.56863,1335.0,"[studio, electric, vocal]",0.030524,106.213,4,All Summer Long,0.961548,2008-08-30,65
3,0.024695,http://echonest-analysis.s3.amazonaws.com/TR/W...,0.369318,,0.681371,,0.596257,,ARQ2I6K1187FB59949,"{u'latitude': 34.039034, u'location': u'Malibu...",Colbie Caillat,db6c2d884f722b964280d2ad7a1b5192,"{u'key': 1, u'analysis_url': u'http://echonest...",0.696791,185.42621,0.586611,SOCOSZP14591CE9690,20,1,0.092779,-7.05,1,0.000988,,0.001454,,0.225373,,"[studio, electric, vocal]",0.029053,115.039,4,Somethin' Special,0.826815,2008-08-30,98
4,0.116591,http://echonest-analysis.s3.amazonaws.com/TR/H...,0.401276,104959.0,0.620684,2290.0,0.581051,3541.0,ARYF7W11187FB59A55,"{u'latitude': 51.5072648, u'location': u'Londo...",Estelle,2493095c1d049d1f5569cda4d3743f78,"{u'key': 4, u'analysis_url': u'http://echonest...",0.772877,233.7171,0.338031,SOLXEVK13AA4F85523,21,4,0.082269,-16.891,0,0.073026,6365.0,0.0,,0.507432,4256.0,"[studio, electric, vocal]",0.358481,121.504,4,American Boy,0.71252,2008-09-06,85


In [502]:
with open("tempdata/steven_fixes.json") as json_file:
    board0_add = json.load(json_file)

In [503]:
fixed0=pd.read_csv("./tempdata/stevenfixeslisa2014.csv")
# fixed1.head()

In [504]:
df0_fixes = flatten(board0_add, fixed0)

from echonest: 238 from billboard: 238
# of songs with no echonest info: 82


In [505]:
df0_fixes_with_date=pd.merge(df0_fixes, fixed0[["date", "ranking"]], left_on=["index"], right_index=True)
df0_fixes_with_date.rename(columns={'ranking':'first_ranking'}, inplace=True)
df0_fixes_with_date.head()

Unnamed: 0,acousticness,analysis_url,artist_discovery,artist_discovery_rank,artist_familiarity,artist_familiarity_rank,artist_hotttnesss,artist_hotttnesss_rank,artist_id,artist_location,artist_name,audio_md5,audio_summary,danceability,duration,energy,id,index,key,liveness,loudness,mode,song_currency,song_currency_rank,song_discovery,song_discovery_rank,song_hotttnesss,song_hotttnesss_rank,song_type,speechiness,tempo,time_signature,title,valence,date,first_ranking
0,0.305346,http://echonest-analysis.s3.amazonaws.com/TR/2...,0.414328,80307,0.754341,280,0.780348,148,ARXFB7K1187B9B1E54,"{u'latitude': 36.811498, u'location': u'Portsm...",Missy Elliott,09f006264b72923a9c96c4f77070bab8,"{u'key': 10, u'analysis_url': u'http://echones...",0.687011,209.94567,0.702087,SOKXFIM13D71E56249,0,10,0.066558,-7.48,0,0.007195,86142,0.001379,2038227,0.315367,120502,"[studio, electric, vocal]",0.059849,80.997,4,Hot Boyz,0.969931,2000-01-01,7
1,0.884023,http://echonest-analysis.s3.amazonaws.com/TR/Z...,0.329459,311446,0.792197,137,0.682612,937,AR0S7TA1187FB4D024,"{u'latitude': 40.623632, u'location': u'Staten...",Christina Aguilera,,"{u'key': 2, u'analysis_url': u'http://echonest...",0.391944,264.96,0.300259,SOAUZLQ1366D7806E7,2,2,0.105111,-8.452,1,6.8e-05,4747987,0.000108,4041987,0.188182,614425,"[studio, christmas, vocal]",0.03356,71.31,4,The Christmas Song,0.150914,2000-01-01,18
2,0.531084,http://echonest-analysis.s3.amazonaws.com/TR/y...,0.324892,327222,0.739143,385,0.590944,2996,ARQ23MU1187FB3895B,"{u'latitude': 40.7146, u'location': u'New York...",Diddy,,"{u'key': 1, u'analysis_url': u'http://echonest...",0.750252,286.04,0.720956,SOFDSTD1315CD4B6D5,3,1,0.121884,-5.997,1,0.002443,225183,0.000787,2692232,0.264792,243330,"[studio, vocal]",0.188119,87.254,4,Satisfy You,0.638849,2000-01-01,44
3,0.04088,http://echonest-analysis.s3.amazonaws.com/TR/8...,0.415894,77796,0.580891,3943,0.564295,4789,ARCD8BN1187FB5C95E,"{u'latitude': 43.666667, u'location': u'Toront...",Deborah Cox,,"{u'key': 7, u'analysis_url': u'http://echonest...",0.556055,281.2,0.435594,SOBKLLK131343A0D78,5,7,0.119316,-7.906,1,0.000264,1837544,0.000378,3427364,0.163928,813977,"[studio, electric, vocal]",0.033475,119.91,4,We Can't Be Friends,0.080723,2000-01-01,75
4,0.167242,http://echonest-analysis.s3.amazonaws.com/TR/0...,0.467378,17470,0.355922,73363,0.488261,30977,AR1AJN81187FB3892F,"{u'latitude': 29.1875, u'location': u'Ocala, F...",Chad Brock,,"{u'key': 7, u'analysis_url': u'http://echonest...",0.667326,238.30667,0.424349,SOSDVFE1315CD4A478,6,7,0.108491,-9.979,1,0.000284,1782474,0.000734,2772616,0.158991,862168,"[remix, studio, electric, vocal]",0.03176,75.957,4,A Country Boy Can Survive (Y2K Version),0.505989,2000-01-01,93


In [674]:
exceptions2015 = [11, 24, 33, 45, 61, 68, 76, 111, 151, 152, 157, 203, 204, 215, 234, 301, 328, 335, 386, 394, 396, 428, 453, 458, 461, 466, 479, 490, 492, 495, 496]

In [678]:
billboard2015.loc[exceptions2015]

Unnamed: 0.1,Unnamed: 0,artist,date,ranking,title
11,11,Big Sean,2015-01-03,12,I Don't F**k With You
24,24,Bobby Shmurda,2015-01-03,25,Hot Boy
33,33,Craig Wayne Boyd,2015-01-03,34,My Baby's Got A Smile On Her Face
45,45,DeJ Loaf,2015-01-03,46,Try Me
61,61,Nicki Minaj Featruing Skylar Grey,2015-01-03,62,Bed Of Lies
68,68,Lillywood,2015-01-03,69,Prayer In C
76,76,Flo Rida,2015-01-03,77,G.D.F.R.
111,198,Juicy J,2015-01-10,99,Shell Shocked
151,734,"Missy ""Misdemeanor"" Elliott",2015-02-21,35,Work It
152,739,"Missy ""Misdemeanor"" Elliott",2015-02-21,40,Get Ur Freak On


In [679]:
frames = [df0_with_date, df0_fixes_with_date, df1_with_date, df1_fixes_with_date, df2015_with_date,]
df = pd.concat(frames)

#### Editing and Adding New Variables

In [689]:
no_weeks = [52, 52, 52, 52, 53, 52, 52, 52, 52, 53, 52, 52, 52, 52, 52, 53]
years = range(2000,2016)
no_wks = dict(zip(years, no_weeks))

In [750]:
df["billboard_year"] = np.nan
df["billboard_week"] = np.nan
df["latitude"] = np.nan
df["longitude"] = np.nan
df["location"] = ""
df["location_lower"] = ""
maps = {(0,0): "Trial"}

for index, row in df.iterrows():
    year = int(row.date[:4])
    month = int(row.date[5:7])
    day =  int(row.date[8:10])
    today = datetime.date(year, month, day)
    weeks = no_wks[year]
    add = 5
    if (month == 12) & ((today.weekday()-today.day>=-1)):
        add = weeks - today.isocalendar()[1] + 1
        df.ix[index, "billboard_year"] = year+1
        df.ix[index, "billboard_week"] = 1
    elif ((today.isocalendar()[1] + add)%(weeks)==0):
        df.ix[index, "billboard_year"] = year
        df.ix[index, "billboard_week"] = weeks
    else:
        df.ix[index, "billboard_year"] = year
        df.ix[index, "billboard_week"] = (today.isocalendar()[1] + add)%(weeks)
    
    locate = row["artist_location"]
    if locate:
        if locate["latitude"]:
            df.ix[index, "latitude"] = locate["latitude"]
        if locate["longitude"]:
            df.ix[index, "longitude"] = locate["longitude"]
        if locate["location"]:
            if (locate["latitude"]==True) & (locate["longitude"]==True):
                coords = (locate["latitude"], locate["longitude"])
#                 print coords
                if coords in maps:
                    df.ix[index, "location"] = maps[coords]
                    df.ix[index, "location_lower"] = maps[coords].lower()
                else:
                    maps[coords] = locate["location"]
                    df.ix[index, "location"] = locate["location"]
                    df.ix[index, "location_lower"] = locate["location"].lower() 
            else:
                df.ix[index, "location"] = locate["location"]
                df.ix[index, "location_lower"] = locate["location"].lower() 


In [751]:
df["billboard_week"].describe()

count    6072.000000
mean       26.095520
std        15.836728
min         1.000000
25%        11.000000
50%        26.000000
75%        41.000000
max        53.000000
Name: billboard_week, dtype: float64

In [752]:
df.head()

Unnamed: 0,acousticness,analysis_url,artist_discovery,artist_discovery_rank,artist_familiarity,artist_familiarity_rank,artist_hotttnesss,artist_hotttnesss_rank,artist_id,artist_location,artist_name,audio_md5,audio_summary,danceability,duration,energy,id,index,key,liveness,loudness,mode,song_currency,song_currency_rank,song_discovery,song_discovery_rank,song_hotttnesss,song_hotttnesss_rank,song_type,speechiness,tempo,time_signature,title,valence,date,first_ranking,billboard_year,billboard_week,latitude,longitude,location,location_lower
0,0.011162,http://echonest-analysis.s3.amazonaws.com/TR/-...,0.327253,320619.0,0.773037,211.0,0.644993,1508.0,ARB054P1187B9AD32E,"{u'latitude': 37.784827, u'location': u'San Fr...",Santana,,"{u'key': 9, u'analysis_url': u'http://echonest...",0.608403,295.41333,0.766908,SOVCIBT14517F409D9,0,9,0.214902,-9.899,1,0.090871,4918.0,0.0,,0.521444,3272.0,"[studio, electric, vocal]",0.029436,115.994,4,Smooth,0.963628,2000-01-01,1,2015,6,40.357242,-75.940153,"Reading, PA","reading, pa"
1,0.461383,http://echonest-analysis.s3.amazonaws.com/TR/o...,0.366447,,0.676915,,0.586628,,ARI10GS1187B99B2BD,"{u'latitude': 42.8854, u'location': u'Buffalo,...",Brian McKnight,,"{u'key': 8, u'analysis_url': u'http://echonest...",0.5125,251.06667,0.500628,SOHBLSS12A8C13CA98,1,8,0.08492,-11.744,0,0.000122,,0.000312,,0.088599,,"[studio, vocal]",0.272282,89.219,4,Come Back To Me,0.549607,2000-01-01,2,2015,6,53.202778,-6.098333,"Bray, Ireland","bray, ireland"
2,0.023664,http://echonest-analysis.s3.amazonaws.com/TR/P...,0.369704,,0.673539,,0.588086,,ARN7POH1187B9B301E,"{u'latitude': 32.576489, u'location': u'Abilen...",Jessica Simpson,510d03f7f7773ffe84bd6365e75dfc45,"{u'key': 4, u'analysis_url': u'http://echonest...",0.644058,232.59955,0.872039,SOZYMAL14B5D7A5698,2,4,0.033556,-9.151,0,0.006367,,0.0,,0.355779,,"[studio, electric, vocal]",0.04487,130.18,4,I Wanna Love You Forever,0.626382,2000-01-01,3,2015,6,51.5118,-0.20594,"Notting Hill, London, England, GB","notting hill, london, england, gb"
3,0.046606,http://echonest-analysis.s3.amazonaws.com/TR/7...,0.299033,437039.0,0.811461,83.0,0.654229,1358.0,AR30R5E1187B9AD78A,"{u'latitude': 40.736101, u'location': u'Newark...",Whitney Houston,24aa3d1259e3480900721571bc4fe8d6,"{u'key': 0, u'analysis_url': u'http://echonest...",0.660295,258.09288,0.84701,SOPBEQQ12B0B80B9BD,3,0,0.066933,-6.398,1,0.02866,24931.0,0.002068,1561470.0,0.461754,9856.0,"[studio, electric, vocal]",0.037339,134.98,4,My Love Is Your Love,0.510183,2000-01-01,4,2015,6,52.166667,1.0,"Suffolk, England","suffolk, england"
4,0.303738,http://echonest-analysis.s3.amazonaws.com/TR/K...,0.404125,99659.0,0.62296,2220.0,0.587435,3171.0,ARBGWMW1187B9AEA3E,"{u'latitude': -27.5, u'location': u'Brisbane, ...",Savage Garden,e7d7383e265fc8edcc2ecdf2326c22a4,"{u'key': 9, u'analysis_url': u'http://echonest...",0.560336,227.21288,0.487274,SOBCEVK135CB470340,4,9,0.086738,-10.158,1,0.029817,24084.0,0.003956,902780.0,0.458545,10409.0,"[studio, electric, vocal]",0.027601,169.929,4,I Knew I Loved You,0.740737,2000-01-01,5,2015,6,41.2852,-70.0993,"Nantucket, MA, US","nantucket, ma, us"


### Deleting Duplicate Rows
We only want unique combinations of song title and artist.

In [753]:
ids = df[["title", "artist_name", "billboard_year"]]

In [754]:
duplicated=ids[ids.duplicated()]
len(duplicated)

94

In [755]:
len(df[df.title.isin(duplicated.title) & df.artist_name.isin(duplicated.artist_name) 
       & df.billboard_year.isin(duplicated.billboard_year)].sort("title"))

187

In [756]:
df_unique = df.drop(duplicated.index)

In [757]:
ids2 = df_unique[["title", "artist_name", "billboard_year"]]
len(ids2[ids2.duplicated()])

0

### Merging Echo Nest data with Billboard Year-End Hot 100 singles data

We turn song title and artist names to all lowercase to have more successful merges.

In [758]:
df_unique.loc[:,"lower_title"] = df_unique["title"].str.lower()
df_unique.loc[:,"lower_artist"] = df_unique["artist_name"].str.lower()

In [759]:
y00_14.loc[:,"lower_title"] = y00_14["song"].str.lower()
y00_14.loc[:,"lower_artist"] = y00_14["band_singer"].str.lower()

In [760]:
df_all = pd.merge(df_unique, y00_14, how='left', on=["lower_title", "lower_artist"], suffixes=('_x', '_y'))

In [761]:
len(df_all), len(y00_14), len(df_unique), len(df)

(5861, 2050, 5750, 6072)

In [762]:
df_all.ranking.describe()

count    1215.000000
mean       50.226337
std        29.090507
min         1.000000
25%        25.000000
50%        50.000000
75%        76.000000
max       100.000000
Name: ranking, dtype: float64

In [763]:
## number of songs with no ranking
df_all.ranking.isnull().sum()

4646

In [764]:
## songs from Billboard Year-End Hot 100 that did not merge with Echo Nest data
missing_ranks = y00_14[(~y00_14.lower_title.isin(df_all.lower_title))&(~y00_14.lower_artist.isin(df_all.lower_artist))].sort("band_singer")

In [765]:
missing_ranks.head()

Unnamed: 0,year,band_singer,ranking,song,songurl,url,lower_title,lower_artist
1238,2013,ASAP Rocky,41,Fuckin' Problems,/wiki/Fuckin%27_Problems,/wiki/ASAP_Rocky,fuckin' problems,asap rocky
625,2000,Blaque,30,Bring It All to Me,/wiki/Bring_It_All_to_Me,/wiki/Blaque,bring it all to me,blaque
885,2000,Blink 182,40,All The Small Things,/wiki/All_The_Small_Things,/wiki/Blink_182,all the small things,blink 182
1579,2014,Bobby Shmurda,54,Hot Boy,/wiki/Hot_Nigga,/wiki/Bobby_Shmurda,hot boy,bobby shmurda
2585,2005,Bun B,100,Give Me That,/wiki/Give_Me_That,/wiki/Bun_B,give me that,bun b


In [766]:
exceptions1.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,artist,date,ranking,title
0,2913,43426,Carrie Underwood,2008-04-26,27,Praying For Time
1,2914,43442,American Idol Top 8,2008-04-26,43,Shout To The Lord
2,2916,43479,Annie Lennox,2008-04-26,80,Many Rivers To Cross
3,2931,43678,Usher,2008-05-10,79,"Love In This Club, Part II"
4,2936,43695,Ashlee Simpson With Tom Higgenson,2008-05-10,96,Little Miss Obsessive


In [767]:
exceptions0.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,artist,date,ranking,title
0,6,6,"Missy ""Misdemeanor"" Elliott",2000-01-01,7,Hot Boyz
1,9,9,LFO,2000-01-01,10,Girl On TV
2,17,17,Christina Aguilera,2000-01-01,18,The Christmas Song (Chestnuts Roasting On An O...
3,43,43,Puff Daddy,2000-01-01,44,Satisfy You
4,52,52,98 Degrees,2000-01-01,53,This Gift


In [768]:
## songs that not receive data from Echo Nest
frames2 = [exceptions0, exceptions1]
need = pd.concat(frames2)

In [769]:
need.loc[:,"lower_title"] = need["title"].str.lower()
need.loc[:,"lower_artist"] = need["artist"].str.lower()

In [770]:
## figure out where the problems are
probs = []
for index, row in missing_ranks.iterrows():
    found = need[(need["lower_title"] == row.lower_title)&(need["lower_artist"] == row.lower_artist)]
    if len(found):
        print found, "in exceptions"
    else:
        probs.append(index)
        

     Unnamed: 0  Unnamed: 0.1         artist        date  ranking    title lower_title   lower_artist
161        5632         76395  Bobby Shmurda  2014-08-16       96  Hot Boy     hot boy  bobby shmurda in exceptions
     Unnamed: 0  Unnamed: 0.1    artist        date  ranking                     title               lower_title lower_artist
115         897         12754  P. Diddy  2002-06-08       55  I Need A Girl (Part Two)  i need a girl (part two)     p. diddy in exceptions
    Unnamed: 0  Unnamed: 0.1               artist        date  ranking                   title             lower_title         lower_artist
31        3229         47170  Soulja Boy Tell 'Em  2009-01-10       71  Kiss Me Thru The Phone  kiss me thru the phone  soulja boy tell 'em
36        3273         47908  Soulja Boy Tell 'em  2009-03-07        9  Kiss Me Thru The Phone  kiss me thru the phone  soulja boy tell 'em in exceptions
     Unnamed: 0  Unnamed: 0.1 artist        date  ranking     title lower_title lo

In [771]:
missing_ranks.loc[probs]

Unnamed: 0,year,band_singer,ranking,song,songurl,url,lower_title,lower_artist
1238,2013,ASAP Rocky,41,Fuckin' Problems,/wiki/Fuckin%27_Problems,/wiki/ASAP_Rocky,fuckin' problems,asap rocky
625,2000,Blaque,30,Bring It All to Me,/wiki/Bring_It_All_to_Me,/wiki/Blaque,bring it all to me,blaque
885,2000,Blink 182,40,All The Small Things,/wiki/All_The_Small_Things,/wiki/Blink_182,all the small things,blink 182
2585,2005,Bun B,100,Give Me That,/wiki/Give_Me_That,/wiki/Bun_B,give me that,bun b
585,2000,Celine Dion,28,That's The Way It Is,/wiki/That%27s_the_Way_It_Is_(Celine_Dion_song),/wiki/Celine_Dion,that's the way it is,celine dion
1287,2002,Charli Baltimore,44,Down 4 U,/wiki/Down_4_U,/wiki/Charli_Baltimore,down 4 u,charli baltimore
1050,2005,DHT,35,Listen to Your Heart,/wiki/Listen_to_Your_Heart_(Roxette_song)#DHT_...,/wiki/DHT_(band),listen to your heart,dht
516,2011,DJ Frank E,16,Tonight (I'm Lovin' You),/wiki/Tonight_(I%27m_Lovin%27_You),/wiki/DJ_Frank_E,tonight (i'm lovin' you),dj frank e
1139,2014,Descemer Bueno,38,Bailando,/wiki/Bailando_(Enrique_Iglesias_song),/wiki/Descemer_Bueno,bailando,descemer bueno
1070,2005,Edmee,35,Listen to Your Heart,/wiki/Listen_to_Your_Heart_(Roxette_song)#DHT_...,/wiki/Edm%C3%A9e_Daenen,listen to your heart,edmee


In [772]:
## Echo Nest dataset with no ranking
no_ranking = df_all[df_all.ranking.isnull()]

Manually find discrepancies in how songs and/or artists were written on Wikipedia vs. on Echo Nest.

In [773]:
# no_ranking[no_ranking.title.str.contains("I Need a Girl")]

In [774]:
df_all.loc[702, ["year", "band_singer", "ranking", "song", "songurl", "url"]] = missing_ranks.loc[367]

In [775]:
# no_ranking[no_ranking.title.str.contains("Next")]

In [776]:
df_all.loc[4702, ["year", "band_singer", "ranking", "song", "songurl", "url"]] = missing_ranks.loc[2484]

In [777]:
# no_ranking[no_ranking.title.str.contains("Problems")]

In [778]:
df_all.loc[4583, ["year", "band_singer", "ranking", "song", "songurl", "url"]] = missing_ranks.loc[1238]

In [779]:
# df_all[["year", "band_singer", "ranking", "song", "songurl", "url"]].loc[4583]

In [780]:
# no_ranking[no_ranking.title.str.contains("All The Small Things")]

In [781]:
df_all.loc[47, ["year", "band_singer", "ranking", "song", "songurl", "url"]] = missing_ranks.loc[885]

In [782]:
# df_all[["year", "band_singer", "ranking", "song", "songurl", "url"]].loc[47]

In [783]:
# no_ranking[no_ranking.title.str.contains("Your Heart")]

In [784]:
df_all.loc[1674, ["year", "band_singer", "ranking", "song", "songurl", "url"]] = missing_ranks.loc[1050]

In [785]:
# df_all[["year", "band_singer", "ranking", "song", "songurl", "url"]].loc[1674]

In [786]:
## new Echo Nest dataset with no ranking
no_ranking2 = df_all[df_all.ranking.isnull()]

In [787]:
len(no_ranking), len(no_ranking2)

(4646, 4641)

In [788]:
df_all.head()

Unnamed: 0,acousticness,analysis_url,artist_discovery,artist_discovery_rank,artist_familiarity,artist_familiarity_rank,artist_hotttnesss,artist_hotttnesss_rank,artist_id,artist_location,artist_name,audio_md5,audio_summary,danceability,duration,energy,id,index,key,liveness,loudness,mode,song_currency,song_currency_rank,song_discovery,song_discovery_rank,song_hotttnesss,song_hotttnesss_rank,song_type,speechiness,tempo,time_signature,title,valence,date,first_ranking,billboard_year,billboard_week,latitude,longitude,location,location_lower,lower_title,lower_artist,year,band_singer,ranking,song,songurl,url
0,0.023664,http://echonest-analysis.s3.amazonaws.com/TR/P...,0.369704,,0.673539,,0.588086,,ARN7POH1187B9B301E,"{u'latitude': 32.576489, u'location': u'Abilen...",Jessica Simpson,510d03f7f7773ffe84bd6365e75dfc45,"{u'key': 4, u'analysis_url': u'http://echonest...",0.644058,232.59955,0.872039,SOZYMAL14B5D7A5698,2,4,0.033556,-9.151,0,0.006367,,0.0,,0.355779,,"[studio, electric, vocal]",0.04487,130.18,4,I Wanna Love You Forever,0.626382,2000-01-01,3,2015,6,51.5118,-0.20594,"Notting Hill, London, England, GB","notting hill, london, england, gb",i wanna love you forever,jessica simpson,2000.0,Jessica Simpson,56.0,I Wanna Love You Forever,/wiki/I_Wanna_Love_You_Forever,/wiki/Jessica_Simpson
1,0.046606,http://echonest-analysis.s3.amazonaws.com/TR/7...,0.299033,437039.0,0.811461,83.0,0.654229,1358.0,AR30R5E1187B9AD78A,"{u'latitude': 40.736101, u'location': u'Newark...",Whitney Houston,24aa3d1259e3480900721571bc4fe8d6,"{u'key': 0, u'analysis_url': u'http://echonest...",0.660295,258.09288,0.84701,SOPBEQQ12B0B80B9BD,3,0,0.066933,-6.398,1,0.02866,24931.0,0.002068,1561470.0,0.461754,9856.0,"[studio, electric, vocal]",0.037339,134.98,4,My Love Is Your Love,0.510183,2000-01-01,4,2015,6,52.166667,1.0,"Suffolk, England","suffolk, england",my love is your love,whitney houston,2000.0,Whitney Houston,47.0,My Love Is Your Love,/wiki/My_Love_Is_Your_Love_(song),/wiki/Whitney_Houston
2,0.240021,http://echonest-analysis.s3.amazonaws.com/TR/r...,0.453046,30544.0,0.561868,5061.0,0.596687,2763.0,AR6JVK31187FB58FE3,"{u'latitude': 45.05, u'location': u'Turin, Ita...",Eiffel 65,,"{u'key': 7, u'analysis_url': u'http://echonest...",0.812507,210.18667,0.971241,SOOAIRU146168415C1,12,7,0.38653,-6.81,0,0.055708,11236.0,0.006311,567371.0,0.471915,8139.0,"[studio, electric, vocal]",0.052639,127.982,4,Blue (Da Ba Dee),0.795222,2000-01-01,13,2015,6,40.357242,-75.940153,"Reading, PA","reading, pa",blue (da ba dee),eiffel 65,2000.0,Eiffel 65,49.0,Blue (Da Ba Dee),/wiki/Blue_(Da_Ba_Dee),/wiki/Eiffel_65
3,0.161116,http://echonest-analysis.s3.amazonaws.com/TR/O...,0.416272,,0.542365,,0.53219,,ARM8DAD1187B993E2E,"{u'latitude': 26.0657, u'location': u'Davie, F...",Guy,a533657e04f50cb451677695ce32c756,"{u'key': 1, u'analysis_url': u'http://echonest...",0.833038,281.56757,0.337699,SOQSRPK13E90787CD5,18,1,0.185389,-16.155,1,0.0,,0.0,,0.0,,"[studio, electric, vocal]",0.15172,101.108,4,Dancin',0.70558,2000-01-01,19,2015,6,13.083333,80.283333,"Chennai, India","chennai, india",dancin',guy,,,,,,
4,0.562595,http://echonest-analysis.s3.amazonaws.com/TR/j...,0.48727,5662.0,0.417185,31892.0,0.521144,11716.0,AR8VNGZ1187FB4168D,"{u'latitude': 43.6486, u'location': u'Toronto,...",LEN,6ab709aba3e5f7f1b7bf1892307d7c89,"{u'key': 4, u'analysis_url': u'http://echonest...",0.580348,203.04934,0.680611,SOAEYTC144D7F51291,19,4,0.579321,-19.717,1,0.045766,15729.0,0.0,,0.508462,4178.0,"[studio, vocal]",0.068707,96.171,4,Steal My Sunshine,0.729627,2000-01-01,20,2015,6,52.05,4.5,"Zoetermeer, Netherlands","zoetermeer, netherlands",steal my sunshine,len,,,,,,


In [789]:
df_all.to_csv("tempdata/df_all_cleaned5.csv", encoding='utf-8')

In [731]:
df_all_00_14 = df_all[df_all.billboard_year!=2015]

In [732]:
df_all_15 = df_all[df_all.billboard_year==2015]

In [733]:
len(df_all_00_14), len(df_all_15), len(df_all)

(4503, 1358, 5861)

In [734]:
df_all_00_14.to_csv("tempdata/df_all_00_14.csv", encoding='utf-8')

In [735]:
df_all_15.to_csv("tempdata/df_all_15.csv", encoding='utf-8')

In [790]:
df_all.to_csv("tempdata/df_tableau.csv", encoding='utf-8')

In [499]:
df_all.describe()

Unnamed: 0,acousticness,artist_discovery,artist_discovery_rank,artist_familiarity,artist_familiarity_rank,artist_hotttnesss,artist_hotttnesss_rank,danceability,duration,energy,index,key,liveness,loudness,mode,song_currency,song_currency_rank,song_discovery,song_discovery_rank,song_hotttnesss,song_hotttnesss_rank,speechiness,tempo,time_signature,valence,first_ranking,year,ranking,latitude,longitude
count,5392.0,5382.0,4684.0,5392.0,4684.0,5392.0,4685.0,5392.0,5392.0,5392.0,5392.0,5392.0,5392.0,5392.0,5392.0,5392.0,4329.0,5392.0,3292.0,5392.0,4151.0,5385.0,5392.0,5392.0,5392.0,5392.0,1199.0,1199.0,5326.0,5326.0
mean,0.179095,0.402784,165933.453886,0.673517,11261.366354,0.67051,13327.743863,0.60884,231.876253,0.654372,1448.86276,5.324926,0.226727,-8.099701,0.694177,0.022945,558231.59575,0.004625,1266477.124241,0.301004,282551.0,0.10891,119.056897,3.96161,0.53842,75.604228,2007.204337,50.229358,35.951244,-79.644833
std,0.22855,0.072874,203194.496426,0.137285,95817.302989,0.128197,119427.73382,0.14806,53.014764,0.202818,824.620479,3.564513,0.193892,4.67978,0.460798,0.038534,1216953.994071,0.008859,1059935.263814,0.187402,666886.721241,0.122538,28.727475,0.336119,0.223708,23.573655,4.280341,29.077754,11.352465,40.342245
min,1e-06,0.004978,155.0,0.0,2.0,0.0,2.0,0.0,16.4,0.008989,2.0,0.0,0.010603,-33.987,0.0,0.0,609.0,0.0,3247.0,0.0,5.0,0.022356,45.557,1.0,0.027087,1.0,2000.0,1.0,-37.8175,-155.434
25%,0.014672,0.358564,38198.0,0.60653,203.75,0.582626,271.0,0.510652,205.22667,0.510808,738.0,2.0,0.098377,-9.71675,0.0,0.000469,13523.0,0.0,384920.75,0.168665,7818.0,0.035548,95.811,4.0,0.362173,67.0,2004.0,25.0,33.7667,-95.845
50%,0.075353,0.400442,108730.0,0.696815,677.0,0.673785,1032.0,0.613739,227.90667,0.680573,1453.5,6.0,0.148995,-6.4635,1.0,0.006485,52510.0,0.001318,953160.0,0.318696,48431.0,0.053719,119.9625,4.0,0.541774,82.0,2007.0,50.0,36.165688,-84.47405
75%,0.263992,0.44766,220354.0,0.77195,2401.0,0.762624,3522.0,0.711598,253.13,0.820149,2160.0,8.0,0.307396,-4.908,1.0,0.039926,366361.0,0.005299,1958319.0,0.443354,246394.0,0.126049,136.9695,4.0,0.713399,94.0,2011.0,76.0,40.714269,-75.940153
max,0.991571,0.686574,1663683.0,0.888588,1908609.0,0.943333,2203989.0,0.98857,822.39955,0.997509,2887.0,11.0,1.0,-0.598,1.0,0.643517,6135241.0,0.160305,4101112.0,0.801319,6393232.0,0.959492,239.294,7.0,0.979302,100.0,2014.0,100.0,64.15,174.945
