In [1]:
%matplotlib inline

In [2]:
# Import dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as st
from scipy.stats import linregress
import os
from datetime import datetime as dt 

In [3]:
# Load in file -- 586672 tracks
file_path = os.path.join(os.getcwd(),'Resources', 'tracks.csv')
tracks_df = pd.read_csv(file_path)
tracks_df.head()

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,id_artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,35iwgR4jXetI318WEWsa1Q,Carve,6,126903,0,['Uli'],['45tIt06XoI0Iio4LBEVpls'],1922-02-22,0.645,0.445,0,-13.338,1,0.451,0.674,0.744,0.151,0.127,104.851,3
1,021ht4sdgPcrDgSk7JTbKY,Capítulo 2.16 - Banquero Anarquista,0,98200,0,['Fernando Pessoa'],['14jtPCOoNZwquk5wd9DxrY'],1922-06-01,0.695,0.263,0,-22.136,1,0.957,0.797,0.0,0.148,0.655,102.009,1
2,07A5yehtSnoedViJAZkNnc,Vivo para Quererte - Remasterizado,0,181640,0,['Ignacio Corsini'],['5LiOoJbxVSAMkBS2fUm3X2'],1922-03-21,0.434,0.177,1,-21.18,1,0.0512,0.994,0.0218,0.212,0.457,130.418,5
3,08FmqUhxtyLTn6pAh6bk45,El Prisionero - Remasterizado,0,176907,0,['Ignacio Corsini'],['5LiOoJbxVSAMkBS2fUm3X2'],1922-03-21,0.321,0.0946,7,-27.961,1,0.0504,0.995,0.918,0.104,0.397,169.98,3
4,08y9GfoqCWfOGsKdwojr5e,Lady of the Evening,0,163080,0,['Dick Haymes'],['3BiJGZsyX9sJchTqcSA7Su'],1922,0.402,0.158,3,-16.9,0,0.039,0.989,0.13,0.311,0.196,103.22,4


In [4]:
# Clean and format data 
clean_df = tracks_df[['id', 'name', 'popularity', 'duration_ms', 'explicit', 'artists', 'release_date', 'danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']]

In [5]:
# dropping rows with missing values
clean_df = clean_df.dropna(how = 'any') # 586601 tracks
clean_df.head(10)

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,35iwgR4jXetI318WEWsa1Q,Carve,6,126903,0,['Uli'],1922-02-22,0.645,0.445,-13.338,0.451,0.674,0.744,0.151,0.127,104.851
1,021ht4sdgPcrDgSk7JTbKY,Capítulo 2.16 - Banquero Anarquista,0,98200,0,['Fernando Pessoa'],1922-06-01,0.695,0.263,-22.136,0.957,0.797,0.0,0.148,0.655,102.009
2,07A5yehtSnoedViJAZkNnc,Vivo para Quererte - Remasterizado,0,181640,0,['Ignacio Corsini'],1922-03-21,0.434,0.177,-21.18,0.0512,0.994,0.0218,0.212,0.457,130.418
3,08FmqUhxtyLTn6pAh6bk45,El Prisionero - Remasterizado,0,176907,0,['Ignacio Corsini'],1922-03-21,0.321,0.0946,-27.961,0.0504,0.995,0.918,0.104,0.397,169.98
4,08y9GfoqCWfOGsKdwojr5e,Lady of the Evening,0,163080,0,['Dick Haymes'],1922,0.402,0.158,-16.9,0.039,0.989,0.13,0.311,0.196,103.22
5,0BRXJHRNGQ3W4v9frnSfhu,Ave Maria,0,178933,0,['Dick Haymes'],1922,0.227,0.261,-12.343,0.0382,0.994,0.247,0.0977,0.0539,118.891
6,0Dd9ImXtAtGwsmsAD69KZT,La Butte Rouge,0,134467,0,['Francis Marty'],1922,0.51,0.355,-12.833,0.124,0.965,0.0,0.155,0.727,85.754
7,0IA0Hju8CAgYfV1hwhidBH,La Java,0,161427,0,['Mistinguett'],1922,0.563,0.184,-13.757,0.0512,0.993,1.6e-05,0.325,0.654,133.088
8,0IgI1UCz84pYeVetnl1lGP,Old Fashioned Girl,0,310073,0,['Greg Fieler'],1922,0.488,0.475,-16.222,0.0399,0.62,0.00645,0.107,0.544,139.952
9,0JV4iqw2lSKJaHBQZ0e5zK,Martín Fierro - Remasterizado,0,181173,0,['Ignacio Corsini'],1922-03-29,0.548,0.0391,-23.228,0.153,0.996,0.933,0.148,0.612,75.595


In [6]:
## Looking at tracks, unique titles, artists, & trying to determine possible duplicates
total_tracks = len(clean_df)
unique_songs = clean_df['name'].nunique()
unique_artists = clean_df['artists'].nunique()
pos_dupes = total_tracks - unique_songs
print(f"Number of lines in current set = {total_tracks} \n The number of unique song titles = {unique_songs} \n The number of artists = {unique_artists}")
print(f"\n Possible duplicates = {pos_dupes}, which is {round(pos_dupes/total_tracks*100,2)}%. Because different artists have songs with the same titles, the actual percentage of duplicates is less than this.")

Number of lines in current set = 586601 
 The number of unique song titles = 446474 
 The number of artists = 114029

 Possible duplicates = 140127, which is 23.89%. Because different artists have songs with the same titles, the actual percentage of duplicates is less than this.


In [64]:
# smaller dataframe that might help figure out loops 
# 2962 rows
# different artists & different song titles -- but should have duplicates of both
contains_one_df = clean_df.loc[clean_df['name'].str.contains('One')].reset_index()
contains_one_df.head(10)

Unnamed: 0,index,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,207,3DzcB6weYL3L7oRNmsJCaV,No One Else But You,6,201267,0,['Louis Armstrong & His Savoy Ballroom Five'],1923,0.496,0.311,-11.202,0.124,0.923,0.0127,0.252,0.718,191.035
1,491,41RLkvyELWv5ydLwQ6FsLu,You Always Hurt the One You Love,0,209058,0,['Harry Curtis'],1923,0.475,0.294,-12.168,0.0381,0.88,0.0,0.109,0.236,126.301
2,999,5hGmr6KQWzcCGdTYxf4P4S,Stompin' at the Savoy - Mix One,0,192680,0,['Lionel Hampton Orchestra'],1924,0.742,0.805,-5.123,0.036,0.919,0.578,0.259,0.84,112.84
3,1520,3lZDXtwP8CdwuSO2PPJfaX,One Night In Monte Carlo,4,182933,0,['Tommy Dorsey & His Clambake Seven'],1925,0.796,0.341,-13.195,0.0567,0.97,0.0313,0.215,0.946,117.325
4,1558,4IYXV4qfys6ybXI5FhjTko,No One Else But You,2,200067,0,"['Louis Armstrong & His Hot Seven', 'Carroll D...",1925,0.531,0.291,-11.169,0.11,0.976,0.0392,0.255,0.848,192.062
5,1658,5YOdjn1A3ezJfYT8Wa06zh,No One Else But You,0,200067,0,['Louis Armstrong Hot Seven sic - Big Band;Car...,1925,0.531,0.291,-11.169,0.11,0.976,0.0392,0.255,0.848,192.062
6,1785,6zlJdSFWE8GMs46gXzTOkf,No One Else But You,0,200067,0,"['Louis Armstrong & His Hot Seven', 'Carroll D...",1925,0.531,0.291,-11.169,0.11,0.976,0.0392,0.255,0.848,192.047
7,2795,6TV9XorVkeDwjyaQCf7xhd,Dear One,0,176027,0,['Ted Weems & His Orchestra'],1926-07-01,0.692,0.177,-12.051,0.0399,0.993,0.00164,0.0805,0.451,107.099
8,3956,7oDm0X9r6FDGwHv0VsLH4p,No One Else But You,9,200333,0,"['Louis Armstrong & His Hot Seven', 'Carroll D...",1928,0.531,0.319,-10.174,0.12,0.974,0.0244,0.275,0.837,191.732
9,4283,29l7bGW6qVoPqm0X9VZsjF,A Shropshire Lad: When I was One-and-Twenty,0,90000,0,"['George Butterworth', 'John Cameron']",1928,0.391,0.0341,-22.964,0.051,0.993,0.0,0.345,0.186,83.791


In [65]:
# converting release_date to year & adding column for year
contains_one_df['year'] = pd.DatetimeIndex(contains_one_df['release_date']).year
contains_one_df

Unnamed: 0,index,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,year
0,207,3DzcB6weYL3L7oRNmsJCaV,No One Else But You,6,201267,0,['Louis Armstrong & His Savoy Ballroom Five'],1923,0.496,0.3110,-11.202,0.1240,0.9230,0.012700,0.2520,0.7180,191.035,1923
1,491,41RLkvyELWv5ydLwQ6FsLu,You Always Hurt the One You Love,0,209058,0,['Harry Curtis'],1923,0.475,0.2940,-12.168,0.0381,0.8800,0.000000,0.1090,0.2360,126.301,1923
2,999,5hGmr6KQWzcCGdTYxf4P4S,Stompin' at the Savoy - Mix One,0,192680,0,['Lionel Hampton Orchestra'],1924,0.742,0.8050,-5.123,0.0360,0.9190,0.578000,0.2590,0.8400,112.840,1924
3,1520,3lZDXtwP8CdwuSO2PPJfaX,One Night In Monte Carlo,4,182933,0,['Tommy Dorsey & His Clambake Seven'],1925,0.796,0.3410,-13.195,0.0567,0.9700,0.031300,0.2150,0.9460,117.325,1925
4,1558,4IYXV4qfys6ybXI5FhjTko,No One Else But You,2,200067,0,"['Louis Armstrong & His Hot Seven', 'Carroll D...",1925,0.531,0.2910,-11.169,0.1100,0.9760,0.039200,0.2550,0.8480,192.062,1925
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2957,586139,204anQrlLm8N1CNjzJlXSs,One Voice,25,353933,0,"['Marty Nystrom', ""Integrity's Hosanna! Music""]",1993,0.363,0.2230,-14.234,0.0322,0.6710,0.000000,0.3550,0.1500,130.265,1993
2958,586414,2tSPS5dRintqVulKAKni2T,One (feat. 지선),42,230320,0,"['Epik High', '지선']",2008-04-17,0.742,0.7830,-4.294,0.0368,0.0765,0.000104,0.0558,0.5940,135.020,2008
2959,586469,3hW0KHABBsresOUik93KF6,"One, Two, Buckle My Shoe",49,33333,0,['Mother Goose Club'],2011-04-29,0.912,0.5730,-11.185,0.0936,0.0152,0.000000,0.0715,0.9640,99.972,2011
2960,586514,2iSfhXTXCTXthD0ci36yhS,"Fifth Sunday of Lent, Year C: Has No One Conde...",1,37333,0,['Christopher Walker'],2014-05-01,0.217,0.0192,-26.233,0.0503,0.9850,0.018700,0.2250,0.0376,102.292,2014


In [8]:
# smaller dataframe that might help figure out loops 
# 290 rows; 203 songs
# one artist & different song titles -- but few duplicates of song titles
u2_df = clean_df.loc[clean_df['artists'] == "['U2']"].reset_index()
u2_songs = u2_df['name'].nunique()  #203
u2_df

Unnamed: 0,index,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,53586,0T702y9ndzZTnPq1bqiAM2,I Will Follow - Remastered 2008,59,217387,0,['U2'],1980-10-20,0.360,0.909,-6.827,0.0663,0.000347,0.000033,0.2820,0.647,155.293
1,53947,7dni8dPFeedwSTX8hqcC3s,Out Of Control - Remastered 2008,47,254040,0,['U2'],1980-10-20,0.208,0.878,-8.731,0.1300,0.000378,0.012000,0.0935,0.401,155.162
2,54306,37r50D42oXhzxtH3VyBzJX,Stories For Boys - Remastered 2008,40,182427,0,['U2'],1980-10-20,0.228,0.905,-7.047,0.0575,0.031900,0.000014,0.1130,0.423,155.180
3,54360,3OZG87f4TFHCt0t9VStdH0,I Will Follow - Remastered 2008,40,217387,0,['U2'],1980-10-20,0.360,0.909,-6.827,0.0663,0.000347,0.000033,0.2820,0.647,155.293
4,54380,3w6Ml5q6dZ1WVcwHIa0JyI,Twilight - Remastered 2008,39,262453,0,['U2'],1980-10-20,0.377,0.776,-8.707,0.0355,0.004100,0.267000,0.1090,0.419,141.168
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
285,549402,6AKfPkyy8yTfQKU48TuoMW,Sweetest Thing,25,184543,0,['U2'],1987-03-03,0.524,0.641,-8.967,0.0497,0.043700,0.000028,0.1590,0.482,141.650
286,549506,6abwt2tM2wEwS1HPTFwNsr,In God's Country,16,176597,0,['U2'],1987-03-03,0.497,0.704,-8.085,0.0324,0.007100,0.000067,0.7830,0.607,125.919
287,549514,6aaKZMKFmxKbBJNjSW59O2,Bullet The Blue Sky,15,271547,0,['U2'],1987-03-03,0.364,0.638,-10.240,0.0441,0.007350,0.399000,0.1330,0.451,152.509
288,549632,05lBOiUnCwQaulWFMflvHT,Running To Stand Still,11,257366,0,['U2'],1987-03-03,0.537,0.199,-18.726,0.0287,0.861000,0.025900,0.3340,0.229,94.634


203

In [47]:
# smaller dataframe for testing loops
# 77 tracks, 34 songs
# one artist & multiple duplicates that look like exact same track
gaga_df = clean_df.loc[clean_df['artists'] == "['Lady Gaga']"]
gaga_songs = gaga_df['name'].nunique()     
gaga_df

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
81271,1QV6tiMFM6fSOKOGLMHYYg,Poker Face,78,237200,0,['Lady Gaga'],2008-01-01,0.851,0.806,-4.620,0.0787,0.11800,0.000002,0.1210,0.787,118.999
81300,5R8dQOPq8haW94K7mgERlO,Poker Face,75,237200,0,['Lady Gaga'],2008-01-01,0.851,0.806,-4.620,0.0787,0.11800,0.000002,0.1210,0.787,118.999
81331,02XnQdf7sipaKBBHixz3Zp,Paparazzi,72,208307,0,['Lady Gaga'],2008-01-01,0.762,0.692,-3.973,0.0438,0.11300,0.000000,0.0940,0.397,114.906
81417,7Hqig8kp32q2Ire3ECQvWM,Paparazzi,69,208307,0,['Lady Gaga'],2008-01-01,0.762,0.692,-3.973,0.0438,0.11300,0.000000,0.0940,0.397,114.906
81543,0eH2eHURaXUP15D8gQlfjx,LoveGame,65,216333,0,['Lady Gaga'],2008-01-01,0.894,0.678,-5.611,0.0523,0.00569,0.000002,0.3170,0.844,105.024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
558981,4q57oGT90FfPHf7vWp32Ho,Judas,38,249067,0,['Lady Gaga'],2011-01-01,0.664,0.934,-3.848,0.0697,0.00108,0.000028,0.2700,0.530,130.982
559227,69jnWw97sX1dYK2grPhyF9,You And I,31,307360,0,['Lady Gaga'],2011-01-01,0.521,0.701,-5.238,0.0458,0.08920,0.000000,0.0876,0.518,127.087
559768,1EntfuxWPYjI8ONzkfNb0W,Applause,40,212333,0,['Lady Gaga'],2013-11-11,0.669,0.780,-4.287,0.0530,0.02650,0.000002,0.1430,0.738,139.945
561178,3jArQ9tPKWxKnn18iXS3TF,Million Reasons,41,205280,0,['Lady Gaga'],2016-10-21,0.666,0.423,-8.012,0.0430,0.49400,0.000000,0.1060,0.154,129.890


In [10]:
# smaller data frame for testing loops
# 8 rows
# just 1 song -- but looks like several different versions based on acoustic values
poker_face_df = gaga_df.loc[gaga_df['name'].str.contains('Poker Face')].reset_index()
poker_face_df

Unnamed: 0,level_0,index,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,0,81271,1QV6tiMFM6fSOKOGLMHYYg,Poker Face,78,237200,0,['Lady Gaga'],2008-01-01,0.851,0.806,-4.62,0.0787,0.118,2e-06,0.121,0.787,118.999
1,1,81300,5R8dQOPq8haW94K7mgERlO,Poker Face,75,237200,0,['Lady Gaga'],2008-01-01,0.851,0.806,-4.62,0.0787,0.118,2e-06,0.121,0.787,118.999
2,23,110065,4liFTpYparE2zEUp2GfG64,Poker Face,36,238907,0,['Lady Gaga'],2020-10-23,0.851,0.818,-4.637,0.0789,0.112,3e-06,0.0945,0.808,119.006
3,27,130728,7zACUQbWQxzT4aER7ityBV,Poker Face,57,237200,0,['Lady Gaga'],2009-11-05,0.851,0.806,-4.62,0.0787,0.118,2e-06,0.121,0.787,118.999
4,37,200963,4wB4cPK52RCQviHm4SC2wa,Poker Face,9,238907,0,['Lady Gaga'],2020-10-23,0.851,0.818,-4.637,0.0789,0.112,3e-06,0.0945,0.808,119.006
5,58,387594,3cJTPvEe33hGNJguNPAoBC,Poker Face,46,237533,0,['Lady Gaga'],2008-09-26,0.849,0.824,-4.61,0.0788,0.128,3e-06,0.0867,0.785,119.001
6,67,444082,0gVpWodvUHcvcgVjK26Jga,Poker Face,3,238320,0,['Lady Gaga'],2015-12-11,0.85,0.784,-5.668,0.0785,0.134,3e-06,0.0997,0.779,119.0
7,76,583175,70vvnTUamBXOc0vRk7BBDu,Poker Face,50,237200,0,['Lady Gaga'],2009-11-18,0.851,0.806,-4.618,0.0788,0.12,1e-06,0.122,0.775,119.002


### List of dataframes
* clean_df = 586601 lines -- what ultimately needs to be cleaned
* contains_one_df = 2962 lines -- should have duplicate artists & duplicate tracks in varying combos
* u2_df = 290 lines -- just U2, has duplicate tracks but some are definitely different versions
* gaga_df = 77 lines -- just Lady Gaga, has duplicate tracks
* poker_face_df = 8 lines -- all versions of "Poker Face"--4 of which look identical except popularity

### Cleaning for artist x number of tracks popularity
1. remove release dates pre-1950 bc contemporary pop music is considered to have started in mid-1950s, so removing older tracks seems reasonable
2. 

In [51]:
working_df = tracks_df.loc[['artists'] == 'Lady Gaga']

KeyError: False

In [None]:
### 1. Removing tracks prior to 1950
1.1 getting year for all tracks 
* BUILD ON gaga_df 
* need to transform release date from string to int  -- this could be tricky because some columns only had year to begin with
* need to extract year from release_date
* save year to new column
* TEST ON contains_one_df

1.2 remove tracks older than specified year
* TEST ON u2_df with release pre-1990
#. post_1950_df = tracks_df.loc[tracks_df['release_date'] > 1950, :]

In [58]:
pd.options.mode.chained_assignment = None # was sometimes getting an error with this, so found this solution online
# add a column for year
gaga_df['Year'] = pd.to_datetime(gaga_df['release_date'])
gaga_df.head(3)

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,Year
81271,1QV6tiMFM6fSOKOGLMHYYg,Poker Face,78,237200,0,['Lady Gaga'],2008-01-01,0.851,0.806,-4.62,0.0787,0.118,2e-06,0.121,0.787,118.999,2008-01-01
81300,5R8dQOPq8haW94K7mgERlO,Poker Face,75,237200,0,['Lady Gaga'],2008-01-01,0.851,0.806,-4.62,0.0787,0.118,2e-06,0.121,0.787,118.999,2008-01-01
81331,02XnQdf7sipaKBBHixz3Zp,Paparazzi,72,208307,0,['Lady Gaga'],2008-01-01,0.762,0.692,-3.973,0.0438,0.113,0.0,0.094,0.397,114.906,2008-01-01


In [60]:
gaga_df['Year2'] = pd.DatetimeIndex(gaga_df['Year']).year
gaga_df.head(5)

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,Year,Year2
81271,1QV6tiMFM6fSOKOGLMHYYg,Poker Face,78,237200,0,['Lady Gaga'],2008-01-01,0.851,0.806,-4.62,0.0787,0.118,2e-06,0.121,0.787,118.999,2008-01-01,2008
81300,5R8dQOPq8haW94K7mgERlO,Poker Face,75,237200,0,['Lady Gaga'],2008-01-01,0.851,0.806,-4.62,0.0787,0.118,2e-06,0.121,0.787,118.999,2008-01-01,2008
81331,02XnQdf7sipaKBBHixz3Zp,Paparazzi,72,208307,0,['Lady Gaga'],2008-01-01,0.762,0.692,-3.973,0.0438,0.113,0.0,0.094,0.397,114.906,2008-01-01,2008
81417,7Hqig8kp32q2Ire3ECQvWM,Paparazzi,69,208307,0,['Lady Gaga'],2008-01-01,0.762,0.692,-3.973,0.0438,0.113,0.0,0.094,0.397,114.906,2008-01-01,2008
81543,0eH2eHURaXUP15D8gQlfjx,LoveGame,65,216333,0,['Lady Gaga'],2008-01-01,0.894,0.678,-5.611,0.0523,0.00569,2e-06,0.317,0.844,105.024,2008-01-01,2008


In [48]:
# Changing release date to year
## use iterrows to iterate through pandas dataframe
#. for index, row in gaga_df.iterrows():
for index, row in gaga_df.iterrows():
    
    this_release = row['release_date'] # not sure I will need this
    # convert date using datetime.strptime
    print(this_release)
    # try:
    gaga_date = dt.strptime((gaga_df.loc[row, 'release_date']), '%Y-%m-%d')
    gaga_year = gaga_date.year
    gaga_df[index, 'Year'] = gaga_year
    # except (KeyError, IndexError):
     #   gaga_df[row, 'year'] = gaga_df.loc[row, 'release_date']
      #  print(f"check row {row} for strings")

2008-01-01


KeyError: "None of [Index(['1QV6tiMFM6fSOKOGLMHYYg',             'Poker Face',\n                             78,                   237200,\n                              0,          '['Lady Gaga']',\n                   '2008-01-01',                    0.851,\n             0.8059999999999999,                    -4.62,\n                         0.0787,      0.11800000000000001,\n                       1.64e-06,                    0.121,\n                          0.787,                  118.999],\n      dtype='object')] are in the [index]"

In [13]:
# THIS WORKS -- extracting date with datetime
gaga_date = dt.strptime((gaga_df.loc[0,'release_date']), '%Y-%m-%d') # initially couldn't get to work because the index number wasn't 1; should probably reindex along the way
gaga_date

datetime.datetime(2008, 1, 1, 0, 0)

In [14]:
# THIS WORKS -- extracting year from datetime date
gaga_year = gaga_date.year
gaga_year

2008

In [28]:
# THIS WORKS -- putting specific value into specific location of df
gaga_df.at[0, 'Year'] = gaga_year
gaga_df.head(3)

Unnamed: 0,index,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,Year
0,81271,1QV6tiMFM6fSOKOGLMHYYg,Poker Face,78,237200,0,['Lady Gaga'],2008-01-01,0.851,0.806,-4.62,0.0787,0.118,2e-06,0.121,0.787,118.999,2008.0
1,81300,5R8dQOPq8haW94K7mgERlO,Poker Face,75,237200,0,['Lady Gaga'],2008-01-01,0.851,0.806,-4.62,0.0787,0.118,2e-06,0.121,0.787,118.999,
2,81331,02XnQdf7sipaKBBHixz3Zp,Paparazzi,72,208307,0,['Lady Gaga'],2008-01-01,0.762,0.692,-3.973,0.0438,0.113,0.0,0.094,0.397,114.906,


# Saving the unique cities may give clue to saving the unique songs -- so maybe check artist, then name, then duration & keep it if duration is different?
# From WeatherPy
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)
    
# Print the city count to confirm sufficient count
len(cities)