In [1]:
import pandas as pd

# Lift pandas' display limits so printed frames are not truncated with "...".
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Read the CSV file (path is relative to the working directory) into a DataFrame.
df = pd.read_csv('Spotify_2000.csv')

# Preview the first 5 rows.
print(df.head())

# Summarise column names, non-null counts and dtypes.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
df.info()

# List the column labels.
print(df.columns)

   Index                   Title             Artist            Top Genre  \
0      1                 Sunrise        Norah Jones      adult standards   
1      2             Black Night        Deep Purple           album rock   
2      3          Clint Eastwood           Gorillaz  alternative hip hop   
3      4           The Pretender       Foo Fighters    alternative metal   
4      5  Waitin' On A Sunny Day  Bruce Springsteen         classic rock   

   Year  Beats Per Minute (BPM)  Energy  Danceability  Loudness (dB)  \
0  2004                     157      30            53            -14   
1  2000                     135      79            50            -11   
2  2001                     168      69            66             -9   
3  2007                     173      96            43             -4   
4  2002                     106      82            58             -5   

   Liveness  Valence Length (Duration)  Acousticness  Speechiness  Popularity  
0        11       68          

In [2]:
# Keep only songs released from 2000 through 2005 (both endpoints included).
released_in_range = df['Year'].between(2000, 2005)
df_filtered = df.loc[released_in_range]

# Mean 'Beats Per Minute (BPM)' for each release year inside that window.
bpm_grouped_by_year = df_filtered.groupby('Year')['Beats Per Minute (BPM)']
average_bpm_per_year = bpm_grouped_by_year.mean()

print(average_bpm_per_year)

Year
2000    119.151515
2001    125.666667
2002    116.489362
2003    123.142857
2004    120.933333
2005    120.545455
Name: Beats Per Minute (BPM), dtype: float64


In [3]:
# Fractional change of each year's mean BPM relative to the preceding year;
# the first year has no predecessor, so its entry is NaN.
yearly_bpm_fractional_change = average_bpm_per_year.pct_change()

# Express the change as a percentage.
average_bpm_percentage_change = yearly_bpm_fractional_change.mul(100)

# Show the year-over-year percentage series.
print(average_bpm_percentage_change)

Year
2000         NaN
2001    5.467955
2002   -7.302895
2003    5.711676
2004   -1.794277
2005   -0.320738
Name: Beats Per Minute (BPM), dtype: float64


In [4]:
# Year whose average BPM rose the most relative to the previous year.
# idxmax returns the index label (the year) of the largest value and skips
# the NaN entry of the first year.
year_with_highest_bpm_increase = average_bpm_percentage_change.idxmax()

# Year whose average BPM fell the most relative to the previous year.
# idxmin already returns the first label of the smallest value, so no
# de-duplication step is needed before it.
year_with_highest_bpm_decrease = average_bpm_percentage_change.idxmin()

print(f"Year with the highest increase in average BPM: {year_with_highest_bpm_increase}")
print(f"Year with the highest decrease in average BPM: {year_with_highest_bpm_decrease}")

Year with the highest increase in average BPM: 2003
Year with the highest decrease in average BPM: 2002


In [5]:
# Songs released in the year whose average BPM increased the most.
in_increase_year = df_filtered['Year'].eq(year_with_highest_bpm_increase)
songs_from_highest_bpm_year = df_filtered.loc[in_increase_year]

# Keep every row that ties for the top BPM within that year.
peak_bpm = songs_from_highest_bpm_year['Beats Per Minute (BPM)'].max()
highest_bpm_song = songs_from_highest_bpm_year.loc[
    songs_from_highest_bpm_year['Beats Per Minute (BPM)'].eq(peak_bpm)
]

# Report the first matching row's title and artist.
highest_bpm_song_title = highest_bpm_song['Title'].iloc[0]
highest_bpm_artist = highest_bpm_song['Artist'].iloc[0]

# Songs released in the year whose average BPM decreased the most.
in_decrease_year = df_filtered['Year'].eq(year_with_highest_bpm_decrease)
songs_from_lowest_bpm_year = df_filtered.loc[in_decrease_year]

# Keep every row that ties for the bottom BPM within that year.
floor_bpm = songs_from_lowest_bpm_year['Beats Per Minute (BPM)'].min()
lowest_bpm_song = songs_from_lowest_bpm_year.loc[
    songs_from_lowest_bpm_year['Beats Per Minute (BPM)'].eq(floor_bpm)
]

# Report the first matching row's title and artist.
lowest_bpm_song_title = lowest_bpm_song['Title'].iloc[0]
lowest_bpm_artist = lowest_bpm_song['Artist'].iloc[0]

# Show the selected songs.
print(f"Highest BPM Song: {highest_bpm_song_title} by {highest_bpm_artist}")
print(f"Lowest BPM Song: {lowest_bpm_song_title} by {lowest_bpm_artist}")

Highest BPM Song: Omarm by BLØF
Lowest BPM Song: Zij by Marco Borsato


In [6]:
# Restrict to "alternative hip hop" songs inside the 2000-2005 window.
# df_filtered (already limited to 2000-2005) is used rather than the full df,
# so the reported years cannot fall outside the range analysed in the
# preceding cells.
df_filtered_alt_hip_hop = df_filtered[df_filtered['Top Genre'] == 'alternative hip hop']

# Mean 'Danceability' per release year for that genre.
average_danceability_per_year = df_filtered_alt_hip_hop.groupby('Year')['Danceability'].mean()

# Year (index label) with the highest average danceability.
year_with_highest_danceability = average_danceability_per_year.idxmax()

# Year (index label) with the lowest average danceability.
year_with_lowest_danceability = average_danceability_per_year.idxmin()

# Report both years.
print(f"Year with the highest average danceability score: {year_with_highest_danceability}")
print(f"Year with the lowest average danceability score: {year_with_lowest_danceability}")

Year with the highest average danceability score: 2005
Year with the lowest average danceability score: 2001


In [8]:
                      filtered_data = df[df['Year'].between(2000, 2005)]

# Calculating the average BPM for each year
avg_bpm_per_year = filtered_data.groupby('Year')['Beats Per Minute (BPM)'].mean()

# Calculating year-over-year percentage change in average BPM
bpm_percentage_change = avg_bpm_per_year.pct_change() * 100

# Identifying the year with the highest increase and highest decrease in BPM
year_highest_increase = bpm_percentage_change.idxmax()
year_highest_decrease = bpm_percentage_change.idxmin()

# Finding the song with the highest BPM for the year with highest increase
highest_bpm_song_year_increase = filtered_data[filtered_data['Year'] == year_highest_increase].nlargest(1, 'Beats Per Minute (BPM)')

# Finding the song with the lowest BPM for the year with highest decrease
lowest_bpm_song_year_decrease = filtered_data[filtered_data['Year'] == year_highest_decrease].nsmallest(1, 'Beats Per Minute (BPM)')

# Calculating the average danceability score for songs in the "alternative hip hop" genre for each year
alt_hip_hop_data = filtered_data[filtered_data['Top Genre'] == 'alternative hip hop']
avg_danceability_per_year = alt_hip_hop_data.groupby('Year')['Danceability'].mean()

# Identifying the year with the highest and lowest average danceability score
year_highest_danceability = avg_danceability_per_year.idxmax()
year_lowest_danceability = avg_danceability_per_year.idxmin()

{
    "avg_bpm_per_year": avg_bpm_per_year,
    "bpm_percentage_change": bpm_percentage_change,
    "year_highest_increase": year_highest_increase,
    "year_highest_decrease": year_highest_decrease,
    "highest_bpm_song_year_increase": highest_bpm_song_year_increase,
    "lowest_bpm_song_year_decrease": lowest_bpm_song_year_decrease,
    "avg_danceability_per_year": avg_danceability_per_year,
    "year_highest_danceability": year_highest_danceability,
    "year_lowest_danceability": year_lowest_danceability
}
       

{'avg_bpm_per_year': Year
 2000    119.151515
 2001    125.666667
 2002    116.489362
 2003    123.142857
 2004    120.933333
 2005    120.545455
 Name: Beats Per Minute (BPM), dtype: float64,
 'bpm_percentage_change': Year
 2000         NaN
 2001    5.467955
 2002   -7.302895
 2003    5.711676
 2004   -1.794277
 2005   -0.320738
 Name: Beats Per Minute (BPM), dtype: float64,
 'year_highest_increase': 2003,
 'year_highest_decrease': 2002,
 'highest_bpm_song_year_increase':      Index  Title Artist  Top Genre  Year  Beats Per Minute (BPM)  Energy  \
 266    267  Omarm   BLØF  dutch pop  2003                     184      44   
 
      Danceability  Loudness (dB)  Liveness  Valence Length (Duration)  \
 266            27             -9        10        8               252   
 
      Acousticness  Speechiness  Popularity  
 266            41            3          46  ,
 'lowest_bpm_song_year_decrease':      Index Title         Artist      Top Genre  Year  Beats Per Minute (BPM)  \
 133    