### Cross-Database Data Integration and Analysis Project - SPOTIFY 

In [36]:
#import necessary library packages:
import spotipy
from spotipy.oauth2 import SpotifyOAuth
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
import credentials
import os
import time 
import datetime
import requests
import numpy as np
import pandas as pd
import mysql.connector 
import pymysql
from sqlalchemy import create_engine
import pymongo
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from neo4j import GraphDatabase
import pyspark
from pyspark.sql import SparkSession
from pyspark import SparkContext,SparkConf
from pyspark.sql.functions import col, round
from pyspark.sql.functions import col
from pyspark.sql.types import DoubleType
from sklearn.linear_model import LinearRegression
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px


In [2]:
#set up credentials to access Spotify APi data:
client_id = credentials.client_id
client_secret = credentials.client_secret


client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

#### Data fetching

In [3]:
#retrieving Spotify track data from 2010:
artist_id = []
artist = []
track_id = []
track = []
track_popularity = []
album = [] 
release_date = []

for i in range(0,1000,50):
    track_results = sp.search(q="year:2010", type="track", limit=50,offset=i)
    for i, t in enumerate(track_results["tracks"]["items"]):
        artist_id.append(t["artists"][0]["id"])
        artist.append(t["artists"][0]["name"])
        track_id.append(t["id"])
        track.append(t["name"])
        track_popularity.append(t["popularity"])
        album.append(t["album"]["name"])
        release_date.append(t["album"]["release_date"])

In [4]:
#creating pandas DataFrame:
data_df = pd.DataFrame({"artist_id":artist_id,"artist":artist,"track_id":track_id,"track":track,"track_popularity":track_popularity,"album":album,"release_date":release_date} )
print(data_df.shape)

(1000, 7)


In [5]:
data_df

Unnamed: 0,artist_id,artist,track_id,track,track_popularity,album,release_date
0,360IAlyVv4PCEVjgyMZrxK,Miguel,0JXXNGljqupsJaZsgSbMZV,Sure Thing,89,All I Want Is You,2010-11-26
1,4LLpKhyESsyAXpc4laK94U,Mac Miller,7FAFkQQZFeNwOFzTrSDFIh,The Spins,84,K.I.D.S. (Deluxe),2010-08-13
2,0du5cEVh5yTK9QJze8zA0C,Bruno Mars,7BqBn9nzAq8spo5e7cZ0dJ,Just the Way You Are,86,Doo-Wops & Hooligans,2010-10-05
3,6f4XkbvYlXMH0QgVRzW0sM,Waka Flocka Flame,03tqyYWC9Um2ZqU0ZN849H,No Hands (feat. Roscoe Dash & Wale),79,Flockaveli,2010-10-01
4,5jTtGLk1mGFMY5lQOvJYUj,bôa,42qNWdLKCI41S4uzfamhFM,Duvet,84,Twilight,2010-04-20
...,...,...,...,...,...,...,...
995,5PokPZn11xzZXyXSfnvIM3,Lifehouse,5dePpbamNezj1qhHKJPViS,All In,49,Smoke & Mirrors (Deluxe),2010
996,0CKa42Jqrc9fSFbDjePaXP,Big K.R.I.T.,79xRW4JSTWhfsb4Z4WI9Md,Country Sh*t,48,Country Sh*t,2010-01-01
997,4tZwfgrHOc3mvqYlEYSvVi,Daft Punk,0Jc2SfIHv63JNsUZpunh54,"Solar Sailer - From ""TRON: Legacy""/Score",56,TRON: Legacy,2010-12-03
998,4LLpKhyESsyAXpc4laK94U,Mac Miller,0vkbXiCdOXRc74UIVCuvXv,Good Evening,51,K.I.D.S. (Deluxe),2010-08-13


In [6]:
#convert "release_date" to datetime format:
#data_df["release_date"] = pd.to_datetime(data_df["release_date"], format="%Y-%m-%d")

In [7]:
#add additional data to DataFrame:
artist_popularity = []
artist_genres = []
artist_followers = []

for a_id in data_df.artist_id:
    artist = sp.artist(a_id)
    artist_popularity.append(artist["popularity"])
    artist_genres.append(artist["genres"])
    artist_followers.append(artist["followers"]["total"])

In [8]:
data_df = data_df.assign(artist_popularity=artist_popularity, artist_genres=artist_genres, artist_followers=artist_followers)
data_df.head(20)

Unnamed: 0,artist_id,artist,track_id,track,track_popularity,album,release_date,artist_popularity,artist_genres,artist_followers
0,360IAlyVv4PCEVjgyMZrxK,Miguel,0JXXNGljqupsJaZsgSbMZV,Sure Thing,89,All I Want Is You,2010-11-26,77,[r&b],4785290
1,4LLpKhyESsyAXpc4laK94U,Mac Miller,7FAFkQQZFeNwOFzTrSDFIh,The Spins,84,K.I.D.S. (Deluxe),2010-08-13,81,"[hip hop, pittsburgh rap, pop rap, rap]",9488826
2,0du5cEVh5yTK9QJze8zA0C,Bruno Mars,7BqBn9nzAq8spo5e7cZ0dJ,Just the Way You Are,86,Doo-Wops & Hooligans,2010-10-05,87,"[dance pop, pop]",50883790
3,6f4XkbvYlXMH0QgVRzW0sM,Waka Flocka Flame,03tqyYWC9Um2ZqU0ZN849H,No Hands (feat. Roscoe Dash & Wale),79,Flockaveli,2010-10-01,64,"[atl hip hop, dirty south rap, pop rap, rap, s...",2713409
4,5jTtGLk1mGFMY5lQOvJYUj,bôa,42qNWdLKCI41S4uzfamhFM,Duvet,84,Twilight,2010-04-20,66,[],436705
5,360IAlyVv4PCEVjgyMZrxK,Miguel,1eUGmzzvahJjOSWgDHuRlv,Girl With The Tattoo Enter.lewd,82,All I Want Is You,2010-11-26,77,[r&b],4785290
6,6yJCxee7QumYr820xdIsjo,Zac Brown Band,60IGhnH20N82dNxKnc8jDd,Knee Deep (feat. Jimmy Buffett),78,You Get What You Give,2010-09-20,72,"[contemporary country, country, country road, ...",3694056
7,0hCNtLu0JehylgoiP8L4Gh,Nicki Minaj,3hlksXnvbKogFdPbpO9vel,Super Bass,82,Pink Friday (Complete Edition),2010-11-22,86,"[hip pop, pop, queens hip hop, rap]",29500230
8,3FUY2gzHeIiaesXtOAdB7A,Train,4HlFJV71xXKIGcU3kRyttv,"Hey, Soul Sister",85,"Save Me, San Francisco (Golden Gate Edition)",2010-12-01,71,"[dance pop, neo mellow, pop, pop rock]",4355722
9,5K4W6rqBFWDnAN6FQUkS6x,Kanye West,1UGD3lW3tDmgZfAVDh6w7r,Devil In A New Dress,81,My Beautiful Dark Twisted Fantasy,2010-11-22,89,"[chicago rap, hip hop, rap]",21243558


In [9]:
#--->to be added as error occrued
#get track features:
#acousticness = []
#danceability = []
#energy = []
#instrumentalness = []
#liveness = []
#loudness = []
#speechiness = []
#empo = []
#time_signature = []

#for features in data_df.track_id:
#    track_features = sp.audio_features(track_id)
#    acousticness.append(t["acousticness"])
#    danceability.append["danceability"]
#    energy.append(t[0]["energy"])
#    instrumentalness.append(t[0]["instrumentalness"])
#    liveness.append(t[0]["liveness"])
#    loudness.append(t[0]["loudness"])
#    speechiness.append(t[0]["speechiness"])
#    tempo.append(t[0]["tempo"])
#    time_signature.append(t[0]["time_signature"])

In [10]:
#data_df = data_df.assign(acousticness=acousticness, danceability=danceability, energy=energy, instrumentalness=instrumentalness,liveness=liveness, loudness=loudness,speechiness=speechiness, tempo=tempo, time_signature=time_signature)
#data_df.head(20)

In [11]:
#creating pandas DataFrame for MySql database and saving to csv file:
track_df = data_df[["track","artist","album","track_popularity"]]
track_df.head(20)

Unnamed: 0,track,artist,album,track_popularity
0,Sure Thing,Miguel,All I Want Is You,89
1,The Spins,Mac Miller,K.I.D.S. (Deluxe),84
2,Just the Way You Are,Bruno Mars,Doo-Wops & Hooligans,86
3,No Hands (feat. Roscoe Dash & Wale),Waka Flocka Flame,Flockaveli,79
4,Duvet,bôa,Twilight,84
5,Girl With The Tattoo Enter.lewd,Miguel,All I Want Is You,82
6,Knee Deep (feat. Jimmy Buffett),Zac Brown Band,You Get What You Give,78
7,Super Bass,Nicki Minaj,Pink Friday (Complete Edition),82
8,"Hey, Soul Sister",Train,"Save Me, San Francisco (Golden Gate Edition)",85
9,Devil In A New Dress,Kanye West,My Beautiful Dark Twisted Fantasy,81


In [12]:
track_df.to_csv("spotify_mysql.csv")

In [13]:
#creating pandas DataFrame for MongoDB and saving to csv file::
artist_df = data_df[["artist","artist_genres","artist_popularity","artist_followers"]]
artist_df

Unnamed: 0,artist,artist_genres,artist_popularity,artist_followers
0,Miguel,[r&b],77,4785290
1,Mac Miller,"[hip hop, pittsburgh rap, pop rap, rap]",81,9488826
2,Bruno Mars,"[dance pop, pop]",87,50883790
3,Waka Flocka Flame,"[atl hip hop, dirty south rap, pop rap, rap, s...",64,2713409
4,bôa,[],66,436705
...,...,...,...,...
995,Lifehouse,"[neo mellow, pop rock, post-grunge]",62,1971548
996,Big K.R.I.T.,"[conscious hip hop, hip hop, mississippi hip h...",55,955924
997,Daft Punk,"[electro, filter house, rock]",79,9639637
998,Mac Miller,"[hip hop, pittsburgh rap, pop rap, rap]",81,9488826


In [14]:
artist_df.to_csv("spotify_mongodb.csv")

In [15]:
#creating pandas DataFrame for Neo4j and saving to csv file:
artist_album_df = data_df[["artist","track","album"]]
artist_album_df

Unnamed: 0,artist,track,album
0,Miguel,Sure Thing,All I Want Is You
1,Mac Miller,The Spins,K.I.D.S. (Deluxe)
2,Bruno Mars,Just the Way You Are,Doo-Wops & Hooligans
3,Waka Flocka Flame,No Hands (feat. Roscoe Dash & Wale),Flockaveli
4,bôa,Duvet,Twilight
...,...,...,...
995,Lifehouse,All In,Smoke & Mirrors (Deluxe)
996,Big K.R.I.T.,Country Sh*t,Country Sh*t
997,Daft Punk,"Solar Sailer - From ""TRON: Legacy""/Score",TRON: Legacy
998,Mac Miller,Good Evening,K.I.D.S. (Deluxe)


In [16]:
artist_album_df.to_csv("spotify_neo4j.csv")

#### Load data into databases:

In [782]:
#to Mysql:
connection = pymysql.connect(host="localhost", user="root",  password="", database="spotify_db")
cursor = connection.cursor()

create_table_query = """
    CREATE TABLE IF NOT EXISTS tracks (
        track_id INT AUTO_INCREMENT PRIMARY KEY,
        track VARCHAR(60),
        artist VARCHAR(60),
        album VARCHAR(60),
        track_popularity INT
        )
    """

cursor.execute(create_table_query)

0

In [783]:
# Insert whole DataFrame into MySQL
track_df.to_sql("tracks", con = engine, if_exists = "append", chunksize = 1000, index=False)

1000

In [784]:
connection.commit()
connection.close()

In [785]:
#to MongoDB:
data = pd.read_csv("spotify_mongodb.csv")
uri = "uri_mongo"
client = MongoClient(uri, server_api=ServerApi('1'))

In [758]:
db = client["SpotifyDB"]
collection = db["Spotifydata"]

In [760]:
data.reset_index(inplace=True)
data_dict = data.to_dict("records")

In [761]:
collection.insert_many(data_dict)

<pymongo.results.InsertManyResult at 0x1a51d99d030>

In [762]:
client.close()

In [794]:
#to Neo4j:
uri = "neo4j+ssc://14c452fc.databases.neo4j.io"
username = "username"
password = "password"

In [795]:
driver = GraphDatabase.driver(uri,auth=(username,password))

In [981]:
def load_to_neo4j(session, data):
    with session.begin_transaction() as tx:
        for artist in data:
            create_artist_query = (
                "CREATE (
                artist:Artist {"name: $name, track: $track, album: $album"
                              }))
                   
             
      

SyntaxError: unterminated string literal (detected at line 5) (542220062.py, line 5)

In [None]:
with driver.session() as session:
    session.run(cql_create)

#### Extract data from databases:

In [824]:
#MySQL:
import mysql.connector 

connection = mysql.connector.connect(host="localhost", user="root",  password="", database="spotify_db")

sql_select_query = "select * from tracks"
cursor = connection.cursor()
cursor.execute(sql_select_query)

records = cursor.fetchall()
records

In [837]:
#MongoDB:
uri = "uri_mongo"
client = pymongo.MongoClient(uri, server_api=ServerApi('1'))
db = client["SpotifyDB"]
collection = db["Spotifydata"]

x = collection.find()
 
for data in x:
    print(data)

{'_id': ObjectId('64fe0897341b57fdbb129fc9'), 'level_0': 11, 'index': 11, 'Unnamed: 0': 11, 'artist': 'Kesha', 'artist_genres': "['dance pop', 'pop']", 'artist_popularity': 74, 'artist_followers': 7640213}
{'_id': ObjectId('64fe0897341b57fdbb129fbe'), 'level_0': 0, 'index': 0, 'Unnamed: 0': 0, 'artist': 'Miguel', 'artist_genres': "['r&b']", 'artist_popularity': 77, 'artist_followers': 4782088}
{'_id': ObjectId('64fe0897341b57fdbb129fcf'), 'level_0': 17, 'index': 17, 'Unnamed: 0': 17, 'artist': 'Patrick Watson', 'artist_genres': "['indie folk', 'indie quebecois']", 'artist_popularity': 68, 'artist_followers': 609570}
{'_id': ObjectId('64fe0897341b57fdbb129fc1'), 'level_0': 3, 'index': 3, 'Unnamed: 0': 3, 'artist': 'bôa', 'artist_genres': '[]', 'artist_popularity': 66, 'artist_followers': 434749}
{'_id': ObjectId('64fe0897341b57fdbb129fc6'), 'level_0': 8, 'index': 8, 'Unnamed: 0': 8, 'artist': 'Nicki Minaj', 'artist_genres': "['hip pop', 'pop', 'queens hip hop', 'rap']", 'artist_populari

{'_id': ObjectId('64fe0897341b57fdbb12a12c'), 'level_0': 366, 'index': 366, 'Unnamed: 0': 366, 'artist': 'A Day To Remember', 'artist_genres': "['alternative metal', 'metalcore', 'neon pop punk', 'pop emo', 'pop punk', 'screamo']", 'artist_popularity': 67, 'artist_followers': 2268860}
{'_id': ObjectId('64fe0897341b57fdbb12a134'), 'level_0': 374, 'index': 374, 'Unnamed: 0': 374, 'artist': 'Robyn', 'artist_genres': "['dance pop', 'electropop', 'neo-synthpop', 'scandipop', 'swedish electropop', 'swedish pop']", 'artist_popularity': 58, 'artist_followers': 826048}
{'_id': ObjectId('64fe0897341b57fdbb12a0cc'), 'level_0': 270, 'index': 270, 'Unnamed: 0': 270, 'artist': 'Kansas', 'artist_genres': "['album rock', 'classic rock', 'hard rock', 'heartland rock', 'mellow gold', 'progressive rock', 'rock', 'soft rock']", 'artist_popularity': 64, 'artist_followers': 1434482}
{'_id': ObjectId('64fe0897341b57fdbb12a044'), 'level_0': 134, 'index': 134, 'Unnamed: 0': 134, 'artist': 'DJ Khaled', 'artist_

In [838]:
#Neo4j:
from py2neo import Graph,Node,Relationship

graph = Graph(<Database URL>, auth=(<Username>, <Password>))


#### Data transformation:

In [37]:
#clean and transform the extracted data using PySpark:

sparkSession = SparkSession.builder.appName("spark_spotify_df").getOrCreate()

In [18]:
files = [
    r"C:\Users\krent\OneDrive\Desktop\PythonII\PythonIIPractical\FinalProject\spotify_mysql.csv",
    r"C:\Users\krent\OneDrive\Desktop\PythonII\PythonIIPractical\FinalProject\spotify_mongodb.csv",
    r"C:\Users\krent\OneDrive\Desktop\PythonII\PythonIIPractical\FinalProject\spotify_neo4j.csv"
]

In [19]:
spark_spotify_df = sparkSession.read.csv(files, header=True, inferSchema=True)

In [20]:
spark_spotify_df.show(truncate=True)


+---+-----------------+--------------------+-----------------+----------------+
|_c0|           artist|       artist_genres|artist_popularity|artist_followers|
+---+-----------------+--------------------+-----------------+----------------+
|  0|           Miguel|             ['r&b']|               77|         4785290|
|  1|       Mac Miller|['hip hop', 'pitt...|               81|         9488826|
|  2|       Bruno Mars|['dance pop', 'pop']|               87|        50883790|
|  3|Waka Flocka Flame|['atl hip hop', '...|               64|         2713409|
|  4|              bôa|                  []|               66|          436705|
|  5|           Miguel|             ['r&b']|               77|         4785290|
|  6|   Zac Brown Band|['contemporary co...|               72|         3694056|
|  7|      Nicki Minaj|['hip pop', 'pop'...|               86|        29500230|
|  8|            Train|['dance pop', 'ne...|               71|         4355722|
|  9|       Kanye West|['chicago rap', '

In [21]:
spark_spotify_df.printSchema()

root
 |-- _c0: integer (nullable = true)
 |-- artist: string (nullable = true)
 |-- artist_genres: string (nullable = true)
 |-- artist_popularity: string (nullable = true)
 |-- artist_followers: string (nullable = true)



In [38]:
mysql_spark_df = sparkSession.read.csv("spotify_mysql.csv", header=True, inferSchema=True)
mongodb_spark_df = sparkSession.read.csv("spotify_mongodb.csv", header=True, inferSchema=True)
neo4j_spark_df = sparkSession.read.csv("spotify_neo4j.csv", header=True, inferSchema=True)

In [49]:
df = [mysql_spark_df, mongodb_spark_df, neo4j_spark_df]

In [50]:
df = df.filter(col("column_name").isNull).show()

AttributeError: 'list' object has no attribute 'filter'

In [44]:
#check if Pyspark DataFrame has null values:



In [23]:
#checking for duplicates:


In [24]:
#drop column:
spark_spotify_df = spark_spotify_df.drop(col("_c0")) \
  .printSchema()  

root
 |-- artist: string (nullable = true)
 |-- artist_genres: string (nullable = true)
 |-- artist_popularity: string (nullable = true)
 |-- artist_followers: string (nullable = true)



In [25]:
spark_spotify_df

In [None]:
spark.stop()

#### 3. Data Integration:

In [985]:
#after processing data in PySpark convert to Pandas DataFrame:---> add 3 db data combined here
combined_df = spark_spotify_df.toPandas()
combined_df.head()

ConnectionRefusedError: [WinError 10061] No connection could be made because the target machine actively refused it

In [896]:
df = combined_df.drop(["_c0"], axis =1)

In [897]:
df

Unnamed: 0,artist,artist_genres,artist_popularity,artist_followers
0,Miguel,['r&b'],77,4782088
1,Mac Miller,"['hip hop', 'pittsburgh rap', 'pop rap', 'rap']",81,9479321
2,Bruno Mars,"['dance pop', 'pop']",87,50822492
3,bôa,[],66,434749
4,Miguel,['r&b'],77,4782088
...,...,...,...,...
2995,Jason Aldean,See You When I See You,My Kinda Party,
2996,Bunbury,Frente a frente (feat. Tulsa),Las Consecuencias,
2997,Avenged Sevenfold,Victim,Nightmare,
2998,Ramon Ayala Y Sus Bravos Del Norte,Rinconcito En El Cielo,Cruzando Fronteras,


In [892]:
#Design a schema that accommodates the heterogeneous data types and structures.

#### 4. Data Analysis:
   - Perform exploratory data analysis (EDA) to understand the characteristics of the integrated dataset (average values, standard deviation, etc.).
  

In [898]:
df

Unnamed: 0,artist,artist_genres,artist_popularity,artist_followers
0,Miguel,['r&b'],77,4782088
1,Mac Miller,"['hip hop', 'pittsburgh rap', 'pop rap', 'rap']",81,9479321
2,Bruno Mars,"['dance pop', 'pop']",87,50822492
3,bôa,[],66,434749
4,Miguel,['r&b'],77,4782088
...,...,...,...,...
2995,Jason Aldean,See You When I See You,My Kinda Party,
2996,Bunbury,Frente a frente (feat. Tulsa),Las Consecuencias,
2997,Avenged Sevenfold,Victim,Nightmare,
2998,Ramon Ayala Y Sus Bravos Del Norte,Rinconcito En El Cielo,Cruzando Fronteras,


In [899]:
#Basic information
df.info()
#Describe the data
df.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000 entries, 0 to 2999
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   artist             3000 non-null   object
 1   artist_genres      3000 non-null   object
 2   artist_popularity  3000 non-null   object
 3   artist_followers   2000 non-null   object
dtypes: object(4)
memory usage: 93.9+ KB


Unnamed: 0,artist,artist_genres,artist_popularity,artist_followers
count,3000,3000,3000,2000
unique,1390,1753,554,484
top,Taylor Swift,['pop'],71,57
freq,66,47,49,55


In [900]:
#Find the duplicates
df.duplicated().sum()

570

In [577]:
#using Time Series, Linear regression or any other approach  to identify patterns, trends, and correlations.


#### 5. Graph Analysis:

#### 6. Data Visualization:

#### Top 10 most popular songs on Spotify 2010

In [816]:
top_10 = data_df.query("track_popularity > 90", inplace = False).sort_values("track_popularity").head(10)
top_10[:10]

Unnamed: 0,artist_id,artist,track,track_popularity,album,release_date


#### Top 10 least popular songs on Spotify 20210

In [808]:
least = data_df.sort_values("track_popularity", ascending = True).head(10)
least

Unnamed: 0,artist_id,artist,track,track_popularity,album,release_date
996,5j9R5dTGerKvdXopZnfJh9,Ramon Ayala Y Sus Bravos Del Norte,Rinconcito En El Cielo,48,Cruzando Fronteras,2010-10-06
994,0CKa42Jqrc9fSFbDjePaXP,Big K.R.I.T.,Country Sh*t,48,Country Sh*t,2010-01-01
966,07PdYoE4jVRF6Ut40GgVSP,Cartel De Santa,Volar Volar,48,Sincopa,2010-05-25
933,1rKrEdI6GKirxWHxIUPYms,Agnes Obel,Riverside,49,Philharmonics,2010-10-04
967,3FfvYsEGaIb52QPXhg4DcH,Jason Aldean,Country Boy's World,49,My Kinda Party,2010-11-02
949,5PokPZn11xzZXyXSfnvIM3,Lifehouse,Falling In,49,Smoke & Mirrors (Deluxe),2010
910,5BvJzeQpmsdsFp4HGUYUEx,Vampire Weekend,Cousins,49,Contra,2010-01-12
943,27plYDXNJSOD084j39Lmlj,Palomo,Nos Faltó,49,Serie Diamante- 15 Súper Éxitos,2010-01-01
980,40giwFcTQtv9ezxW8yqxJU,Chiddy Bang,Mind Your Manners (feat. Icona Pop),49,Breakfast,2010-09-13
981,3FfvYsEGaIb52QPXhg4DcH,Jason Aldean,Texas Was You,49,My Kinda Party,2010-11-02


In [820]:
sns.set_style(style="darkgrid")
plt.figure(figsize=(10,5))
popular = artist_genres.sort_values('artist_popularity', ascending=False).head(10)
sns.barplot(y = 'artist_genres', x = 'artist_popularity', data = popular).set(title='Top 5 Genres by Popularity')

AttributeError: 'list' object has no attribute 'sort_values'

<Figure size 1000x500 with 0 Axes>

#### 7. Insights and Reporting:

#### 8. Performance Optimization: