## Importing necessary libraries

In this section, we import the libraries required for our analysis.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import random
from skimage.feature import hog
from skimage import exposure
import matplotlib.dates as mdates

## Loading and Exploring the data
In this section, we load the dataset into our notebook. We use pandas to read the CSV file and store it in a DataFrame named `df`.

In [None]:
# Read the playlist CSV from the Kaggle input directory into `df`
df = pd.read_csv(
    filepath_or_buffer="/kaggle/input/spotify-global-top-50-daily-update/playlist.csv"
)

In [None]:
# Preview the first five rows of the dataframe
df.head(n=5)

In [None]:
# Report the (rows, columns) dimensions, then display each column's dtype
print("Shape of dataframe:", (len(df.index), len(df.columns)))
print("\nData types of columns:\n")
df.dtypes

In [None]:
# Count the missing (NaN/None) entries in every column
print("\nNumber of missing values in each column:\n", df.isna().sum())

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe(percentiles=[0.25, 0.5, 0.75])

## Data Visualizations

#### Distribution of track popularity

In [None]:
# Histogram of the track_popularity column, split into 20 bins
sns.histplot(x="track_popularity", bins=20, data=df)
plt.show()

#### Distribution of track duration

In [None]:
# Histogram of track length; durations are stored in milliseconds,
# so divide by 60000 to express them in minutes
plt.hist(df['track_duration_ms'].div(60000), bins=30)
plt.title('Distribution of Track Duration')
plt.xlabel('Track Duration (minutes)')
plt.ylabel('Frequency')
plt.show()

#### Track popularity vs. position in playlist

In [None]:
# Scatter each track's popularity against its position in the playlist
plt.scatter(x=df['position_in_playlist'], y=df['track_popularity'])
plt.title('Track Popularity vs. Position in Playlist')
plt.xlabel('Position in Playlist')
plt.ylabel('Track Popularity')
plt.show()

#### Relation between track duration and popularity

In [None]:
# Track duration (in milliseconds) on the x-axis against popularity on the y-axis
sns.scatterplot(x="track_duration_ms", y="track_popularity", data=df)
plt.show()

#### Distribution of number of tracks in album

In [None]:
# Count how many playlist rows fall on each distinct album track count
sns.countplot(x="number_of_tracks_in_album", data=df)
plt.show()

#### Boxplot of track durations

In [None]:
# Vertical boxplot summarising the spread of track durations (milliseconds)
sns.boxplot(y="track_duration_ms", data=df)
plt.show()

#### Pie chart of explicit vs. non-explicit tracks

In [None]:

# Tally rows per value of track_explicit and show each share as a percentage
explicit_counts = df["track_explicit"].value_counts()
plt.pie(
    x=explicit_counts,
    autopct="%1.1f%%",
    labels=explicit_counts.index,
)
plt.show()

#### Bar chart of mean track popularity by explicitness

In [None]:

# Mean popularity for each value of track_explicit.
# The popularity column is selected *before* aggregating: calling .mean() on
# the whole grouped frame also tries to average text columns, which raises a
# TypeError on pandas >= 2.0.
popularity_by_explicit = df.groupby("track_explicit")["track_popularity"].mean()

# Plot from the aggregated values themselves rather than mixing the full-length
# `df` with two-element vectors; reset_index() turns the group labels back into
# a regular column so both axes can be referenced (and labelled) by name.
sns.barplot(
    data=popularity_by_explicit.reset_index(),
    x="track_explicit",
    y="track_popularity",
)
plt.show()

## Correlations

In [None]:
# Calculate the pairwise correlation matrix for the dataset.
# Only numeric and boolean columns are kept: text columns (e.g. images_path)
# cannot be correlated, and pandas >= 2.0 raises on them instead of silently
# dropping them as older versions did.
corr = df.select_dtypes(include=["number", "bool"]).corr()
print("\nCorrelation Matrix:\n")
corr

In [None]:
# Render the correlation matrix as an annotated heatmap for easier reading
sns.heatmap(data=corr, cmap="YlGnBu", annot=True)
plt.show()

## Image Feature analysis

In [None]:
# Rewrite the image paths stored in the CSV (which contain a "./" prefix
# separator) so that they point into the Kaggle input directory.
#
# The whole column is assigned in one statement: chained indexing such as
# df['images_path'][i] = ... may write to a temporary copy (pandas emits
# SettingWithCopyWarning) and has no effect under copy-on-write.
image_root = "/kaggle/input/spotify-global-top-50-daily-update/"
df['images_path'] = [
    # Paths already rewritten by an earlier run of this cell are kept as-is,
    # so re-running the cell does not fail on the missing "./" separator.
    path if path.startswith(image_root) else image_root + path.split("./")[1]
    for path in df['images_path']
]

In [None]:
# Calculate HOG features for each image and store them in a new column.
# Iterating over the column values (instead of df['images_path'][i]) avoids
# label-based lookups that only work when the index is exactly 0..n-1.
hog_features = []
for image_path in df['images_path']:
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None rather than raising when a file is missing or
    # unreadable; fail here with the offending path instead of deep inside hog().
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    hog_features.append(hog(img, pixels_per_cell=(8, 8), cells_per_block=(2, 2)))
df['hog_features'] = hog_features


In [None]:
# Preview the first five rows, now including the hog_features column
df.head(n=5)

In [None]:
# Draw four distinct row positions at random; they select the images shown in
# the visualisation grid.
# (Call random.seed(...) first if a reproducible selection is needed.)
indices = random.sample(range(df.shape[0]), k=4)

In [None]:
# 3 x 4 grid: one column per sampled image.
#   row 0 - the original image
#   row 1 - the stored HOG feature vector drawn as a line plot
#   row 2 - the HOG visualisation image
fig, axs = plt.subplots(3, 4, figsize=(32, 16))

for i, row in enumerate(indices):
    # load the image once; it is reused for both HOG and display
    image_path = df['images_path'][row]
    img = cv2.imread(image_path)
    # cv2.imread returns None rather than raising when the file cannot be read
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # flatten the stored feature vector so it can be drawn as a single line
    features = np.asarray(df["hog_features"][row]).ravel()

    # The image is read in colour here, so the channel axis must be declared.
    # `channel_axis=-1` is the replacement for the deprecated
    # `multichannel=True` argument of skimage.feature.hog.
    _, hog_img = hog(
        img,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        visualize=True,
        channel_axis=-1,
    )
    # stretch the low-valued HOG image so the gradients are visible
    hog_img = exposure.rescale_intensity(hog_img, in_range=(0, 10))

    # top row: original image (OpenCV loads BGR, matplotlib expects RGB)
    axs[0, i].imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    axs[0, i].set_title('Image ' + str(i+1))

    # middle row: HOG feature vector
    axs[1, i].plot(features)
    axs[1, i].set_title('HOG Features ' + str(i+1))

    # bottom row: HOG visualisation image
    axs[2, i].imshow(hog_img, cmap=plt.cm.gray)
    axs[2, i].set_title('Hog Image ' + str(i+1))

    # hide the tick marks on all three panels of this column
    for ax in axs[:, i]:
        ax.set_xticks([])
        ax.set_yticks([])

# display the grid of images and features
plt.show()