# Followers fix

## Setup

In [None]:
# Import modules
import pandas as pd

from collections import Counter
from tqdm import tqdm


In [None]:
# Load the two input tables used by the followers fix:
#   - the playlist followers history
#   - the main editorial playlists dataset
followers_csv = '/Users/julienmbarki/Documents/Doctorat/Publications/Article 2/Data/Code/data_management/db_soundcharts/editorial_tracks_followers_23-24.csv'
main_csv = '/Users/julienmbarki/Documents/Doctorat/Publications/Article 2/Data/Code/data_management/editorial_playlists_23-24_final.csv'

followers_df = pd.read_csv(followers_csv)
main_df = pd.read_csv(main_csv)

# Render the followers table for a quick visual check
display(followers_df)


## Clean followers data

In [None]:
# Collapse the followers history to one observation per playlist and ISO week.
# For each (playlist_id, year, week) the Friday observation is kept when there is one;
# otherwise the earliest available weekday of that week is kept.

# Convert date column to datetime
followers_df['playlist_followers_date'] = pd.to_datetime(followers_df['playlist_followers_date'])

# Extract ISO year, ISO week number and weekday
followers_iso = followers_df['playlist_followers_date'].dt.isocalendar()
followers_df['year'] = followers_iso.year
followers_df['week'] = followers_iso.week
followers_df['weekday'] = followers_df['playlist_followers_date'].dt.weekday  # Monday=0, ..., Sunday=6

# Sort so that, inside each playlist-week, rows go from the earliest weekday to the latest
followers_df.sort_values(by=['playlist_id', 'year', 'week', 'weekday'], ascending=[True, True, True, True], inplace=True)

# Keep only one entry per playlist_id per week, prioritizing Friday (4).
# This replaces groupby(...).apply(lambda ...): applying on the grouping columns is deprecated
# in recent pandas, and once those columns are excluded from apply the key columns
# would be lost by the index reset that followed.
week_keys = ['playlist_id', 'year', 'week']
FRIDAY = 4

# Rows with a missing key are discarded, as the default groupby did
keyed_rows = followers_df.dropna(subset=week_keys)

# Putting the Friday rows in front makes keep='first' pick a Friday whenever the week has one,
# and the first row of the sorted frame otherwise
friday_rows = keyed_rows[keyed_rows['weekday'] == FRIDAY]
df_weekly = (
    pd.concat([friday_rows, keyed_rows])
    .drop_duplicates(subset=week_keys, keep='first')
    .sort_values(by=week_keys)
    .reset_index(drop=True)
)

# Sanity check: exactly one row per playlist and week
assert not df_weekly.duplicated(subset=week_keys).any(), "df_weekly has several rows for one playlist-week"

# Display result
display(df_weekly)


In [None]:
# Attach the weekly follower count to each row of the main dataset.

# Derive the ISO year/week of every collection date so both tables share the same weekly keys
main_df['collection_date'] = pd.to_datetime(main_df['collection_date'])
collection_iso = main_df['collection_date'].dt.isocalendar()
main_df['year'] = collection_iso.year
main_df['week'] = collection_iso.week

# Keep only the merge keys and the follower count from the weekly table
merge_keys = ['playlist_id', 'year', 'week']
df_weekly_subset = df_weekly[merge_keys + ['playlist_followers']]

# Left merge: every row of main_df is kept; rows without a weekly match get NaN followers.
# validate='many_to_one' makes pandas raise if a (playlist_id, year, week) key appears more
# than once in the weekly table, instead of silently duplicating rows of main_df.
# NOTE(review): if main_df already has a 'playlist_followers' column, pandas will suffix the
# two columns with _x / _y — confirm which one is expected downstream.
merged_df = pd.merge(
    main_df,
    df_weekly_subset,
    how='left',
    on=merge_keys,
    validate='many_to_one',
)

# Display result
display(merged_df)


# Playlist popularity fix

## Setup

In [1]:
# Import modules
import os
from collections import Counter
from datetime import datetime, timedelta

import pandas as pd
from tqdm import tqdm

from soundcharts_API_client import *


In [2]:
# API connection
# The Soundcharts credentials are read from the environment so that no secret is stored in
# the notebook. Set SOUNDCHARTS_APP_ID and SOUNDCHARTS_API_KEY before starting the kernel.
# NOTE(review): the credentials that used to be hard-coded in this cell should be treated as
# exposed and rotated.
try:
    x_app_id = os.environ['SOUNDCHARTS_APP_ID']
    x_api_key = os.environ['SOUNDCHARTS_API_KEY']
except KeyError as missing_var:
    raise RuntimeError(
        f"Environment variable {missing_var} is not set; "
        "define SOUNDCHARTS_APP_ID and SOUNDCHARTS_API_KEY to use the Soundcharts API."
    ) from None

sdch = SoundchartsClient(x_app_id, x_api_key)


In [3]:
# Load the charts dataset that the next section enriches with follower counts
charts_csv = "/Users/julienmbarki/Documents/Doctorat/Publications/Article 2/Data/Code/data_analysis/charts_final.csv"
df = pd.read_csv(charts_csv)


## Get followers data

In [4]:
# Get the playlist follower count for every row of the charts dataset.
# For each row, the Soundcharts client is queried over a one-day window starting on the row's
# collection date, and the first returned value is stored in a new 'playlist_followers' column.

# Columns copied unchanged from the input frame into the output, in this order
carried_columns = [
    'playlist_id',
    'playlist_name',
    'collection_date',
    'mean_track_date',
    'playlist_type',
    'nb_tracks',
    'nb_clusters',
    'nb_clusters_2',
    'hh_index',
    'hh_index_2',
    'distances',
    'distances_2',
    'distances_3',
    'stirling_index',
    'stirling_index_2',
]

# One dict per input row; converted to a DataFrame once the loop is done
results_df = []

for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing rows"):
    record = {column: row[column] for column in carried_columns}

    # The query window runs from the collection date to the following day
    collection_date = record['collection_date']
    next_day = datetime.fromisoformat(collection_date) + timedelta(days=1)

    followers = sdch.get_playlist_followers_by_date(
        playlist_id=record['playlist_id'],
        startDate=collection_date,
        endDate=next_day.strftime('%Y-%m-%d')
    )

    # Use the first returned item; None when the response has no (or an empty) 'items' entry.
    # NOTE(review): presumably the first item is the one for the collection date — confirm
    # the ordering returned by the client.
    items = followers['items'] if 'items' in followers else None
    record['playlist_followers'] = items[0]['value'] if items else None

    results_df.append(record)

# Convert the list to a DataFrame and save it
df = pd.DataFrame(results_df)
df.to_csv('charts_final_bis.csv', index=False)

# Show the resulting DataFrame
display(df)


Processing rows: 100%|██████████| 314/314 [01:07<00:00,  4.66it/s]


Unnamed: 0,playlist_id,playlist_name,collection_date,mean_track_date,playlist_type,nb_tracks,nb_clusters,nb_clusters_2,hh_index,hh_index_2,distances,distances_2,distances_3,stirling_index,stirling_index_2,playlist_followers
0,11e84493-6b0e-6dc0-a8d5-a0369fe50396,Top 50 - France,2021-10-01,2021-02-06 04:05:06,Charts,47,2,9,5509.280217,1308.284292,0.350979,0.739307,0.932134,[[0.15761475]],[[0.33695921]],767571
1,11e84493-6b0e-6dc0-a8d5-a0369fe50396,Top 50 - France,2021-10-08,2021-02-04 05:23:15,Charts,49,3,10,3802.582257,1353.602666,0.482506,0.829257,0.923588,[[0.23092727]],[[0.34492201]],768454
2,11e84493-6b0e-6dc0-a8d5-a0369fe50396,Top 50 - France,2021-10-15,2021-01-18 22:02:26,Charts,49,3,8,3386.089130,1420.241566,0.491988,0.753888,0.952844,[[0.24224488]],[[0.34012151]],769328
3,11e84493-6b0e-6dc0-a8d5-a0369fe50396,Top 50 - France,2021-10-22,2021-03-03 21:33:03,Charts,49,2,7,5351.936693,1903.373594,0.386204,0.758866,0.977816,[[0.17951001]],[[0.33175893]],770197
4,11e84493-6b0e-6dc0-a8d5-a0369fe50396,Top 50 - France,2021-10-29,2021-03-06 04:35:44,Charts,47,3,5,3707.559982,2258.940697,0.496519,0.644281,0.949384,[[0.23225438]],[[0.29414337]],771245
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
309,11e84493-6b26-e814-a42e-a0369fe50396,Viral 50 - France,2024-08-30,2023-07-16 16:54:32,Charts,44,2,5,7644.628099,2489.669421,0.459385,0.724420,0.937626,[[0.10820232]],[[0.2888285]],69640
310,11e84493-6b26-e814-a42e-a0369fe50396,Viral 50 - France,2024-09-06,2022-03-15 18:40:00,Charts,45,2,7,7688.888889,2059.259259,0.529418,0.906750,0.916984,[[0.12235433]],[[0.32155954]],69607
311,11e84493-6b26-e814-a42e-a0369fe50396,Viral 50 - France,2024-09-13,2023-08-06 21:52:00,Charts,45,2,6,5555.555556,2009.876543,0.330805,0.702943,0.909112,[[0.14702444]],[[0.30405404]],69636
312,11e84493-6b26-e814-a42e-a0369fe50396,Viral 50 - France,2024-09-20,2023-03-07 04:30:00,Charts,48,3,7,3880.208333,1579.861111,0.570318,0.788977,0.950311,[[0.23179182]],[[0.35070962]],69640
