# Followers fix

## Setup

In [1]:
# Import modules
import pandas as pd

from collections import Counter
from tqdm import tqdm


In [None]:
# Load the two input tables: the playlist follower snapshots and the main playlist table
# NOTE(review): both paths are absolute and machine-specific — consider a configurable data directory
followers_csv_path = '/Users/julienmbarki/Documents/Doctorat/Publications/Article 2/Data/Code/data_management/db_soundcharts/editorial_tracks_followers_23-24.csv'
main_csv_path = '/Users/julienmbarki/Documents/Doctorat/Publications/Article 2/Data/Code/data_management/editorial_playlists_23-24_final.csv'

followers_df = pd.read_csv(followers_csv_path)
main_df = pd.read_csv(main_csv_path)

# Show the follower snapshots as a quick visual check of what was loaded
display(followers_df)


## Clean followers data

In [None]:
# Collapse the follower snapshots to one row per playlist and ISO week.
# The selection is done with a vectorised sort + drop_duplicates instead of
# groupby(...).apply(lambda ...): apply() on the grouping columns is deprecated
# since pandas 2.2, and once those columns are excluded from the applied frame
# the playlist_id / year / week columns would be lost from the result.
WEEK_KEYS = ['playlist_id', 'year', 'week']
PREFERRED_WEEKDAY = 4  # Friday (Monday=0, ..., Sunday=6)

# Convert date column to datetime
followers_df['playlist_followers_date'] = pd.to_datetime(followers_df['playlist_followers_date'])

# Extract ISO year, ISO week number and weekday (the ISO calendar is computed once)
iso_calendar = followers_df['playlist_followers_date'].dt.isocalendar()
followers_df['year'] = iso_calendar['year']
followers_df['week'] = iso_calendar['week']
followers_df['weekday'] = followers_df['playlist_followers_date'].dt.weekday  # Monday=0, ..., Sunday=6

# Keep followers_df ordered by playlist, week and weekday, as before
followers_df = followers_df.sort_values(by=WEEK_KEYS + ['weekday'])

# Keep only one entry per playlist_id per week, prioritizing Friday:
#   - rows with a missing key are dropped (groupby ignored them as well);
#   - the helper flag is False on the preferred weekday, so those rows sort first;
#   - otherwise the earliest weekday of the week comes first;
#   - drop_duplicates(keep='first') then keeps exactly that row for each week.
df_weekly = (
    followers_df
    .dropna(subset=WEEK_KEYS)
    .assign(_not_preferred=lambda frame: frame['weekday'].ne(PREFERRED_WEEKDAY))
    .sort_values(by=WEEK_KEYS + ['_not_preferred', 'weekday'])
    .drop_duplicates(subset=WEEK_KEYS, keep='first')
    .drop(columns='_not_preferred')
    .reset_index(drop=True)
)

# Display result
display(df_weekly)


In [None]:
# Attach the weekly follower count to every row of the main table
# Derive the ISO year / ISO week of each collection date; they serve as merge keys
main_df['collection_date'] = pd.to_datetime(main_df['collection_date'])
collection_iso = main_df['collection_date'].dt.isocalendar()
main_df['year'] = collection_iso['year']
main_df['week'] = collection_iso['week']

# From the weekly table keep only the merge keys and the follower count
merge_keys = ['playlist_id', 'year', 'week']
df_weekly_subset = df_weekly[merge_keys + ['playlist_followers']]

# Left merge: every row of main_df is kept, whether or not a follower count matches
# NOTE(review): if main_df already has a 'playlist_followers' column, pandas will
# suffix both columns (_x / _y) — confirm against the input file
merged_df = main_df.merge(df_weekly_subset, how='left', on=merge_keys)

# Display result
display(merged_df)


# Playlist popularity fix

## Setup

In [1]:
# Import modules
import os
from collections import Counter
from datetime import datetime, timedelta

import pandas as pd
from tqdm import tqdm

from soundcharts_API_client import *


In [2]:
# API connection
# The credentials are read from the environment so that they are never stored in
# the notebook. Set SOUNDCHARTS_APP_ID and SOUNDCHARTS_API_KEY before starting
# the kernel.
# NOTE(review): the app id / key pair previously hardcoded in this cell was saved
# in plain text and should be treated as leaked — rotate it.
x_app_id = os.environ.get('SOUNDCHARTS_APP_ID')
x_api_key = os.environ.get('SOUNDCHARTS_API_KEY')

# Fail early with an explicit message rather than on the first API call
if not x_app_id or not x_api_key:
    raise RuntimeError(
        "Soundcharts credentials not found: set the SOUNDCHARTS_APP_ID and "
        "SOUNDCHARTS_API_KEY environment variables."
    )

sdch = SoundchartsClient(x_app_id, x_api_key)


In [3]:
# Load the playlist-level table that the next section extends with follower counts
# NOTE(review): absolute, machine-specific path — consider a configurable data directory
final_bis_csv_path = "/Users/julienmbarki/Documents/Doctorat/Publications/Article 2/Data/Code/data_analysis/df_final_bis.csv"
df = pd.read_csv(final_bis_csv_path)


## Get followers data

In [4]:
# Fetch the follower count of each playlist for its collection date
# Columns copied unchanged from df into the output, in output order
carried_columns = [
    'playlist_id', 'playlist_name', 'collection_date', 'mean_track_date',
    'playlist_type', 'nb_tracks', 'nb_clusters', 'nb_clusters_2',
    'hh_index', 'hh_index_2', 'distances', 'distances_2', 'distances_3',
    'stirling_index', 'stirling_index_2', 'editorial_type',
]

results_df = []

for _, row in tqdm(df.iterrows(), total=len(df), desc="Processing rows"):
    # Start the output record with the values carried over from the input row
    record = {column: row[column] for column in carried_columns}

    # Query a one-day window that starts on the collection date
    window_start = record['collection_date']
    window_end = datetime.fromisoformat(window_start) + timedelta(days=1)

    followers = sdch.get_playlist_followers_by_date(
        playlist_id=record['playlist_id'],
        startDate=window_start,
        endDate=window_end.strftime('%Y-%m-%d')
    )

    # Use the value of the first returned item; keep it missing when the
    # response has no 'items' entry or the list is empty
    # NOTE(review): assumes the first item corresponds to the collection date — confirm the API ordering
    has_items = 'items' in followers and followers['items']
    record['playlist_followers'] = followers['items'][0]['value'] if has_items else None

    results_df.append(record)

# Build the enriched table from the collected records and save it
df = pd.DataFrame(results_df)
df.to_csv('df_final_ter.csv', index=False)

# Show the resulting DataFrame
display(df)


Processing rows: 100%|██████████| 19153/19153 [1:08:49<00:00,  4.64it/s]


Unnamed: 0,playlist_id,playlist_name,collection_date,mean_track_date,playlist_type,nb_tracks,nb_clusters,nb_clusters_2,hh_index,hh_index_2,distances,distances_2,distances_3,stirling_index,stirling_index_2,editorial_type,playlist_followers
0,11e84480-ad44-deb2-8ac8-a0369fe50396,"Rap Fr | Rap Francais 2024 | Hits Rap | SDM, ...",2021-10-01,2021-01-26 09:42:51,Major label,84,3,10,3353.174603,1403.061224,0.398322,0.672074,0.839007,[[0.19848049]],[[0.29769668]],genre,211281
1,11e84480-ad44-deb2-8ac8-a0369fe50396,"Rap Fr | Rap Francais 2024 | Hits Rap | SDM, ...",2021-10-08,2021-01-29 07:20:28,Major label,85,2,10,5056.055363,1222.145329,0.263768,0.648020,0.836092,[[0.13040538]],[[0.29858228]],genre,211249
2,11e84480-ad44-deb2-8ac8-a0369fe50396,"Rap Fr | Rap Francais 2024 | Hits Rap | SDM, ...",2021-10-15,2021-02-08 10:24:00,Major label,90,2,9,5061.728395,1286.419753,0.260919,0.623667,0.826567,[[0.12884873]],[[0.28702486]],genre,211232
3,11e84480-ad44-deb2-8ac8-a0369fe50396,"Rap Fr | Rap Francais 2024 | Hits Rap | SDM, ...",2021-10-22,2021-02-19 22:59:22,Major label,95,2,10,5000.554017,1197.783934,0.262501,0.640954,0.830211,[[0.13123587]],[[0.29649178]],genre,211375
4,11e84480-ad44-deb2-8ac8-a0369fe50396,"Rap Fr | Rap Francais 2024 | Hits Rap | SDM, ...",2021-10-29,2021-03-01 19:38:10,Major label,99,2,7,5004.591368,1664.115907,0.260038,0.574739,0.829210,[[0.12989972]],[[0.27389869]],genre,212164
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19148,f5de1012-b6d6-11e8-8a3a-525400009efb,Gym Tonic,2024-08-30,1987-03-30 03:34:28,Editorial,47,3,7,4540.516071,2023.540063,0.474453,0.799974,0.887480,[[0.18947173]],[[0.29370523]],mood,99605
19149,f5de1012-b6d6-11e8-8a3a-525400009efb,Gym Tonic,2024-09-06,1987-03-30 03:34:28,Editorial,47,3,7,4540.516071,2023.540063,0.474453,0.799974,0.887480,[[0.18947173]],[[0.29370523]],mood,99672
19150,f5de1012-b6d6-11e8-8a3a-525400009efb,Gym Tonic,2024-09-13,1987-03-30 03:34:28,Editorial,47,3,7,4540.516071,2023.540063,0.474453,0.799974,0.887480,[[0.18947173]],[[0.29370523]],mood,99733
19151,f5de1012-b6d6-11e8-8a3a-525400009efb,Gym Tonic,2024-09-20,1987-03-30 03:34:28,Editorial,47,3,7,4540.516071,2023.540063,0.474453,0.799974,0.887480,[[0.18947173]],[[0.29370523]],mood,99772
