# In [3]:
import pandas as pd
import numpy as np

# Load the song statistics export. The scoring function defined below reads
# its "streams_total" column plus whichever weighted metric columns exist.
df = pd.read_csv("songstats_data_included_final.csv")

# Define the scoring function
def calculate_popularity_scores_with_log(df):
    """Add raw and normalized log-weighted popularity scores to ``df``.

    For every row the raw score is the weighted sum of ``log1p(value)`` over
    the metric columns listed in ``weights``. A metric contributes only when
    its column exists and the value is present and strictly positive. Rows
    whose ``streams_total`` is missing are treated as "no data" and get a raw
    score of 0.

    The normalized score is ``log1p(raw) / log1p(max_raw)``, so the
    best-scoring row maps to 1.0. When no row has a positive raw score every
    normalized score is 0.

    Args:
        df: DataFrame with a ``streams_total`` column and any subset of the
            other weighted metric columns. It is modified in place.

    Returns:
        The same DataFrame object, with float columns
        ``raw_popularity_score`` and ``normalized_popularity_score`` added
        (or overwritten).

    Raises:
        KeyError: If ``df`` has no ``streams_total`` column.
        ValueError: If a weighted column holds values that cannot be parsed
            as numbers.
    """
    # NOTE: the weights add up to 1.12 rather than 1.0. Only the relative
    # sizes matter for the normalized score, since it is scaled by the maximum.
    weights = {
        "streams_total": 0.4,          # High weight
        "popularity_current": 0.4,    # High weight
        "charts_current": 0.1,        # Moderate weight
        "charts_total": 0.1,          # Moderate weight
        "playlists_current": 0.03,    # Low weight
        "playlists_total": 0.03,      # Low weight
        "playlists_editorial_current": 0.01,  # Very low weight
        "playlists_editorial_total": 0.01,    # Very low weight
        "playlist_reach_current": 0.02,       # Very low weight
        "playlist_reach_total": 0.02,         # Very low weight
    }

    # Raises KeyError up front when the mandatory column is absent.
    has_data = df["streams_total"].notna()

    # Accumulate one column at a time, in weight order, as a float Series.
    # Starting from 0.0 (not int 0) keeps the score columns float throughout,
    # which avoids writing floats into an integer column.
    raw_scores = pd.Series(0.0, index=df.index)
    for col, weight in weights.items():
        if col not in df.columns:
            continue
        values = pd.to_numeric(df[col])
        # Missing, zero and negative values contribute nothing: replacing
        # them with 0 makes their term log1p(0) == 0.
        usable = values.where(values > 0, 0).astype(float)
        # log1p(x) = log(1 + x), which stays accurate for small x.
        raw_scores = raw_scores + weight * np.log1p(usable)

    # Songs without stream data score 0 regardless of their other metrics.
    raw_scores = raw_scores.where(has_data, 0.0)
    df["raw_popularity_score"] = raw_scores

    # Normalize the scores using logarithmic scaling.
    max_score = raw_scores.max()
    if max_score > 0:
        df["normalized_popularity_score"] = np.log1p(raw_scores) / np.log1p(max_score)
    else:
        # Empty frame or no positive score: avoid dividing by log1p(0) == 0.
        df["normalized_popularity_score"] = 0.0

    return df

# Example usage with your dataset
# Score the loaded dataset. The function adds its two score columns to the
# frame it is given and returns that same frame.
df = calculate_popularity_scores_with_log(df)

# Save to a new CSV for inspection. The path is defined once so the
# confirmation message always names the file that was actually written.
output_path = "songstats_with_log_scores.csv"
df.to_csv(output_path, index=False)
print(f"Scores calculated and saved to '{output_path}'.")

# Output: Scores calculated and saved to 'songstats_with_log_scores.csv'.
