### This notebook demonstrates how to generate a one-month listening-history dataset for a single user of the Mood.fm project.

In [1]:
import pandas as pd
import numpy as np
# Seed NumPy's legacy global random generator so np.random-based draws repeat across runs.
# NOTE(review): the DataFrame.sample calls visible in this notebook pass their own
# random_state, so they do not appear to depend on this seed — confirm before removing it.
np.random.seed(seed=10)

In [2]:
# Load the simulated per-user track table. The path is relative, so it is
# resolved against the directory the notebook kernel is running in.
# NOTE(review): the rendered preview shows an 'Unnamed: 0' column, which suggests the
# CSV was written together with its index; reading it with index_col=0 would drop that
# column, but would also change the columns of everything derived from this frame —
# confirm with downstream consumers before changing.
file_path = '../../datasets/simulated_user_dataset.csv'
simulated_user_data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows.
simulated_user_data.head(5)

Unnamed: 0,duration (ms),danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,spec_rate,labels,uri,user_id,group_no
0,324240.0,0.407733,0.714,0.664428,0.311258,0.473896,0.0,0.881797,0.216,0.400413,1.449544e-07,0.0,spotify:track:0lyvsvBtj2j0Edikx2fjhw,1,2
1,285753.0,0.177466,0.101,0.115246,0.313907,0.997992,0.382,0.307329,0.122,0.624017,1.658775e-07,0.0,spotify:track:1gMc3PHvlQbOCChXj3ySHH,1,2
2,180570.0,0.429153,0.872,0.632533,0.287417,0.002651,0.000529,0.416076,0.69,0.731116,2.4035e-07,2.0,spotify:track:5WXSmZUK7MAtkcKoBPFyHm,1,2
3,191153.0,0.50091,0.816,0.665492,0.582207,0.068976,0.0,0.401891,0.885,0.848131,4.754654e-07,1.0,spotify:track:3GSGkA3RNWcpjKGwTDps0M,1,2
4,207416.0,0.627289,0.759,0.62589,0.198013,0.457831,0.721,0.225532,0.738,0.515581,1.441547e-07,0.0,spotify:track:5b6z5ySwNBomo3Ia7WeECC,1,2


In [10]:
# Create 1-Month Listening History
# For every day of the month we draw a fixed-size sample (with replacement)
# from the rows belonging to a single user and tag each drawn row with its
# day number. Sampling is restricted to the user's own rows; no additional
# filter on `group_no` is applied.

num_days = 31        # number of simulated days
songs_per_day = 50   # rows drawn per day

month_history = []
user_id = 2
user_data = simulated_user_data[simulated_user_data['user_id'] == user_id]

# Fail early with a readable message: on an empty selection the `.iloc[0]`
# lookup below would otherwise raise an IndexError that never mentions the user.
if user_data.empty:
    raise ValueError(f'No rows found for user_id={user_id} in simulated_user_data')

# Group of the selected user, read from the first matching row.
# NOTE(review): this value is not used by the sampling below; it is kept only so
# that any code relying on the `group_no` name keeps working — confirm whether
# an explicit group filter was intended.
group_no = user_data['group_no'].iloc[0]
for day in range(1, num_days + 1):
    # Seeding each draw with the day number makes every day's sample repeatable.
    # `.assign` returns a new frame carrying the extra `day` column, so
    # `user_data` itself is never modified.
    day_sample = user_data.sample(
        n=songs_per_day, replace=True, random_state=day
    ).assign(day=day)
    month_history.append(day_sample)

# Stack the per-day samples into one frame with a fresh 0..N-1 index.
month_history_df = pd.concat(month_history, ignore_index=True)
month_history_df.head()

Unnamed: 0,duration (ms),danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,spec_rate,labels,uri,user_id,group_no,day
0,260101.0,0.795438,0.653,0.601042,0.249007,0.63253,1e-06,0.17305,0.816,0.428965,1.445592e-07,1.0,spotify:track:04dZBH1s2UVWz5M02E1agx,2,1,1
1,322456.0,0.543751,0.767,0.678044,0.582207,0.155622,0.0,0.846336,0.554,0.846651,4.754654e-07,1.0,spotify:track:3ys2QxcyVdBWRBpo6zxlzE,2,1,1
2,153522.0,0.716183,0.647,0.52081,0.582207,0.001114,0.0,0.356974,0.304,0.630003,4.754654e-07,1.0,spotify:track:0yAEZcLsvZA2o5DbgzvMmM,2,1,1
3,231640.0,0.497697,0.304,0.288079,0.188079,0.819277,0.366,0.224113,0.0392,0.423185,1.22604e-07,0.0,spotify:track:1OD2YXIOmfYmBAYfV1maRV,2,1,1
4,248253.0,0.491271,0.413,0.516112,0.3,0.498996,0.899,0.200236,0.331,0.857822,1.824751e-07,3.0,spotify:track:2iioiK7vlAunMJyyrnG9hi,2,1,1


In [12]:
# Persist the generated history next to the other datasets; the file name
# embeds the user id so exports for different users do not overwrite each other.
output_path = f'../../datasets/user{user_id}_1month_listening_history.csv'

# index=False keeps the positional row index out of the written file.
month_history_df.to_csv(path_or_buf=output_path, index=False)

print(f'1-month listening history saved to {output_path}')

1-month listening history saved to ../../datasets/user2_1month_listening_history.csv


----------------------------------------------------------------