In [1]:
import pandas as pd
import numpy as np
import random
import time
import datetime


**Importation de la liste de CD**

In [2]:
CD=pd.read_csv("CD2010.csv")
values = {"Artist_firstname": "", "Artiste": "various" , "Titre": "undefined", "Annee": 0, "Genre": "undefined"}
CD.fillna(value=values, inplace=True)
CD['Annee']=CD['Annee'].astype(int)
CD.head()


Unnamed: 0,Artist_firstname,Artiste,Titre,Annee,Genre
0,,A-Ha,"East if the Sun, West of the Moon",0,Rock/Folk
1,,AC/DC,Blow up Your Video,0,Rock/Folk
2,,AC/DC,Live,0,Rock/Folk
3,,AC/DC,The Razors Edge,0,Rock/Folk
4,"Julian ""Cannonball""",Aderley,Somethin' Else,0,Jazz/Blues


**Définition des styles musicaux et des probabilités d'écoute selon les plages horaires**

In [3]:
### liste d'horaires qui seront affinés par random pour définir l'heure d'écoute
hours_wakeup    = range( 6, 9) 
hours_morning   = range( 9,12)
hours_lunch     = range(12,14)
hours_afternoon = range(14,19)
hours_dinner    = range(19,21)
hours_evening   = range(21,24)

# Les genres musicaux référencés
genres = ['Rock/Folk','Electro/Rap/Techno', 'Français', 'Jazz/Blues', 'World', 'Classique/Contemporain']

# Disctinction entre jours de la semaine et wee_end
# Définition des poids selon cette disctinction
days = ['Lundi', 'Mardi','Mercredi','Jeudi', 'Vendredi', 'Samedi', 'Dimanche']
wk_days = ['Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi']
we_days = ['Samedi', 'Dimanche']
genres_wk_weights = [0.37, 0.1, 0.13, 0.20, 0.08, 0.12]
genres_we_weights = [0.32, 0.1, 0.23, 0.05, 0.08, 0.22]
#genres_vac_weights = [0.30, 0.1, 0.23, 0.15, 0.10, 0.12]


# Défintion des poids selon les plages horaires
genres_wakeup_weights    = [0.11, 0.0 , 0.26, 0.33, 0.04, 0.26]
genres_morning_weights   = [0.36, 0.0 , 0.13, 0.20, 0.14, 0.17]
genres_lunch_weights     = [0.24, 0.0 , 0.31, 0.27, 0.10, 0.08]
genres_afternoon_weights = [0.35, 0.0 , 0.04, 0.41, 0.01, 0.19]
genres_dinner_weights    = [0.15, 0.0 , 0.14, 1.41, 0.11, 0.09]
genres_evening_weights   = [0.22, 0.1 , 0.23, 0.20, 0.03, 0.22]
genres_nights_weights    = [0.21, 0.07, 0.24, 0.29, 0.03, 0.17]


**Calcul des poids pour la sélection random du Genre**

In [4]:
# Calcul des poids dans la fonction select_style_weights
def compute_weights (day_weights, timeframe_weights):
    weights =[]
    for number1, number2 in zip(day_weights, timeframe_weights):
        weights.append((number1 + number2)/2)
    return (weights)

# Calcul des poids de chaque Style selon le jour de la semaine et l'heure d'écoute
def select_genres_weights(day, hour):
# Pre-calcul du poids en fonction du jour
    if day[0] in wk_days:
        weights = genres_wk_weights
    else :
        weights = genres_we_weights
# Calcul final du poids enfonction de l'heure 
    if hour in hours_wakeup :
        weights = compute_weights(weights, genres_wakeup_weights)
    elif hour in hours_morning :
        weights = compute_weights(weights, genres_morning_weights)
    elif hour in hours_lunch :
        weights = compute_weights(weights, genres_lunch_weights)
    elif hour in hours_afternoon :
        weights = compute_weights(weights, genres_afternoon_weights)
    elif hour in hours_dinner :
        weights = compute_weights(weights, genres_dinner_weights)
    elif hour in hours_evening :
        weights = compute_weights(weights, genres_evening_weights)
    else :
        weights = compute_weights(weights, genres_nights_weights)
    return(weights)

# Function to randomly select a genres among several according to the day and time
def select_genre(day, hour):
    weights = select_genres_weights(day, hour)
    genre = random.choices(genres, weights)
    return(genre)


**Génération de la période sur laquelle créer l'historique**

In [5]:
def period_gen(start_date = "2021-01-01", end_date = "2021-12-31") :
    datelist = pd.date_range(start  =start_date, end = end_date, freq="D")
    days_list=[]
    for date in datelist :
        days_list.append((days[date.weekday()], date))
    return(days_list)

days_list=period_gen()

vacances = period_gen("2021-12-17","2021-12-31")
vacances+=(period_gen("2021-08-01","2021-08-15"))
vacances+=(period_gen("2021-05-01","2021-05-8"))
vacances

[('Vendredi', Timestamp('2021-12-17 00:00:00', freq='D')),
 ('Samedi', Timestamp('2021-12-18 00:00:00', freq='D')),
 ('Dimanche', Timestamp('2021-12-19 00:00:00', freq='D')),
 ('Lundi', Timestamp('2021-12-20 00:00:00', freq='D')),
 ('Mardi', Timestamp('2021-12-21 00:00:00', freq='D')),
 ('Mercredi', Timestamp('2021-12-22 00:00:00', freq='D')),
 ('Jeudi', Timestamp('2021-12-23 00:00:00', freq='D')),
 ('Vendredi', Timestamp('2021-12-24 00:00:00', freq='D')),
 ('Samedi', Timestamp('2021-12-25 00:00:00', freq='D')),
 ('Dimanche', Timestamp('2021-12-26 00:00:00', freq='D')),
 ('Lundi', Timestamp('2021-12-27 00:00:00', freq='D')),
 ('Mardi', Timestamp('2021-12-28 00:00:00', freq='D')),
 ('Mercredi', Timestamp('2021-12-29 00:00:00', freq='D')),
 ('Jeudi', Timestamp('2021-12-30 00:00:00', freq='D')),
 ('Vendredi', Timestamp('2021-12-31 00:00:00', freq='D')),
 ('Dimanche', Timestamp('2021-08-01 00:00:00', freq='D')),
 ('Lundi', Timestamp('2021-08-02 00:00:00', freq='D')),
 ('Mardi', Timestamp('

**Probabilité d'écouter qqch selon le jour et l'heure**

In [6]:
prob =pd.read_csv("ProbDaysHours.csv")
prob.head()

def prob_music (day, hour):
    return (1-prob.at[hour, day],prob.at[hour, day])

**Création d'un historique**

In [7]:
# NEW VERSION

def create_history (nmax=2): # 2 records per hour maximum
# Create and fill in the DataFrame with randomly selected title from style
# Styme selected according to the day and hour
    history = pd.DataFrame()
    for day in days_list: 
        for hour in range(0, 23):
            # Random choice of the number of title during the hour timeframe
            if day in vacances:
                nb_listened  = random.choice(range(1,nmax))*(random.choices((0,1), prob_music("Vac", hour)))[0]
                #print("vacations", day[0])
            else:
                nb_listened  = random.choice(range(1,nmax))*(random.choices((0,1), prob_music(day[0], hour)))[0]
            for i in range(1,nb_listened+1):
                selected_genre = select_genre(day, hour)[0]
                Listened = CD[CD['Genre']==str(selected_genre)].sample(n=1,replace=False)
                Listened['Week_day'] = day[0]
                hour_arg = str(hour)+ ' hour'
                min_arg = str(random.choice(range(0,60))) + ' min'
                Listened['Date']= (day[1] + pd.Timedelta(hour_arg) + pd.Timedelta(min_arg))
                history = history.append(Listened)
    history['Heure'] = history['Date'][:,].dt.hour
    history['Jour']  = history['Date'][:,].dt.date
    return(history)

history = create_history()
history

Unnamed: 0,Artist_firstname,Artiste,Titre,Annee,Genre,Week_day,Date,Heure,Jour
639,Sonny,Rollins,What's new ?,0,Jazz/Blues,Vendredi,2021-01-01 18:28:00,18,2021-01-01
798,,Divers,150 ans de passions dans l'Opéra français,0,Classique/Contemporain,Vendredi,2021-01-01 19:22:00,19,2021-01-01
212,Herbie,Hancock,Dis is da drums,0,Jazz/Blues,Vendredi,2021-01-01 21:01:00,21,2021-01-01
583,Roger,Water,"Music from ""the body""",0,Jazz/Blues,Vendredi,2021-01-01 22:03:00,22,2021-01-01
350,,Magma,Concert Opera de Reims 1976,1976,Rock/Folk,Samedi,2021-01-02 11:10:00,11,2021-01-02
...,...,...,...,...,...,...,...,...,...
124,John,Coltrane,Live in Japan,1966,Jazz/Blues,Vendredi,2021-12-31 18:52:00,18,2021-12-31
587,,Romano Sclavis Texier,Suite Africaine,0,Jazz/Blues,Vendredi,2021-12-31 19:53:00,19,2021-12-31
205,Arthur,H,Adieu Tristesse,0,Français,Vendredi,2021-12-31 20:12:00,20,2021-12-31
800,,Divers,A look at French Opera de Lully à Poulenc,0,Classique/Contemporain,Vendredi,2021-12-31 21:32:00,21,2021-12-31


In [8]:
history["Full_name"] = history['Artist_firstname'] + " " + history['Artiste']
history = history.drop(["Artist_firstname","Artiste"], axis=1)
history = history.rename(columns={ "Full_name": "Artiste"})
history['User'] = "M"
history['Song'] = "undefined"
history = history.reindex(columns=['User','Date','Jour','Week_day', 'Heure', 'Artiste', 'Titre', 'Song', 'Genre'])

history.reset_index(drop=True, inplace=True)
history

Unnamed: 0,User,Date,Jour,Week_day,Heure,Artiste,Titre,Song,Genre
0,M,2021-01-01 18:28:00,2021-01-01,Vendredi,18,Sonny Rollins,What's new ?,undefined,Jazz/Blues
1,M,2021-01-01 19:22:00,2021-01-01,Vendredi,19,Divers,150 ans de passions dans l'Opéra français,undefined,Classique/Contemporain
2,M,2021-01-01 21:01:00,2021-01-01,Vendredi,21,Herbie Hancock,Dis is da drums,undefined,Jazz/Blues
3,M,2021-01-01 22:03:00,2021-01-01,Vendredi,22,Roger Water,"Music from ""the body""",undefined,Jazz/Blues
4,M,2021-01-02 11:10:00,2021-01-02,Samedi,11,Magma,Concert Opera de Reims 1976,undefined,Rock/Folk
...,...,...,...,...,...,...,...,...,...
1917,M,2021-12-31 18:52:00,2021-12-31,Vendredi,18,John Coltrane,Live in Japan,undefined,Jazz/Blues
1918,M,2021-12-31 19:53:00,2021-12-31,Vendredi,19,Romano Sclavis Texier,Suite Africaine,undefined,Jazz/Blues
1919,M,2021-12-31 20:12:00,2021-12-31,Vendredi,20,Arthur H,Adieu Tristesse,undefined,Français
1920,M,2021-12-31 21:32:00,2021-12-31,Vendredi,21,Divers,A look at French Opera de Lully à Poulenc,undefined,Classique/Contemporain


In [9]:

history.to_csv('./history_CD.csv')
history.to_json('./history_CD.json')
history


Unnamed: 0,User,Date,Jour,Week_day,Heure,Artiste,Titre,Song,Genre
0,M,2021-01-01 18:28:00,2021-01-01,Vendredi,18,Sonny Rollins,What's new ?,undefined,Jazz/Blues
1,M,2021-01-01 19:22:00,2021-01-01,Vendredi,19,Divers,150 ans de passions dans l'Opéra français,undefined,Classique/Contemporain
2,M,2021-01-01 21:01:00,2021-01-01,Vendredi,21,Herbie Hancock,Dis is da drums,undefined,Jazz/Blues
3,M,2021-01-01 22:03:00,2021-01-01,Vendredi,22,Roger Water,"Music from ""the body""",undefined,Jazz/Blues
4,M,2021-01-02 11:10:00,2021-01-02,Samedi,11,Magma,Concert Opera de Reims 1976,undefined,Rock/Folk
...,...,...,...,...,...,...,...,...,...
1917,M,2021-12-31 18:52:00,2021-12-31,Vendredi,18,John Coltrane,Live in Japan,undefined,Jazz/Blues
1918,M,2021-12-31 19:53:00,2021-12-31,Vendredi,19,Romano Sclavis Texier,Suite Africaine,undefined,Jazz/Blues
1919,M,2021-12-31 20:12:00,2021-12-31,Vendredi,20,Arthur H,Adieu Tristesse,undefined,Français
1920,M,2021-12-31 21:32:00,2021-12-31,Vendredi,21,Divers,A look at French Opera de Lully à Poulenc,undefined,Classique/Contemporain
