In [1]:
import pandas as pd
import numpy as np
import random
import time
import datetime


**Importation de la liste de CD**

In [2]:
# Placeholder substituted in each column wherever the catalogue has no value.
values = {
    "Artist_firstname": "",
    "Artiste": "various",
    "Titre": "undefined",
    "Annee": 0,
    "Genre": "undefined",
}

# Load the CD catalogue, fill the gaps with the placeholders above, then cast
# the year column to int once it no longer contains missing values.
CD = (
    pd.read_csv("CD2010.csv")
    .fillna(value=values)
    .astype({"Annee": int})
)
CD.head()


Unnamed: 0,Artist_firstname,Artiste,Titre,Annee,Genre
0,,A-Ha,"East if the Sun, West of the Moon",0,Rock/Folk
1,,AC/DC,Blow up Your Video,0,Rock/Folk
2,,AC/DC,Live,0,Rock/Folk
3,,AC/DC,The Razors Edge,0,Rock/Folk
4,"Julian ""Cannonball""",Aderley,Somethin' Else,0,Jazz/Blues


**Définition des styles musicaux et des probabilités d'écoute selon les plages horaires**

In [3]:
### Hour ranges naming the timeframes of a day; select_genres_weights tests the
### listening hour against them, and create_history adds random minutes afterwards.
### Hours outside every range (0 to 5) fall back to genres_nights_weights.
hours_wakeup    = range( 6, 9) 
hours_morning   = range( 9,12)
hours_lunch     = range(12,14)
hours_afternoon = range(14,19)
hours_dinner    = range(19,21)
hours_evening   = range(21,24)

# The music genres known to the generator; every weight list below follows this order
genres = ['Rock/Folk','Electro/Rap/Techno', 'Français', 'Jazz/Blues', 'World', 'Classique/Contemporain']

# Distinction between week days and week-end days,
# and the genre weights attached to each of the two groups.
# `days` is indexed with date.weekday() in period_gen, so it must start on Monday.
days = ['Lundi', 'Mardi','Mercredi','Jeudi', 'Vendredi', 'Samedi', 'Dimanche']
wk_days = ['Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi']
we_days = ['Samedi', 'Dimanche']
genres_wk_weights = [0.37, 0.1, 0.13, 0.20, 0.08, 0.12]
genres_we_weights = [0.32, 0.1, 0.23, 0.05, 0.08, 0.22]

# Genre weights for each timeframe of the day
genres_wakeup_weights    = [0.11, 0.0 , 0.26, 0.33, 0.04, 0.26]
genres_morning_weights   = [0.36, 0.0 , 0.13, 0.20, 0.14, 0.17]
genres_lunch_weights     = [0.24, 0.0 , 0.31, 0.27, 0.10, 0.08]
genres_afternoon_weights = [0.35, 0.0 , 0.04, 0.41, 0.01, 0.19]
# NOTE(review): 1.41 makes this row sum to 1.90 whereas every other row sums to
# about 1.00, which heavily favours Jazz/Blues at dinner time. Looks like a typo
# (0.51 would bring the row back to 1.00) — confirm the intended value.
genres_dinner_weights    = [0.15, 0.0 , 0.14, 1.41, 0.11, 0.09]
genres_evening_weights   = [0.22, 0.1 , 0.23, 0.20, 0.03, 0.22]
genres_nights_weights    = [0.21, 0.07, 0.24, 0.29, 0.03, 0.17]


**Calcul des poids pour la sélection random du Genre**

In [4]:
# Helper used by select_genres_weights to blend two weight lists
def compute_weights(day_weights, timeframe_weights):
    """Return the element-wise mean of two weight lists.

    The lists are paired with ``zip``, so the result is as long as the
    shorter of the two inputs.
    """
    return [
        (by_day + by_timeframe) / 2
        for by_day, by_timeframe in zip(day_weights, timeframe_weights)
    ]

# Weight of each genre for a given week day and listening hour
def select_genres_weights(day, hour):
    """Return the genre weights for ``day`` and ``hour``.

    ``day`` is indexed with ``[0]`` to obtain the day name, so it is expected
    to be a ``(day_name, date)`` pair such as those built by ``period_gen``.
    The week/week-end weights are averaged with the weights of the timeframe
    containing ``hour``; an hour outside every named timeframe uses the night
    weights.
    """
    # First half of the blend: week day versus week-end
    base_weights = genres_wk_weights if day[0] in wk_days else genres_we_weights

    # Second half: the first timeframe whose hour range contains `hour`
    timeframes = (
        (hours_wakeup, genres_wakeup_weights),
        (hours_morning, genres_morning_weights),
        (hours_lunch, genres_lunch_weights),
        (hours_afternoon, genres_afternoon_weights),
        (hours_dinner, genres_dinner_weights),
        (hours_evening, genres_evening_weights),
    )
    for hour_range, timeframe_weights in timeframes:
        if hour in hour_range:
            return compute_weights(base_weights, timeframe_weights)
    return compute_weights(base_weights, genres_nights_weights)

# Weighted random pick of one genre for a given day and hour
def select_genre(day, hour):
    """Draw a genre at random, weighted by day and hour.

    Returns the one-element list produced by ``random.choices``; callers take
    element ``[0]`` to get the genre name.
    """
    return random.choices(genres, select_genres_weights(day, hour))


**Probabilité d'écouter quelque chose selon le jour et l'heure**

In [5]:
# Load the listening-probability table; prob_music reads it as prob.at[hour, day].
# NOTE(review): presumably one row per hour (0-23, default integer index) and one
# column per French day name — confirm against ProbDaysHours.csv.
prob =pd.read_csv("ProbDaysHours.csv")
prob.head()

def prob_music(day, hour):
    """Return the pair ``(1 - p, p)`` where ``p`` is ``prob.at[hour, day]``.

    create_history passes the pair to ``random.choices`` as the weights of the
    outcomes ``(0, 1)``.
    """
    listening = prob.at[hour, day]
    return (1 - listening, listening)

**Génération de la période sur laquelle créer l'historique**

In [6]:
def period_gen(start_date="2021-09-01", end_date="2021-11-30"):
    """List every day from ``start_date`` to ``end_date``, both included.

    Returns a list of ``(day_name, timestamp)`` pairs, one per calendar day,
    where ``day_name`` is looked up in ``days`` with the timestamp's weekday.
    """
    return [
        (days[stamp.weekday()], stamp)
        for stamp in pd.date_range(start=start_date, end=end_date, freq="D")
    ]

# Build the default period once; create_history iterates over this module-level list
days_list=period_gen()

**Création d'un historique**

In [7]:
def create_history(nmax=2):
    """Generate a random listening history over ``days_list``.

    For each day and each hour of the day (0 to 23), a number of records
    between 0 and ``nmax`` is drawn, and kept only if a second draw weighted by
    ``prob_music`` says something is listened to at that time. Every record is
    one random row of ``CD`` whose genre is chosen by ``select_genre``.

    Parameters
    ----------
    nmax : int, default 2
        Maximum number of records generated for a single hour.

    Returns
    -------
    pandas.DataFrame
        The sampled ``CD`` rows (keeping their ``CD`` index) with four extra
        columns: ``Week_day``, ``Date``, ``Heure`` and ``Jour``. When nothing
        is drawn, an empty frame with those same columns is returned.
    """
    records = []
    for day in days_list:
        week_day, date = day
        # range(24), not range(0, 23): hour 23 is part of hours_evening and
        # must be generated as well.
        for hour in range(24):
            # randint includes both bounds, so up to nmax records can be drawn
            # (choice(range(0, nmax)) stopped at nmax - 1).
            nb_drawn = random.randint(0, nmax)
            # 0 or 1 depending on whether anything is listened to at that time.
            is_listening = random.choices((0, 1), prob_music(week_day, hour))[0]
            for _ in range(nb_drawn * is_listening):
                selected_genre = select_genre(day, hour)[0]
                listened = CD[CD['Genre'] == str(selected_genre)].sample(n=1, replace=False)
                minute = random.randrange(60)
                records.append(
                    listened.assign(
                        Week_day=week_day,
                        Date=date + pd.Timedelta(hours=hour, minutes=minute),
                    )
                )

    if not records:
        # Nothing was drawn: still hand back a frame with the expected columns.
        return pd.DataFrame(columns=[*CD.columns, 'Week_day', 'Date', 'Heure', 'Jour'])

    # Concatenate once at the end: DataFrame.append copied the whole frame on
    # every call and no longer exists in pandas 2.x.
    history = pd.concat(records)
    history['Heure'] = history['Date'].dt.hour
    history['Jour'] = history['Date'].dt.date
    return history

# Generate the history with the default nmax and display it
history = create_history()
history

Unnamed: 0,Artist_firstname,Artiste,Titre,Annee,Genre,Week_day,Date,Heure,Jour
142,Richard,De Palmas,Marche dans le sable,0,Français,Mercredi,2021-09-01 13:44:00,13,2021-09-01
654,Karlheinz,Stockhausen,Helikopter-Streichquartett,1996,Classique/Contemporain,Mercredi,2021-09-01 15:01:00,15,2021-09-01
662,Syd,Barrett,The Madcap Laughs,0,Rock/Folk,Mercredi,2021-09-01 17:40:00,17,2021-09-01
421,,Mogwai,Come on die young,0,Electro/Rap/Techno,Mercredi,2021-09-01 22:44:00,22,2021-09-01
332,,Lunatic Calm,undefined,0,Electro/Rap/Techno,Jeudi,2021-09-02 08:26:00,8,2021-09-02
...,...,...,...,...,...,...,...,...,...
618,Patti,Smith,Horses,1976,Rock/Folk,Dimanche,2021-11-28 14:20:00,14,2021-11-28
741,Tracy,Chapman,Tracy Chapman,0,Rock/Folk,Dimanche,2021-11-28 17:58:00,17,2021-11-28
812,,Divers,Gipsy Jazz School,0,Jazz/Blues,Dimanche,2021-11-28 18:53:00,18,2021-11-28
789,Zakir,Hussein,Zakir Hussein,0,World,Lundi,2021-11-29 13:14:00,13,2021-11-29


In [8]:
# Reshape the generated history into the export layout and write it to CSV.
# NOTE: this cell replaces `history` and drops 'Artist_firstname', so it cannot
# be run a second time without regenerating `history` with create_history().

# Full artist name. Missing first names were filled with "" when the catalogue
# was loaded, so strip() removes the leading space they would otherwise leave.
full_name = (history['Artist_firstname'] + " " + history['Artiste']).str.strip()

history = (
    history
    .drop(["Artist_firstname", "Artiste"], axis=1)
    .assign(Artiste=full_name, User="M", Song="undefined")
    .reindex(columns=['User','Date','Jour','Week_day', 'Heure', 'Artiste', 'Titre', 'Song', 'Genre'])
)
history.to_csv('./history_CD.csv')
history


Unnamed: 0,User,Date,Jour,Week_day,Heure,Artiste,Titre,Song,Genre
142,M,2021-09-01 13:44:00,2021-09-01,Mercredi,13,Richard De Palmas,Marche dans le sable,undefined,Français
654,M,2021-09-01 15:01:00,2021-09-01,Mercredi,15,Karlheinz Stockhausen,Helikopter-Streichquartett,undefined,Classique/Contemporain
662,M,2021-09-01 17:40:00,2021-09-01,Mercredi,17,Syd Barrett,The Madcap Laughs,undefined,Rock/Folk
421,M,2021-09-01 22:44:00,2021-09-01,Mercredi,22,Mogwai,Come on die young,undefined,Electro/Rap/Techno
332,M,2021-09-02 08:26:00,2021-09-02,Jeudi,8,Lunatic Calm,undefined,undefined,Electro/Rap/Techno
...,...,...,...,...,...,...,...,...,...
618,M,2021-11-28 14:20:00,2021-11-28,Dimanche,14,Patti Smith,Horses,undefined,Rock/Folk
741,M,2021-11-28 17:58:00,2021-11-28,Dimanche,17,Tracy Chapman,Tracy Chapman,undefined,Rock/Folk
812,M,2021-11-28 18:53:00,2021-11-28,Dimanche,18,Divers,Gipsy Jazz School,undefined,Jazz/Blues
789,M,2021-11-29 13:14:00,2021-11-29,Lundi,13,Zakir Hussein,Zakir Hussein,undefined,World
