In [1]:
import pandas as pd
import numpy as np
import random
import time
import datetime


**Importation de la liste de CD**

In [2]:
CD=pd.read_csv("CD2010.csv")
values = {"Artist_firstname": "", "Artiste": "various" , "Titre": "undefined", "Annee": 0, "Genre": "undefined"}
CD.fillna(value=values, inplace=True)
CD['Annee']=CD['Annee'].astype(int)
CD.head()


Unnamed: 0,Artist_firstname,Artiste,Titre,Annee,Genre
0,,A-Ha,"East if the Sun, West of the Moon",0,Rock/Folk
1,,AC/DC,Blow up Your Video,0,Rock/Folk
2,,AC/DC,Live,0,Rock/Folk
3,,AC/DC,The Razors Edge,0,Rock/Folk
4,"Julian ""Cannonball""",Aderley,Somethin' Else,0,Jazz/Blues


**Définition des styles musicaux et des probabilités d'écoute selon les plages horaires**

In [3]:
### liste d'horaires qui seront affinés par random pour définir l'heure d'écoute
hours_wakeup    = range( 6, 9) 
hours_morning   = range( 9,12)
hours_lunch     = range(12,14)
hours_afternoon = range(14,19)
hours_dinner    = range(19,21)
hours_evening   = range(21,24)

# Les genres musicaux référencés
genres = ['Rock/Folk','Electro/Rap/Techno', 'Français', 'Jazz/Blues', 'World', 'Classique/Contemporain']

# Disctinction entre jours de la semaine et wee_end
# Définition des poids selon cette disctinction
days = ['Lundi', 'Mardi','Mercredi','Jeudi', 'Vendredi', 'Samedi', 'Dimanche']
wk_days = ['Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi']
we_days = ['Samedi', 'Dimanche']
genres_wk_weights = [0.37, 0.1, 0.13, 0.20, 0.08, 0.12]
genres_we_weights = [0.32, 0.1, 0.23, 0.05, 0.08, 0.22]

# Défintion des poids selon les plages horaires
genres_wakeup_weights    = [0.11, 0.0 , 0.26, 0.33, 0.04, 0.26]
genres_morning_weights   = [0.36, 0.0 , 0.13, 0.20, 0.14, 0.17]
genres_lunch_weights     = [0.24, 0.0 , 0.31, 0.27, 0.10, 0.08]
genres_afternoon_weights = [0.35, 0.0 , 0.04, 0.41, 0.01, 0.19]
genres_dinner_weights    = [0.15, 0.0 , 0.14, 1.41, 0.11, 0.09]
genres_evening_weights   = [0.22, 0.1 , 0.23, 0.20, 0.03, 0.22]
genres_nights_weights    = [0.21, 0.07, 0.24, 0.29, 0.03, 0.17]


**Calcul des poids pour la sélection random du Genre**

In [4]:
# Calcul des poids dans la fonction select_style_weights
def compute_weights (day_weights, timeframe_weights):
    weights =[]
    for number1, number2 in zip(day_weights, timeframe_weights):
        weights.append((number1 + number2)/2)
    return (weights)

# Calcul des poids de chaque Style selon le jour de la semaine et l'heure d'écoute
def select_genres_weights(day, hour):
# Pre-calcul du poids en fonction du jour
    if day[0] in wk_days:
        weights = genres_wk_weights
    else :
        weights = genres_we_weights
# Calcul final du poids enfonction de l'heure 
    if hour in hours_wakeup :
        weights = compute_weights(weights, genres_wakeup_weights)
    elif hour in hours_morning :
        weights = compute_weights(weights, genres_morning_weights)
    elif hour in hours_lunch :
        weights = compute_weights(weights, genres_lunch_weights)
    elif hour in hours_afternoon :
        weights = compute_weights(weights, genres_afternoon_weights)
    elif hour in hours_dinner :
        weights = compute_weights(weights, genres_dinner_weights)
    elif hour in hours_evening :
        weights = compute_weights(weights, genres_evening_weights)
    else :
        weights = compute_weights(weights, genres_nights_weights)
    return(weights)

# Function to randomly select a genres among several according to the day and time
def select_genre(day, hour):
    weights = select_genres_weights(day, hour)
    genre = random.choices(genres, weights)
    return(genre)


**Probabilité d'écouter qqch selon le jour et l'heure**

In [5]:
prob =pd.read_csv("ProbDaysHours.csv")
prob.head()

def prob_music (day, hour):
    return (1-prob.at[hour, day],prob.at[hour, day])

**Génération de la période sur laquelle créer l'historique**

In [6]:
def period_gen(start_date = "2021-01-01", end_date = "2021-12-31") :
    datelist = pd.date_range(start  =start_date, end = end_date, freq="D")
    days_list=[]
    for date in datelist :
        days_list.append((days[date.weekday()], date))
    return(days_list)

days_list=period_gen()

**Création d'un historique**

In [7]:
def create_history (nmax=2): # 2 records per hour maximum
# Create and fill in the DataFrame with randomly selected title from style
# Styme selected according to the day and hour
    history = pd.DataFrame()
    for day in days_list:
        for hour in range(0, 23):
            # Random choice of the number of title during the hour timeframe
            nb_listened = random.choice(range(0,nmax))*(random.choices((0,1), prob_music(day[0], hour)))[0]
            for i in range(1,nb_listened+1):
                selected_genre = select_genre(day, hour)[0]
                Listened = CD[CD['Genre']==str(selected_genre)].sample(n=1,replace=False)
                Listened['Week_day'] = day[0]
                hour_arg = str(hour)+ ' hour'
                min_arg = str(random.choice(range(0,60))) + ' min'
                Listened['Date']= (day[1] + pd.Timedelta(hour_arg) + pd.Timedelta(min_arg))
                history = history.append(Listened)
    history['Heure'] = history['Date'][:,].dt.hour
    history['Jour']  = history['Date'][:,].dt.date
    return(history)

history = create_history()
history

Unnamed: 0,Artist_firstname,Artiste,Titre,Annee,Genre,Week_day,Date,Heure,Jour
145,,Deee Lite,World Clique,0,Rock/Folk,Vendredi,2021-01-01 18:29:00,18,2021-01-01
130,John,Coltrane,The heavyweight Champion,0,Jazz/Blues,Vendredi,2021-01-01 20:33:00,20,2021-01-01
757,,Urban Species,Listen,0,Electro/Rap/Techno,Samedi,2021-01-02 10:34:00,10,2021-01-02
181,Gabriel,Faure,Requiem,0,Classique/Contemporain,Samedi,2021-01-02 11:11:00,11,2021-01-02
45,Andrew,Bird,And the mysterious production of eggs,0,Rock/Folk,Samedi,2021-01-02 12:14:00,12,2021-01-02
...,...,...,...,...,...,...,...,...,...
26,Jean-Sébastien,Bach,Cello Suites,2005,Classique/Contemporain,Mardi,2021-12-28 21:59:00,21,2021-12-28
470,Ornette,Coleman,The shape of Jazz to come,0,Jazz/Blues,Mercredi,2021-12-29 08:37:00,8,2021-12-29
619,Jimmy,Smith,Midnight Special,1961,Jazz/Blues,Mercredi,2021-12-29 22:59:00,22,2021-12-29
489,Oscar,Peterson,An evening with Oscar Peterson,0,Jazz/Blues,Jeudi,2021-12-30 19:08:00,19,2021-12-30


In [8]:
history["Full_name"] = history['Artist_firstname'] + " " + history['Artiste']
history = history.drop(["Artist_firstname","Artiste"], axis=1)
history = history.rename(columns={ "Full_name": "Artiste"})
history['User'] = "M"
history['Song'] = "undefined"
history = history.reindex(columns=['User','Date','Jour','Week_day', 'Heure', 'Artiste', 'Titre', 'Song', 'Genre'])

history.reset_index(drop=True, inplace=True)
history

Unnamed: 0,User,Date,Jour,Week_day,Heure,Artiste,Titre,Song,Genre
0,M,2021-01-01 18:29:00,2021-01-01,Vendredi,18,Deee Lite,World Clique,undefined,Rock/Folk
1,M,2021-01-01 20:33:00,2021-01-01,Vendredi,20,John Coltrane,The heavyweight Champion,undefined,Jazz/Blues
2,M,2021-01-02 10:34:00,2021-01-02,Samedi,10,Urban Species,Listen,undefined,Electro/Rap/Techno
3,M,2021-01-02 11:11:00,2021-01-02,Samedi,11,Gabriel Faure,Requiem,undefined,Classique/Contemporain
4,M,2021-01-02 12:14:00,2021-01-02,Samedi,12,Andrew Bird,And the mysterious production of eggs,undefined,Rock/Folk
...,...,...,...,...,...,...,...,...,...
871,M,2021-12-28 21:59:00,2021-12-28,Mardi,21,Jean-Sébastien Bach,Cello Suites,undefined,Classique/Contemporain
872,M,2021-12-29 08:37:00,2021-12-29,Mercredi,8,Ornette Coleman,The shape of Jazz to come,undefined,Jazz/Blues
873,M,2021-12-29 22:59:00,2021-12-29,Mercredi,22,Jimmy Smith,Midnight Special,undefined,Jazz/Blues
874,M,2021-12-30 19:08:00,2021-12-30,Jeudi,19,Oscar Peterson,An evening with Oscar Peterson,undefined,Jazz/Blues


In [9]:

history.to_csv('./history_CD.csv')
history.to_json('./history_CD.json')
history


Unnamed: 0,User,Date,Jour,Week_day,Heure,Artiste,Titre,Song,Genre
0,M,2021-01-01 18:29:00,2021-01-01,Vendredi,18,Deee Lite,World Clique,undefined,Rock/Folk
1,M,2021-01-01 20:33:00,2021-01-01,Vendredi,20,John Coltrane,The heavyweight Champion,undefined,Jazz/Blues
2,M,2021-01-02 10:34:00,2021-01-02,Samedi,10,Urban Species,Listen,undefined,Electro/Rap/Techno
3,M,2021-01-02 11:11:00,2021-01-02,Samedi,11,Gabriel Faure,Requiem,undefined,Classique/Contemporain
4,M,2021-01-02 12:14:00,2021-01-02,Samedi,12,Andrew Bird,And the mysterious production of eggs,undefined,Rock/Folk
...,...,...,...,...,...,...,...,...,...
871,M,2021-12-28 21:59:00,2021-12-28,Mardi,21,Jean-Sébastien Bach,Cello Suites,undefined,Classique/Contemporain
872,M,2021-12-29 08:37:00,2021-12-29,Mercredi,8,Ornette Coleman,The shape of Jazz to come,undefined,Jazz/Blues
873,M,2021-12-29 22:59:00,2021-12-29,Mercredi,22,Jimmy Smith,Midnight Special,undefined,Jazz/Blues
874,M,2021-12-30 19:08:00,2021-12-30,Jeudi,19,Oscar Peterson,An evening with Oscar Peterson,undefined,Jazz/Blues
