In [1]:
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET  # parse nested xml

In [2]:
people_base = pd.read_xml('dataset.xml')
print(people_base.head())

# pd.read_xml() does not parse the multi-level (nested) tags properly, so the
# trailing seven columns are discarded and only the simple ones are kept.
nested_cols = people_base.columns[-7:]
people_base = people_base.drop(columns=nested_cols)
people_base.head()

# people_base serves as the base table holding only the simple columns.

   UserID  Age  Gender       Living  YearsOfMusicEducation  \
0       1   27    Male  Countryside                      0   
1      13   20  Female  Countryside                      8   
2      86   50    Male  Countryside                      0   
3     124   23  Female         City                      5   
4     207   23    Male         City                      0   

   YearsOfInstrumentPlaying  MusicListeningPerDay TakeMoodDrugs  \
0                         0                     3           Yes   
1                        14                     4            No   
2                         0                     3            No   
3                         0                     2            No   
4                         0                     4            No   

  UnderInfluenceOfDrugs  PreferredGenres  CurrentMoodVA  CurrentMoodColor  \
0                   Yes              NaN            NaN               NaN   
1                    No              NaN            NaN               

Unnamed: 0,UserID,Age,Gender,Living,YearsOfMusicEducation,YearsOfInstrumentPlaying,MusicListeningPerDay,TakeMoodDrugs,UnderInfluenceOfDrugs
0,1,27,Male,Countryside,0,0,3,Yes,Yes
1,13,20,Female,Countryside,8,14,4,No,No
2,86,50,Male,Countryside,0,0,3,No,No
3,124,23,Female,City,5,0,2,No,No
4,207,23,Male,City,0,0,4,No,No


# Data Extraction and Cleanup

We first have to extract the dataset from XML into a pandas DataFrame. Although pandas has a `read_xml()` function, it cannot parse nested XML elements,
so we need to parse the XML manually.
We use `xml` from Python standard library.

First, a function `nested_extract()` recursively finds nested tags and puts them within lists. As a result, we end up getting a multi-level list for
some of the tags. But this helps us create a complete Dataframe, which we can later modify and clean up.

In [3]:
tree = ET.parse('dataset.xml') # parse the XML file into an ElementTree
root = tree.getroot()          # root element; iterating it yields one child per record


def nested_extract(node):
    """Convert an XML element into nested Python lists.

    A leaf element (one without child elements) yields its ``text``; any
    other element yields a list with one entry per child, built recursively.
    """
    children = list(node)
    if not children:
        return node.text
    return [nested_extract(child) for child in children]


# Column names are the tags of the first record's direct children
# (an empty root simply gives an empty list).
columns = [field.tag for field in next(iter(root), ())]
print(f"Number of 1st-level columns: {len(columns)}")

# Flatten the whole document into nested lists: one inner list per record.
tree = nested_extract(root)
tree_df = pd.DataFrame(tree, columns=columns)

# Work on a copy so that tree_df keeps the untouched extraction.
tree_cp = tree_df.copy()
print(f"Columns: {tree_cp.columns.to_list()}")
print(tree_cp.iloc[0])

Number of 1st-level columns: 16
Columns: ['UserID', 'Age', 'Gender', 'Living', 'YearsOfMusicEducation', 'YearsOfInstrumentPlaying', 'MusicListeningPerDay', 'TakeMoodDrugs', 'UnderInfluenceOfDrugs', 'PreferredGenres', 'CurrentMoodVA', 'CurrentMoodColor', 'CurrentMoodA', 'EmotionVA', 'EmotionHSV', 'Songs']
UserID                                                                      1
Age                                                                        27
Gender                                                                   Male
Living                                                            Countryside
YearsOfMusicEducation                                                       0
YearsOfInstrumentPlaying                                                    0
MusicListeningPerDay                                                        3
TakeMoodDrugs                                                             Yes
UnderInfluenceOfDrugs                                                 

In [4]:
# Summarise dtypes and non-null counts to spot columns with missing values.
tree_cp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 741 entries, 0 to 740
Data columns (total 16 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   UserID                    741 non-null    object
 1   Age                       741 non-null    object
 2   Gender                    741 non-null    object
 3   Living                    741 non-null    object
 4   YearsOfMusicEducation     741 non-null    object
 5   YearsOfInstrumentPlaying  741 non-null    object
 6   MusicListeningPerDay      741 non-null    object
 7   TakeMoodDrugs             741 non-null    object
 8   UnderInfluenceOfDrugs     741 non-null    object
 9   PreferredGenres           741 non-null    object
 10  CurrentMoodVA             741 non-null    object
 11  CurrentMoodColor          741 non-null    object
 12  CurrentMoodA              738 non-null    object
 13  EmotionVA                 738 non-null    object
 14  EmotionHSV                

`tree_cp` contains a few null values in the columns `CurrentMoodA` and `EmotionVA`. Since it's only 3 samples, we can safely drop them.

In [5]:
# Flag every row holding at least one missing value and collect the
# corresponding user ids as plain integers.
has_null = tree_cp.isna().any(axis=1)
null_users = tree_cp.loc[has_null, 'UserID'].astype('int64').to_list()
print(f"User ID for the rows containing null values: {null_users}")

User ID for the rows containing null values: [411, 867, 934]


In [6]:
# Discard every row with at least one missing value, then re-check the summary.
tree_cp = tree_cp.dropna()
tree_cp.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 738 entries, 0 to 740
Data columns (total 16 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   UserID                    738 non-null    object
 1   Age                       738 non-null    object
 2   Gender                    738 non-null    object
 3   Living                    738 non-null    object
 4   YearsOfMusicEducation     738 non-null    object
 5   YearsOfInstrumentPlaying  738 non-null    object
 6   MusicListeningPerDay      738 non-null    object
 7   TakeMoodDrugs             738 non-null    object
 8   UnderInfluenceOfDrugs     738 non-null    object
 9   PreferredGenres           738 non-null    object
 10  CurrentMoodVA             738 non-null    object
 11  CurrentMoodColor          738 non-null    object
 12  CurrentMoodA              738 non-null    object
 13  EmotionVA                 738 non-null    object
 14  EmotionHSV                

There are a total of 16 columns; many of them are composite and contain lists (of lists), which we need to flatten.

## Composite Column Modification

Composite columns list 
* 'PreferredGenres', 
* 'CurrentMoodVA', 
* 'CurrentMoodColor', 
* 'CurrentMoodA', 
* 'EmotionVA', 
* 'EmotionHSV', 
* 'Songs'



### 1. PreferredGenres
Columns containing lists of categorical values can be exploded into one column per category, holding the frequency of that category for every row.

The `PreferredGenres` column in `tree_cp` contains lists of favourite genres.

We create a separate table containing `UserID` and the genre columns for each user.

In [7]:
# One-column key table (UserID as int64) sharing tree_cp's index.
user = tree_cp[['UserID']].astype('int64')

# Expand each sample's list of genres into one row per genre; the original
# row label is repeated for every genre of that sample.
genre_exploded = tree_cp['PreferredGenres'].explode()

# Frequency of each genre per original row, attached to the user ids.
genre_counts = pd.crosstab(genre_exploded.index, genre_exploded)
genre_df = user.join(genre_counts)
genre_df

Unnamed: 0,UserID,Alternative,Blues,Classical,Country,Dance/Disco,Easy Listening,Electronica,Folk,Hip Hop/Rap,...,Latin,Metal,New Age,Opera,Pop,Punk,R&B/Soul,Reggae,Rock,Vocal
0,1,0,0,0,0,0,0,0,0,0,...,0,1,0,1,1,0,0,0,0,0
1,13,1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0
2,86,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,1,0,1,0
3,124,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,1,0
4,207,0,0,0,0,1,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
736,763,0,0,1,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0
737,517,0,0,0,0,1,1,0,0,0,...,0,0,0,0,1,0,0,0,0,0
738,292,0,0,1,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
739,27,1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0


### 2. CurrentMoodVA

This column contains a pair of floats: the `Valence` and `Arousal` values of the user's current mood in the Valence-Arousal space.
Structure:

```
item                  // each sample
  | - CurrentMoodVA
        | - V
        | - A
```

We can easily separate them into two separate columns.

In [8]:
# Each CurrentMoodVA entry is a [V, A] list; spread it over two columns.
# The values are the raw element text produced by nested_extract().
va_pairs = pd.DataFrame(tree_cp['CurrentMoodVA'].tolist(), index=tree_cp.index)

current_mood = user.copy()
current_mood[['CurrentMoodV', 'CurrentMoodA']] = va_pairs

current_mood.head()

Unnamed: 0,UserID,CurrentMoodV,CurrentMoodA
0,1,0.8885,0.8262
1,13,0.4754,-0.1639
2,86,0.6393,0.7279
3,124,-0.0033,-0.1639
4,207,0.6525,0.3803


### 3. CurrentMoodColor

This is similar to `CurrentMoodVA`, it contains a 3-tuple containing the HSV values for the color of the current mood. 

Structure:

```
item
  | - CurrentMoodColor
        | - H
        | - S
        | - V

```
We create separate H, S, V columns within the `current_mood` table itself since that table contains all the information about the user's current mood.

In [9]:
# Each CurrentMoodColor entry is an [H, S, V] list; spread it over three
# columns of the existing current_mood table.
hsv_triplets = pd.DataFrame(tree_cp['CurrentMoodColor'].tolist(), index=tree_cp.index)
current_mood[['H', 'S', 'V']] = hsv_triplets
current_mood.head()

Unnamed: 0,UserID,CurrentMoodV,CurrentMoodA,H,S,V
0,1,0.8885,0.8262,0.825,0.5,1
1,13,0.4754,-0.1639,0.366666666666667,1.0,1
2,86,0.6393,0.7279,0.641666666666667,1.0,1
3,124,-0.0033,-0.1639,0.641666666666667,0.5,1
4,207,0.6525,0.3803,0.641666666666667,0.75,1


### 4. CurrentMoodA

This column contains the value of different kinds of emotions the sample was feeling at the time, on a range of 0 (completely absent) to 1 (significantly expressed).

Structure:

```
item
  | - CurrentMoodA
        | - Active
        | - WideAwake
        | - Drowsy
        | - Inactive
        ...
```
We iterate over each sample in the xml tree and put the emotions' value in a dictionary and then parse the list of dicts into a dataframe.

In [10]:
# Build one record per sample: its UserID plus one entry per emotion tag
# found under <CurrentMoodA>.

data = []

# Iterate the XML records directly instead of indexing root by
# len(people_base), so this cell no longer depends on the read_xml() cell.
for child in root:
    # Looked up once per sample rather than once per emotion.
    user_id = child.find('UserID')
    emotions = {user_id.tag: user_id.text}

    # find() returns None when the tag is absent; treat that like an empty
    # tag so the sample just ends up with missing emotion values.
    mood = child.find('CurrentMoodA')
    for emotion in (mood if mood is not None else ()):
        emotions[emotion.tag] = emotion.text
    data.append(emotions)

# Samples without emotion values carry NaN in the emotion columns and are
# dropped; UserID is then converted from element text to int64.
curr_emotions = (
    pd.DataFrame(data)
    .dropna()
    .astype({'UserID': 'int64'})
)

In [11]:
# Preview the per-sample emotion values extracted from <CurrentMoodA>.
curr_emotions.head()

Unnamed: 0,UserID,Active,WideAwake,Drowsy,Inactive,Miserable,Discontent,Disappointed,Relaxed,Happy,Tired,Cheerful,Joyous,Satistfied,Sleepy,Sad,Calm,Angry
0,1,0.8914,0.6071,0.0514,0.0314,0.03,0.0129,0.0157,0.7343,0.9443,0.34,0.8843,0.6114,0.93,0.0343,0.18,0.5086,0.08
1,13,0.7129,0.6571,0.2786,0.2971,0.0457,0.0243,0.0429,0.6129,0.6586,0.13,0.5686,0.5086,0.5329,0.2786,0.0157,0.9514,0.0214
2,86,0.6757,0.7657,0.1486,0.0357,0.5014,0.6043,0.55,0.5643,0.4986,0.32,0.5571,0.6329,0.4686,0.1357,0.3914,0.4714,0.5543
3,124,0.0543,0.1843,0.9429,0.93,0.0571,0.1229,0.1,0.5971,0.5871,0.7714,0.14,0.5129,0.7643,0.8671,0.0457,0.8414,0.1029
4,207,0.4557,0.8014,0.1314,0.4543,0.2114,0.0686,0.18,0.67,0.5543,0.2871,0.4943,0.5686,0.5357,0.2314,0.09,0.8271,0.1086


Now we can merge `curr_emotions` with `current_mood`

In [12]:
# Merge the per-emotion values into the current-mood table on UserID.
# The emotion columns are removed from the left side first (a no-op on the
# first run) so that re-running this cell does not merge an already-merged
# frame and produce duplicated *_x / *_y columns.
emotion_cols = [col for col in curr_emotions.columns if col != 'UserID']
current_mood = (
    current_mood
    .drop(columns=emotion_cols, errors='ignore')
    .merge(curr_emotions, on='UserID')
)
current_mood

Unnamed: 0,UserID,CurrentMoodV,CurrentMoodA,H,S,V,Active,WideAwake,Drowsy,Inactive,...,Relaxed,Happy,Tired,Cheerful,Joyous,Satistfied,Sleepy,Sad,Calm,Angry
0,1,0.8885,0.8262,0.825,0.5,1,0.8914,0.6071,0.0514,0.0314,...,0.7343,0.9443,0.34,0.8843,0.6114,0.93,0.0343,0.18,0.5086,0.08
1,13,0.4754,-0.1639,0.366666666666667,1,1,0.7129,0.6571,0.2786,0.2971,...,0.6129,0.6586,0.13,0.5686,0.5086,0.5329,0.2786,0.0157,0.9514,0.0214
2,86,0.6393,0.7279,0.641666666666667,1,1,0.6757,0.7657,0.1486,0.0357,...,0.5643,0.4986,0.32,0.5571,0.6329,0.4686,0.1357,0.3914,0.4714,0.5543
3,124,-0.0033,-0.1639,0.641666666666667,0.5,1,0.0543,0.1843,0.9429,0.93,...,0.5971,0.5871,0.7714,0.14,0.5129,0.7643,0.8671,0.0457,0.8414,0.1029
4,207,0.6525,0.3803,0.641666666666667,0.75,1,0.4557,0.8014,0.1314,0.4543,...,0.67,0.5543,0.2871,0.4943,0.5686,0.5357,0.2314,0.09,0.8271,0.1086
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
733,763,0.4813,-0.6295,0.183333333333333,0.75,1,0.3572,0.9829,0.0243,0.5986,...,0.9986,0.8515,0.02,0.8657,0.6843,0.7,0.0186,0.0172,0.5172,0.2357
734,517,0.4361,-0.3344,0.0916666666666667,0.5,1,0.0371,0.61,0.7586,0.8186,...,0.6457,0.4829,0.6429,0.1743,0.1829,0.2386,0.65,0.5029,0.24,0.38
735,292,0.0033,0.0131,0.916666666666667,0.75,1,0.3143,0.9515,0.0186,0.56,...,0.7243,0.3529,0.59,0.7143,0.2015,0.7329,0.0886,0.0286,0.9086,0.0415
736,27,0.6852,0.765,0.458333333333333,0.75,1,0.7471,0.8457,0.11,0.0271,...,0.6471,0.5414,0.2014,0.6429,0.5029,0.9871,0.1886,0.0286,0.27,0.0314


In [13]:
# Sanity check: user 411 was one of the rows with missing values, so this
# selection is expected to be empty.
current_mood[current_mood['UserID'] == 411]

Unnamed: 0,UserID,CurrentMoodV,CurrentMoodA,H,S,V,Active,WideAwake,Drowsy,Inactive,...,Relaxed,Happy,Tired,Cheerful,Joyous,Satistfied,Sleepy,Sad,Calm,Angry


In [14]:
# Persist the two cleaned tables. to_csv() writes the row index as the first
# column by default.
people_base.to_csv('base.csv')
current_mood.to_csv('current_mood.csv')

## Composite Columns `EmotionVA` and `EmotionHSV`

These two columns are 3 levels deep. Both contain a subcolumn for each emotion, and within each of those the metrics (VA or HSV) are given as further subcolumns.
We create a utility function `extend_composites()` which extracts these columns into a dataframe with `UserID` as the primary key and the expanded structure, with the subcolumn names appended hierarchically.

In [15]:
def extend_composites(file: str, column: str)-> pd.DataFrame:
    """Flatten a two-level composite column of the XML file into a wide table.

    For every record (direct child of the XML root) the element tagged
    ``column`` is looked up. Each of its children contributes one entry per
    grandchild, keyed by the two tag names concatenated (e.g. ``Fear`` with
    children ``V`` and ``A`` gives ``FearV`` and ``FearA``).

    Parameters
    ----------
    file : str
        Path of the XML file to parse.
    column : str
        Tag of the composite element to expand, e.g. ``'EmotionVA'``.

    Returns
    -------
    pd.DataFrame
        One row per record with an integer ``UserID`` column followed by the
        expanded columns. Values are the raw element text; a record whose
        composite element is missing or empty only provides its ``UserID``.
    """
    data = []
    root = ET.parse(file).getroot()
    for child in root:
        entry = {'UserID': int(child.find('UserID').text)}

        # Compare against None explicitly: an Element's truth value reflects
        # whether it has children, and testing it is deprecated.
        composite = child.find(column)
        if composite is not None:
            for vals in composite:
                for val in vals:
                    entry[vals.tag + val.tag] = val.text

        data.append(entry)
    return pd.DataFrame(data)

# Try the helper on the EmotionVA composite and display the result.
test = extend_composites('dataset.xml', 'EmotionVA')
test

Unnamed: 0,UserID,FearV,FearA,EnergeticV,EnergeticA,AngerV,AngerA,RelaxedV,RelaxedA,HappinessV,...,SadnessV,SadnessA,LivelinessV,LivelinessA,JoyV,JoyA,DisappointmentV,DisappointmentA,DiscontentV,DiscontentA
0,1,-0.6164,-0.1049,0.8197,0.9574,0.2885,0.2033,0.4197,0.5377,0.682,...,-0.8918,0.5443,0.8328,0.7016,0.9443,0.4918,-0.5049,0.3016,-0.8918,0.8131
1,13,-0.6295,0.2361,0.8197,0.859,-0.5639,0.7344,0.9311,0.1246,0.9377,...,-0.682,-0.0721,0.8852,0.9443,0.977,0.9836,-0.2033,0.0984,-0.0787,0.0787
2,86,,,-0.3607,0.7738,,,0.5902,-0.6492,0.7869,...,-0.9836,-0.7934,0.1705,0.9508,0.6361,0.6885,-0.7541,0.3148,0.3672,0.5049
3,124,-0.9836,0.918,0.7213,0.8,-0.2689,0.9246,0.977,-0.3803,0.9049,...,-0.8066,-0.8459,0.859,0.5705,0.7803,0.2885,-0.9049,-0.6098,-0.2689,-0.3672
4,207,-0.7016,0.3475,0.282,0.8328,-0.4721,0.8393,0.3475,0.4262,0.5443,...,-0.459,-0.7607,0.5639,0.9443,0.7213,0.7016,-0.1049,-0.0918,-0.6098,-0.3803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
736,763,-0.9049,0.9705,0.8852,0.918,-0.9443,0.8852,0.9639,-0.8525,0.9967,...,-0.9246,0.7607,0.9639,0.9377,0.9246,0.0656,-0.9967,-0.8656,-0.9377,-0.8459
737,517,-0.9311,0.8197,0.3803,0.9311,-0.4852,0.8656,0.8787,0.1639,0.2164,...,-0.9443,-0.7344,0.459,0.9967,0.741,0.5115,-0.8787,-0.577,-0.9311,0.2295
738,292,-0.9967,-0.0721,-0.4328,0.6557,-0.8459,0.8918,0.4721,0.5049,0.9246,...,-0.8459,-0.3738,0.5902,0.6951,0.8459,0.5639,-0.3672,-0.3344,-0.4525,-0.6426
739,27,-0.9508,0.2951,0.6098,0.8328,-0.7213,0.9443,0.4984,-0.8525,0.9705,...,-0.918,-0.9574,,,0.9639,0.2754,-0.9377,-0.8787,-0.9377,-0.8


## Emotions
Next we have the columns `EmotionVA` and `EmotionHSV`, which describe different emotions in general. They are not related to the current mood, hence it's better to expand them into a separate dataframe.

Structure:

```
item
  | - EmotionVA
        | - Fear
        |     | - V
        |     | - A
        | - Anger
        |     | - V
        |     | - A
        ...
```

The general structure would be as follows. We will separate each emotion into 2 columns with their V & A values.

```
##########################################################
# UserID | FearV | FearA | EnergeticV | EnergeticA | ... #
##########################################################
```



In [16]:
# Expand EmotionVA into one column per emotion/metric pair, then leave out
# the users that were found to have missing values.
emotion_va_all = extend_composites('dataset.xml', 'EmotionVA')
is_null_user = emotion_va_all['UserID'].isin(null_users)
emotion_va = emotion_va_all[~is_null_user]
emotion_va

Unnamed: 0,UserID,FearV,FearA,EnergeticV,EnergeticA,AngerV,AngerA,RelaxedV,RelaxedA,HappinessV,...,SadnessV,SadnessA,LivelinessV,LivelinessA,JoyV,JoyA,DisappointmentV,DisappointmentA,DiscontentV,DiscontentA
0,1,-0.6164,-0.1049,0.8197,0.9574,0.2885,0.2033,0.4197,0.5377,0.682,...,-0.8918,0.5443,0.8328,0.7016,0.9443,0.4918,-0.5049,0.3016,-0.8918,0.8131
1,13,-0.6295,0.2361,0.8197,0.859,-0.5639,0.7344,0.9311,0.1246,0.9377,...,-0.682,-0.0721,0.8852,0.9443,0.977,0.9836,-0.2033,0.0984,-0.0787,0.0787
2,86,,,-0.3607,0.7738,,,0.5902,-0.6492,0.7869,...,-0.9836,-0.7934,0.1705,0.9508,0.6361,0.6885,-0.7541,0.3148,0.3672,0.5049
3,124,-0.9836,0.918,0.7213,0.8,-0.2689,0.9246,0.977,-0.3803,0.9049,...,-0.8066,-0.8459,0.859,0.5705,0.7803,0.2885,-0.9049,-0.6098,-0.2689,-0.3672
4,207,-0.7016,0.3475,0.282,0.8328,-0.4721,0.8393,0.3475,0.4262,0.5443,...,-0.459,-0.7607,0.5639,0.9443,0.7213,0.7016,-0.1049,-0.0918,-0.6098,-0.3803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
736,763,-0.9049,0.9705,0.8852,0.918,-0.9443,0.8852,0.9639,-0.8525,0.9967,...,-0.9246,0.7607,0.9639,0.9377,0.9246,0.0656,-0.9967,-0.8656,-0.9377,-0.8459
737,517,-0.9311,0.8197,0.3803,0.9311,-0.4852,0.8656,0.8787,0.1639,0.2164,...,-0.9443,-0.7344,0.459,0.9967,0.741,0.5115,-0.8787,-0.577,-0.9311,0.2295
738,292,-0.9967,-0.0721,-0.4328,0.6557,-0.8459,0.8918,0.4721,0.5049,0.9246,...,-0.8459,-0.3738,0.5902,0.6951,0.8459,0.5639,-0.3672,-0.3344,-0.4525,-0.6426
739,27,-0.9508,0.2951,0.6098,0.8328,-0.7213,0.9443,0.4984,-0.8525,0.9705,...,-0.918,-0.9574,,,0.9639,0.2754,-0.9377,-0.8787,-0.9377,-0.8


In [17]:
# Persist the expanded EmotionVA table (row index included by default).
emotion_va.to_csv('data_emotionVA.csv')

## Emotions HSV values

There is also another segment called `EmotionHSV`, which contains HSV values for each emotion in general. We can handle it in the same way.

```
item
  | - EmotionHSV
        | - Fear
        |     | - H
        |     | - S
        |     | - V
        | - Anger
        |     | - H
        |     | - S
        |     | - V
        ...
```
Similar to EmotionVA, we can use the extend function here as well.


In [18]:
# Expand EmotionHSV into one column per emotion/channel pair, then leave out
# the users that were found to have missing values.
emotion_hsv_all = extend_composites('dataset.xml', 'EmotionHSV')
is_null_user = emotion_hsv_all['UserID'].isin(null_users)
emotion_hsv = emotion_hsv_all[~is_null_user]
emotion_hsv

Unnamed: 0,UserID,FearH,FearS,FearV,EnergeticH,EnergeticS,EnergeticV,AngerH,AngerS,AngerV,...,LivelinessV,JoyH,JoyS,JoyV,DisappointmentH,DisappointmentS,DisappointmentV,DiscontentH,DiscontentS,DiscontentV
0,1,0.55,1,1,0,0.5,1,0.641666666666667,1,1,...,1,0,1,1,0.916666666666667,0.25,1,0,0,0.25
1,13,0,0,0.5,0,0.75,1,0.641666666666667,1,1,...,1,0.366666666666667,1,1,0.275,1,1,0.366666666666667,1,1
2,86,0,0,0.25,0,1,1,0,0,0,...,1,0.366666666666667,0.75,1,0.366666666666667,1,1,0.183333333333333,0.75,1
3,124,0.0916666666666667,0.5,1,0,1,1,0.733333333333333,0.75,1,...,1,0.641666666666667,0.75,1,0.55,1,1,0.458333333333333,1,1
4,207,0,0.5,1,0,0,0,0,0,0.25,...,1,0.275,0.5,1,0.458333333333333,0.5,1,0.183333333333333,0.75,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
736,763,0.641666666666667,0.75,1,0,0,0,0,0,0.25,...,1,0.275,0.5,1,0,1,1,0.183333333333333,1,1
737,517,0.641666666666667,0.75,1,0,0.75,1,0,0,0.5,...,1,0.275,0.75,1,0.0916666666666667,0.75,1,0,0.75,1
738,292,0,0,0.25,0,0,0.25,0,0,0,...,1,0.458333333333333,0.75,1,0.825,1,1,0.183333333333333,0.75,1
739,27,0,0,0,0,1,1,0.641666666666667,1,1,...,1,0.55,1,1,0.183333333333333,1,1,0.183333333333333,1,1


In [19]:
# Persist the expanded EmotionHSV table (row index included by default).
emotion_hsv.to_csv('data_emotionHSV.csv')

# Extracting Songs data
Each user has a list of items under Songs tag which denotes their emotional response in the V-A scale for the song samples in the attached songs collection. 
The induced emotions are the emotional responses that the music caused in the listener personally. The perceived emotions are the emotional responses associated with the songs themselves.
