# Music Perception & Cognition: Cascading Reminiscence Bump
## Explonatary Data Analysis

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict

# 1. DATA CLEAN-UP

In this section, the data is cleaned and matched with their rating numbers.

In [2]:
results = pd.read_csv("MPC_Form_English.csv")

In [3]:
results.head(1)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,1,Unnamed: 5,"Indicate how many songs are in accordance with the following statements.""I remeber hearing those songs...",Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 66,8,Unnamed: 68,"Indicate how many songs are in accordance with the following statements.""I remeber hearing those songs....7",Unnamed: 70,Unnamed: 71,Unnamed: 72,Unnamed: 73,Unnamed: 74,Unnamed: 75
0,timestamps,Year in which you were born,Year of birth of mother / father:,Year of birth of his father / mother:,"Of the 10 songs, how many have recognized about?",Would you say that some / s of the songs he ha...,"...when I was a child""","...recently""","...with my parents""","...with other people who are not my parents""",...,"If you indicated yes, describe the memory in d...","Of the 10 songs, how many have recognized about?",Would you say that some / s of the songs he ha...,"...when I was a child""","...recently""","...with my parents""","...with other people who are not my parents""","...alone""",Would you say that your memories are clear / v...,"If you indicated yes, describe the memory in d..."


## 1.1. Separate Participant Info

In [4]:
participant_info = results.iloc[1:,:4]

In [5]:
participant_info.head(2)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3
1,24/11/2020 14:04:46,1993,1961,1961
2,24/11/2020 14:13:34,1998,1969,1965


### Rename the columns 

In [6]:
participant_info.drop(columns={"Unnamed: 0"},inplace=True)
participant_info.rename(columns={"Unnamed: 1": "Participant Birthdate", "Unnamed: 2":"Parent 1 Birthdate",
                                "Unnamed: 3": "Parent 2 Birthdate"},inplace=True)

In [7]:
participant_info.head(4)

Unnamed: 0,Participant Birthdate,Parent 1 Birthdate,Parent 2 Birthdate
1,1993,1961,1961
2,1998,1969,1965
3,1996,1964,1967
4,1998,1968,1970


In [9]:
#SAVE PARTICIPANT INFO
#participant_info.to_csv("ParticipantInfo.csv",index=None)

## 1.2. Separate Song Periods

The song clips were in random order so the list below explains the randomness of itself.

In [10]:
#THE ORDER OF SONG PERIODS
period_order = [1970,1985,1980,2000,1995,1975,1990,2005]

In [11]:
results.drop(columns={"Unnamed: 0","Unnamed: 1","Unnamed: 2","Unnamed: 3"},inplace=True)

In [12]:
results.head(1)

Unnamed: 0,1,Unnamed: 5,"Indicate how many songs are in accordance with the following statements.""I remeber hearing those songs...",Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,4,...,Unnamed: 66,8,Unnamed: 68,"Indicate how many songs are in accordance with the following statements.""I remeber hearing those songs....7",Unnamed: 70,Unnamed: 71,Unnamed: 72,Unnamed: 73,Unnamed: 74,Unnamed: 75
0,"Of the 10 songs, how many have recognized about?",Would you say that some / s of the songs he ha...,"...when I was a child""","...recently""","...with my parents""","...with other people who are not my parents""","...alone""",Would you say that your memories are clear / v...,"If you indicated yes, describe the memory in d...","Of the 10 songs, how many have recognized about?",...,"If you indicated yes, describe the memory in d...","Of the 10 songs, how many have recognized about?",Would you say that some / s of the songs he ha...,"...when I was a child""","...recently""","...with my parents""","...with other people who are not my parents""","...alone""",Would you say that your memories are clear / v...,"If you indicated yes, describe the memory in d..."


In [13]:
def separate_periods(df, period_order):
    for start, period in zip(range(0,72,9), period_order):
        df_period = df.iloc[:,start: start+9]
        df_period.columns = df_period.iloc[0]
        df_period = df_period.iloc[1:]
        df_period.rename(columns={'Of the 10 songs, how many have recognized about?':'Recalled Songs',
           'Would you say that some / s of the songs he has / have evoked personal memories?': 'Evoke Memory',
           '...when I was a child"':'Childhood', '...recently"':'Recent', '...with my parents"':'Parents',
           '...with other people who are not my parents"':'Other People', '...alone"':'Alone',
           'Would you say that your memories are clear / vivid?':'Vividness',
           'If you indicated yes, describe the memory in detail:':'Memory Context'},inplace=True)
        df_period.to_csv("{}_words.csv".format(str(period)), index=None)
        

In [14]:
separate_periods(results, period_order)

## 1.3. Words to Rankings

In [26]:
def word_to_rank(df,year):
    df.replace(["None","Some)","Half","The majority","All","Yes","No"],[0,1,2,3,4,1,0], inplace=True)
    df.to_csv("SongPeriod_Ratings/{}_rankings.csv".format(year), index=None)

In [27]:
for year in range(1970,2010,5):
    df = pd.read_csv("SongPeriod_Ratings/{}_words.csv".format(year))
    word_to_rank(df, str(year))

# 2. DATA MERGING

In [28]:
period_order

[1970, 1985, 1980, 2000, 1995, 1975, 1990, 2005]

In [29]:
for filename in period_order:
    filepath = "SongPeriod_Ratings/{}_rankings.csv".format(filename)
    df = pd.read_csv(filepath)
    df.set_index(participant_info.index, inplace=True)
    df.insert(0,"Song Period",filename)
    participant_info = pd.concat([participant_info,df], axis=1)

In [30]:
participant_info.to_csv("MPC_Cleaned_Data.csv",index=None)

# 3. DATA ANALYSIS

In [31]:
df = pd.read_csv("MPC_Cleaned_Data.csv")

## 3.1. Get Descriptive Statistics (Mean, Std..)

In [32]:
general_info = defaultdict(dict)

In [33]:
participant = df.iloc[:,:3]

In [58]:
participant.head(2)

Unnamed: 0,Participant Birthdate,Parent 1 Birthdate,Parent 2 Birthdate
0,1993,1961,1961
1,1998,1969,1965


In [60]:
participant_stat = participant.describe()

In [61]:
participant_stat.to_csv("Participant_Descriptive_Stat.csv")

## 3.1.1. Get Descriptive Stats for Vividness 

In [38]:
#DESCRIPTIVE STATISTICS OF SONG PERIODS 
#(Vividness NaN values filled with 0 to obtain vividness among all participants)

for period in period_order:
    filepath = "SongPeriod_Ratings/{}_rankings.csv".format(period)
    df_stat = pd.read_csv(filepath)
    df_stat["Vividness"].fillna(value=0, inplace=True)
    df_stat = df_stat.describe()
    df_stat.to_csv(f"SongPeriod_Ratings/{period}_rankings_statistics_vividness.csv")

In [53]:
#MEAN OF VIVIDNESS
df_mean_all_vividness = pd.DataFrame()
for period in period_order:
    filepath = "SongPeriod_Ratings/{}_rankings_statistics_vividness.csv".format(period)
    df_mean = pd.read_csv(filepath).set_index("Unnamed: 0")
    df_mean = pd.DataFrame(df_mean.loc["mean"].T).rename(columns={"mean":str(period)})
    df_mean_all_vividness = pd.concat([df_mean_all_vividness,df_mean],axis=1)

In [54]:
df_mean_all_vividness.sort_index(axis=1,inplace=True)

In [55]:
df_mean_all_vividness = df_mean_all_vividness.T

In [56]:
df_mean_all_vividness.head(4)

Unnamed: 0,Recalled Songs,Evoke Memory,Childhood,Recent,Parents,Other People,Alone,Vividness
1970,2.217391,0.608696,1.928571,0.714286,2.0,1.071429,0.5,0.26087
1975,1.782609,0.608696,2.071429,0.785714,1.642857,1.357143,0.785714,0.434783
1980,2.478261,0.73913,2.0,1.176471,1.764706,1.529412,1.529412,0.521739
1985,2.73913,0.869565,1.85,1.05,1.55,1.35,0.9,0.565217


In [57]:
df_mean_all_vividness.to_csv("SongPeriods_DependentVar_MEAN_vividness.csv")

## 3.1.2. Get Descriptive Stats for others

In [49]:
#DESCRIPTIVE STATISTICS OF SONG PERIODS

for period in period_order:
    filepath = "SongPeriod_Ratings/{}_rankings.csv".format(period)
    df_stat = pd.read_csv(filepath)
    df_stat = df_stat.describe()
    df_stat.to_csv(f"SongPeriod_Ratings/{period}_rankings_statistics.csv")

In [51]:
for stat in ["mean","std"]:
    df_mean_all = pd.DataFrame()
    for period in period_order:
        filepath = "SongPeriod_Ratings/{}_rankings_statistics.csv".format(period)
        df_mean = pd.read_csv(filepath).set_index("Unnamed: 0")
        df_mean = pd.DataFrame(df_mean.loc[f"{stat}"].T).rename(columns={f"{stat}":str(period)})
        df_mean_all = pd.concat([df_mean_all,df_mean],axis=1)
    df_mean_all.sort_index(axis=1,inplace=True)
    df_mean_all = df_mean_all.T
    df_mean_all.to_csv(f"SongPeriods_DependentVar_{stat}.csv")

In [52]:
#descriptive_stat = participant.describe()
#descriptive_stat.to_csv("Participant_Descriptive_Stat.csv")

## 3.2. Calculate the count of evoked memories and vividness

In [56]:
df_count_all = pd.DataFrame()

for period in period_order:
    filepath = "SongRatings/{}_rankings_statistics.csv".format(period)
    df_mean = pd.read_csv(filepath).set_index("Unnamed: 0")
    df_mean = pd.DataFrame(df_mean.loc["mean"].T).rename(columns={"mean":str(period)})
    df_count_all = pd.concat([df_count_all,df_mean],axis=1)

In [63]:
df_count_all["1970"][1]*100

60.86956521739131

## Parents having a year gap

In [64]:
P1 = participant_info[participant_info["Parent 1 Birthdate"] < 1955]

TypeError: '<' not supported between instances of 'str' and 'int'

In [16]:
P2 = participant_info[participant_info["Parent 2 Birthdate"] < 1955]

In [23]:
age_gap = [P2.index[0], P1.index[1], P1.index[0]]

In [24]:
age_gap

[6, 13, 12]

In [25]:
cleaned_data = pd.read_csv("MPC_Cleaned_Data.csv")

In [56]:
cleaned_data.head(2)

Unnamed: 0,Participant Birthdate,Mother Birthdate,Father Birthdate,Song Period,Recalled Songs,Evoke Memory,Childhood,Recent,Parents,Other People,...,Song Period.7,Recalled Songs.7,Evoke Memory.7,Childhood.7,Recent.7,Parents.7,Other People.7,Alone.7,Vividness.7,Memory Context.7
0,1993,1961,1961,1970,2,1,1.0,1.0,1.0,1.0,...,2005,1,1,1.0,0.0,0.0,1.0,1.0,1.0,Go for a concrete sidewalk in my neighborhood ...
1,1998,1969,1965,1970,1,1,1.0,0.0,3.0,1.0,...,2005,1,0,,,,,,,


In [34]:
#THE ORDER OF SONG PERIODS
period_order = [1985,1980,2000,1995,1975,1990,2005]

In [114]:
cleaned_data.iloc[6,:3]

Participant Birthdate    1996
Mother Birthdate         1964
Father Birthdate         1951
Name: 6, dtype: object

In [130]:
p1 = defaultdict(dict)
p2 = defaultdict(dict)
p1[1970] = cleaned_data["Recalled Songs"][6]
p2[1970] = cleaned_data["Recalled Songs"][13]


In [131]:
for year,i in zip(period_order,range(1,8)):
    p1[year] = cleaned_data[f"Recalled Songs.{i}"][6]
    p2[year] = cleaned_data[f"Recalled Songs.{i}"][13]

In [134]:
df_p1 = pd.DataFrame(list(p1.values()),index=list(p1.keys()),columns=["Recalled Songs"])
df_p2 = pd.DataFrame(list(p2.values()),index=list(p2.keys()),columns=["Recalled Songs"])
df_p3 = pd.DataFrame(list(p3.values()),index=list(p3.keys()),columns=["Recalled Songs"])

In [135]:
df_p1.sort_index(inplace=True)
df_p2.sort_index(inplace=True)


In [138]:
df_p1.to_csv("AgeGap_1.csv")

In [139]:
df_p2.to_csv("AgeGap_2.csv")


In [136]:
df_p1["Participant"] = cleaned_data["Participant Birthdate"][6]
df_p1["Parent 1 Birthdate"] = cleaned_data["Mother Birthdate"][6]
df_p1["Parent 2 Birthdate"] = cleaned_data["Father Birthdate"][6]

df_p2["Participant"] = cleaned_data["Participant Birthdate"][13]
df_p2["Parent 1 Birthdate"] = cleaned_data["Mother Birthdate"][13]
df_p2["Parent 2 Birthdate"] = cleaned_data["Father Birthdate"][13]
    

In [137]:
df_p1

Unnamed: 0,Recalled Songs,Participant,Parent 1 Birthdate,Parent 2 Birthdate
1970,2,1996,1964,1951
1975,1,1996,1964,1951
1980,3,1996,1964,1951
1985,2,1996,1964,1951
1990,2,1996,1964,1951
1995,3,1996,1964,1951
2000,2,1996,1964,1951
