In [27]:
import os
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# 1. DATA CLEAN-UP

In [2]:
results = pd.read_csv("MPC_Form_English.csv")

In [3]:
results.head(1)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,1,Unnamed: 5,"Indicate how many songs are in accordance with the following statements.""I remeber hearing those songs...",Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 66,8,Unnamed: 68,"Indicate how many songs are in accordance with the following statements.""I remeber hearing those songs....7",Unnamed: 70,Unnamed: 71,Unnamed: 72,Unnamed: 73,Unnamed: 74,Unnamed: 75
0,timestamps,Year in which you were born,Year of birth of mother / father:,Year of birth of his father / mother:,"Of the 10 songs, how many have recognized about?",Would you say that some / s of the songs he ha...,"...when I was a child""","...recently""","...with my parents""","...with other people who are not my parents""",...,"If you indicated yes, describe the memory in d...","Of the 10 songs, how many have recognized about?",Would you say that some / s of the songs he ha...,"...when I was a child""","...recently""","...with my parents""","...with other people who are not my parents""","...alone""",Would you say that your memories are clear / v...,"If you indicated yes, describe the memory in d..."


## 1.1. Separate Participant Info

In [4]:
# Row 0 repeats the question text, so the responses start at row 1; the first
# four columns are the timestamp and the three birth-year answers.
participant_info = results.iloc[1:,:4]

In [5]:
participant_info.head(2)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3
1,24/11/2020 14:04:46,1993,1961,1961
2,24/11/2020 14:13:34,1998,1969,1965


In [6]:
# Drop the timestamp column and give the birth-year columns readable names.
# Rebinding instead of inplace=True keeps this cell free of
# SettingWithCopyWarning when `participant_info` is a slice of `results`.
participant_info = (
    participant_info
    .drop(columns=["Unnamed: 0"])
    .rename(columns={
        "Unnamed: 1": "Participant Birthdate",
        "Unnamed: 2": "Mother Birthdate",
        "Unnamed: 3": "Father Birthdate",
    })
)

In [7]:
participant_info.head(4)

Unnamed: 0,Participant Birthdate,Mother Birthdate,Father Birthdate
1,1993,1961,1961
2,1998,1969,1965
3,1996,1964,1967
4,1998,1968,1970


In [8]:
#SAVE PARTICIPANT INFO
#participant_info.to_csv("ParticipantInfo.csv",index=None)

## 1.2. Separate Song Periods

In [9]:
# Order of the song periods. separate_periods pairs these values positionally
# with consecutive 9-column blocks of `results`, so the order matters.
period_order = [1970,1985,1980,2000,1995,1975,1990,2005]

In [10]:
# Remove the timestamp and birth-year columns so that only the per-period
# answer columns remain. A list keeps the labels ordered (the original passed a set).
results = results.drop(columns=["Unnamed: 0", "Unnamed: 1", "Unnamed: 2", "Unnamed: 3"])

In [11]:
results.head(1)

Unnamed: 0,1,Unnamed: 5,"Indicate how many songs are in accordance with the following statements.""I remeber hearing those songs...",Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,4,...,Unnamed: 66,8,Unnamed: 68,"Indicate how many songs are in accordance with the following statements.""I remeber hearing those songs....7",Unnamed: 70,Unnamed: 71,Unnamed: 72,Unnamed: 73,Unnamed: 74,Unnamed: 75
0,"Of the 10 songs, how many have recognized about?",Would you say that some / s of the songs he ha...,"...when I was a child""","...recently""","...with my parents""","...with other people who are not my parents""","...alone""",Would you say that your memories are clear / v...,"If you indicated yes, describe the memory in d...","Of the 10 songs, how many have recognized about?",...,"If you indicated yes, describe the memory in d...","Of the 10 songs, how many have recognized about?",Would you say that some / s of the songs he ha...,"...when I was a child""","...recently""","...with my parents""","...with other people who are not my parents""","...alone""",Would you say that your memories are clear / v...,"If you indicated yes, describe the memory in d..."


In [12]:
def separate_periods(df, period_order, output_dir=None):
    """Split the wide survey frame into one answer table per song period.

    The frame is read as consecutive blocks of nine columns. Row 0 of each
    block holds the question text; it becomes the header (shortened to the
    labels below) and the remaining rows are kept as the answers.

    Parameters
    ----------
    df : pandas.DataFrame
        Survey answers with the question text in row 0. Not modified.
    period_order : iterable
        Label for each nine-column block, in column order.
    output_dir : str or path-like, optional
        When given, each table is written to ``<output_dir>/<period>_words.csv``
        without the index. With the default ``None`` nothing is written, which
        matches the previous behaviour (the save line was commented out).

    Returns
    -------
    None
    """
    columns_per_period = 9
    short_names = {
        'Of the 10 songs, how many have recognized about?': 'Recalled Songs',
        'Would you say that some / s of the songs he has / have evoked personal memories?': 'Evoke Memory',
        '...when I was a child"': 'Childhood',
        '...recently"': 'Recent',
        '...with my parents"': 'Parents',
        '...with other people who are not my parents"': 'Other People',
        '...alone"': 'Alone',
        'Would you say that your memories are clear / vivid?': 'Vividness',
        'If you indicated yes, describe the memory in detail:': 'Memory Context',
    }

    for block_number, period in enumerate(period_order):
        start = block_number * columns_per_period
        if start >= df.shape[1]:
            # No columns left for the remaining periods.
            break
        # Copy so the header swap below never touches (or warns about) `df`.
        df_period = df.iloc[:, start:start + columns_per_period].copy()
        df_period.columns = df_period.iloc[0]
        df_period = df_period.iloc[1:].rename(columns=short_names)
        if output_dir is not None:
            target = os.path.join(output_dir, "{}_words.csv".format(str(period)))
            df_period.to_csv(target, index=False)
        

In [298]:
separate_periods(results, period_order)

In [13]:
def word_to_rank(songs_70_74, year, output_dir=None):
    """Recode the textual survey answers of one song period as numbers, in place.

    "Evoke Memory" and "Vividness" are recoded Yes -> 1 / No -> 0. In every
    other column the quantity words are recoded None -> 0, Some) -> 1,
    Half -> 2, The majority -> 3, All -> 4. Any value that is not one of
    these exact strings (numbers, missing values, free text) is left as is.

    Each column is recoded with a single vectorised map rather than one
    ``.iloc`` write per cell, so a fully recoded column ends up with a numeric
    dtype. Column labels are expected to be unique.

    Parameters
    ----------
    songs_70_74 : pandas.DataFrame
        Answer table of a single period (the name is historical; any period
        works). Modified in place.
    year : str or int
        Period label; only used to name the output file.
    output_dir : str or path-like, optional
        When given, the recoded table is written to
        ``<output_dir>/<year>_rankings.csv`` without the index. With the
        default ``None`` nothing is written, which matches the previous
        behaviour (the save line was commented out).

    Returns
    -------
    None
    """
    yes_no_codes = {"Yes": 1, "No": 0}
    # NOTE(review): the trailing ")" in "Some)" is kept verbatim from the
    # original code; confirm it matches the spelling in the raw survey export.
    quantity_codes = {"None": 0, "Some)": 1, "Half": 2, "The majority": 3, "All": 4}

    for column in songs_70_74.columns:
        if column == "Evoke Memory" or column == "Vividness":
            codes = yes_no_codes
        else:
            codes = quantity_codes
        # Only exact string matches are recoded; everything else passes through.
        songs_70_74[column] = songs_70_74[column].map(
            lambda answer, codes=codes: codes.get(answer, answer) if isinstance(answer, str) else answer
        )

    if output_dir is not None:
        songs_70_74.to_csv(os.path.join(output_dir, "{}_rankings.csv".format(year)), index=False)

In [307]:
# Recode the answer file of every period label from 1970 to 2005 (step 5).
for year in range(1970, 2010, 5):
    df = pd.read_csv(f"{year}_words.csv")
    word_to_rank(df, str(year))

# 2. DATA MERGING

In [49]:
period_order

[1970, 1985, 1980, 2000, 1995, 1975, 1990, 2005]

In [16]:
# Attach every period's rankings to the participant table, side by side.
period_frames = []
for filename in period_order:
    filepath = "SongPeriod_Ratings/{}_rankings.csv".format(filename)
    # Rows are matched by position: the freshly read frame takes over the
    # participants' index so that concat lines the rows up.
    df = pd.read_csv(filepath).set_index(participant_info.index)
    df.insert(0,"Song Period",filename)
    period_frames.append(df)

# One concat at the end instead of re-growing the frame on every iteration.
participant_info = pd.concat([participant_info, *period_frames], axis=1)

In [363]:
#participant_info.to_csv("MPC_Cleaned_Data.csv",index=None)

# 3. DATA ANALYSIS

In [25]:
df = pd.read_csv("MPC_Cleaned_Data.csv")

## 3.1. Get Descriptive Statistics (Mean, Std..)

In [28]:
general_info = defaultdict(dict)

In [29]:
participant = df.iloc[:,:3]

## 3.1.1. Get Descriptive Stats for Vividness 

In [51]:
participant.head(2)

Unnamed: 0,Participant Birthdate,Mother Birthdate,Father Birthdate
0,1993,1961,1961
1,1998,1969,1965


In [53]:
#DESCRIPTIVE STATISTICS OF SONG PERIODS (Vividness NaN values filled with 0)

for period in period_order:
    print(period)
    filepath = "SongPeriod_Ratings/{}_rankings.csv".format(period)
    df_stat = pd.read_csv(filepath)
    # Assign the filled column back: fillna(inplace=True) on a column selection
    # acts on a temporary under Copy-on-Write and would leave df_stat unchanged.
    df_stat["Vividness"] = df_stat["Vividness"].fillna(value=0)
    df_stat = df_stat.describe()
    df_stat.to_csv(f"SongPeriod_Ratings/{period}_rankings_statistics_vividness.csv")

1970
1985
1980
2000
1995
1975
1990
2005


In [58]:
# Collect one column per period from the saved describe() tables.
# NOTE: despite the "mean" in the variable names, the row extracted is "std".
period_columns = []
for period in period_order:
    filepath = "SongPeriod_Ratings/{}_rankings_statistics_vividness.csv".format(period)
    df_mean = pd.read_csv(filepath).set_index("Unnamed: 0")
    df_mean = pd.DataFrame(df_mean.loc["std"].T).rename(columns={"std":str(period)})
    period_columns.append(df_mean)

# Concatenate once instead of growing the frame inside the loop.
df_mean_all_vividness = pd.concat(period_columns, axis=1) if period_columns else pd.DataFrame()

In [59]:
df_mean_all_vividness.sort_index(axis=1,inplace=True)

In [60]:
df_mean_all_vividness = df_mean_all_vividness.T

In [61]:
df_mean_all_vividness

Unnamed: 0,Recalled Songs,Evoke Memory,Childhood,Recent,Parents,Other People,Alone,Vividness
1970,0.951388,0.499011,1.071612,0.468807,1.176697,0.916875,0.650444,0.448978
1975,0.902347,0.499011,1.141139,0.892582,1.215739,0.744946,0.892582,0.50687
1980,0.790257,0.448978,1.172604,1.131111,1.032558,0.943242,1.124591,0.510754
1985,0.864312,0.34435,0.988087,0.944513,0.998683,0.812728,1.020836,0.50687
1990,1.083473,0.421741,1.017815,1.02262,0.923548,1.07861,0.785905,0.486985
1995,0.934622,0.448978,0.970143,0.866025,0.882843,1.0,1.014599,0.50687
2000,1.053884,0.387553,1.123903,0.830698,1.017393,1.067872,1.06513,0.499011
2005,1.057628,0.448978,1.390444,1.0,1.0,1.125463,0.966092,0.510754


In [62]:
# NOTE(review): the cell that builds df_mean_all_vividness extracts the "std"
# row of each describe() table, so this file holds standard deviations even
# though its name says MEAN. Confirm which statistic is intended.
df_mean_all_vividness.to_csv("SongPeriods_DependentVar_MEAN_vividness.csv")

## 3.1.2. Get Descriptive Stats for others

In [19]:
#DESCRIPTIVE STATISTICS OF SONG PERIODS

# describe() is computed for every period, but with the save line commented
# out the result is overwritten on each iteration and never stored.
for period in period_order:
    filepath = "SongPeriod_Ratings/{}_rankings.csv".format(period)
    df_stat = pd.read_csv(filepath)
    df_stat = df_stat.describe()
    # NOTE(review): this disabled save targets "..._rankings_statistics_vividness.csv",
    # whereas the next cell reads "..._rankings_statistics.csv"; confirm the intended name.
    #df_stat.to_csv(f"SongPeriod_Ratings/{period}_rankings_statistics_vividness.csv")

In [20]:
# Collect the "std" row of every period's describe() table, one column per period.
# NOTE: the variable names say "mean", but the statistic extracted is "std".
period_columns = []
for period in period_order:
    filepath = "SongPeriod_Ratings/{}_rankings_statistics.csv".format(period)
    df_mean = pd.read_csv(filepath).set_index("Unnamed: 0")
    df_mean = pd.DataFrame(df_mean.loc["std"].T).rename(columns={"std":str(period)})
    period_columns.append(df_mean)

# Concatenate once instead of growing the frame inside the loop.
df_mean_all = pd.concat(period_columns, axis=1) if period_columns else pd.DataFrame()

In [None]:
descriptive_stat = participant.describe()
#descriptive_stat.to_csv("Participant_Descriptive_Stat.csv")

In [44]:
df_mean_all.sort_index(axis=1,inplace=True)

In [45]:
df_mean_all = df_mean_all.T

In [46]:
#df_mean_all.to_csv("SongPeriods_DependentVar_STD.csv")

In [45]:
df_mean_all

Unnamed: 0,1970,1985,1980,2000,1995,1975,1990,2005
Recalled Songs,0.951388,0.864312,0.790257,1.053884,0.934622,0.902347,1.083473,1.057628
Evoke Memory,0.499011,0.34435,0.448978,0.387553,0.448978,0.499011,0.421741,0.448978
Childhood,1.071612,0.988087,1.172604,1.123903,0.970143,1.141139,1.017815,1.390444
Recent,0.468807,0.944513,1.131111,0.830698,0.866025,0.892582,1.02262,1.0
Parents,1.176697,0.998683,1.032558,1.017393,0.882843,1.215739,0.923548,1.0
Other People,0.916875,0.812728,0.943242,1.067872,1.0,0.744946,1.07861,1.125463
Alone,0.650444,1.020836,1.124591,1.06513,1.014599,0.892582,0.785905,0.966092
Vividness,0.513553,0.48936,0.469668,0.452414,0.437237,0.468807,0.383482,0.447214


## 3.2. Calculate the percentage of evoked memories and vividness

In [56]:
# Collect the "mean" row of every period's describe() table, one column per period.
# NOTE(review): this cell reads from "SongRatings/", while the other cells use
# "SongPeriod_Ratings/"; confirm that both directories are meant to exist.
period_columns = []

for period in period_order:
    filepath = "SongRatings/{}_rankings_statistics.csv".format(period)
    df_mean = pd.read_csv(filepath).set_index("Unnamed: 0")
    df_mean = pd.DataFrame(df_mean.loc["mean"].T).rename(columns={"mean":str(period)})
    period_columns.append(df_mean)

# Concatenate once instead of growing the frame inside the loop.
df_count_all = pd.concat(period_columns, axis=1) if period_columns else pd.DataFrame()

In [63]:
# Mean of the second variable for the 1970 period, as a percentage
# (presumably "Evoke Memory", judging by the row order of the tables above).
# .iloc makes the positional lookup explicit: plain [1] on a string-labelled
# Series relies on a deprecated positional fallback.
df_count_all["1970"].iloc[1]*100

60.86956521739131

## Correct Parent Column Names

In [2]:
participant_info = pd.read_csv("ParticipantInfo.csv")

In [4]:
# Switch the two parent columns to neutral "Parent 1" / "Parent 2" labels.
participant_info = participant_info.rename(
    columns={
        "Mother Birthdate": "Parent 1 Birthdate",
        "Father Birthdate": "Parent 2 Birthdate",
    }
)

In [6]:
participant_info.head(1)

Unnamed: 0,Participant Birthdate,Parent 1 Birthdate,Parent 2 Birthdate
0,1993,1961,1961


In [7]:
participant_desc = pd.read_csv("Participant_Descriptive_Stat.csv")

In [9]:
# Apply the same neutral parent labels to the descriptive-statistics table.
participant_desc = participant_desc.rename(
    columns={
        "Mother Birthdate": "Parent 1 Birthdate",
        "Father Birthdate": "Parent 2 Birthdate",
    }
)

In [11]:
participant_desc.head(2)

Unnamed: 0.1,Unnamed: 0,Participant Birthdate,Parent 1 Birthdate,Parent 2 Birthdate
0,count,23.0,23.0,23.0
1,mean,1995.347826,1962.217391,1962.347826


## Parents with an age gap (a parent born before 1955)

In [74]:
from collections import defaultdict

In [15]:
P1 = participant_info[participant_info["Parent 1 Birthdate"] < 1955]

In [16]:
P2 = participant_info[participant_info["Parent 2 Birthdate"] < 1955]

In [23]:
# Row labels of three participants with a parent born before 1955: the first
# match on "Parent 2", then the second and the first match on "Parent 1".
# Requires at least one row in P2 and two rows in P1.
age_gap = [P2.index[0], P1.index[1], P1.index[0]]

In [24]:
age_gap

[6, 13, 12]

In [25]:
cleaned_data = pd.read_csv("MPC_Cleaned_Data.csv")

In [56]:
cleaned_data.head(2)

Unnamed: 0,Participant Birthdate,Mother Birthdate,Father Birthdate,Song Period,Recalled Songs,Evoke Memory,Childhood,Recent,Parents,Other People,...,Song Period.7,Recalled Songs.7,Evoke Memory.7,Childhood.7,Recent.7,Parents.7,Other People.7,Alone.7,Vividness.7,Memory Context.7
0,1993,1961,1961,1970,2,1,1.0,1.0,1.0,1.0,...,2005,1,1,1.0,0.0,0.0,1.0,1.0,1.0,Go for a concrete sidewalk in my neighborhood ...
1,1998,1969,1965,1970,1,1,1.0,0.0,3.0,1.0,...,2005,1,0,,,,,,,


In [34]:
# Song periods for the suffixed column groups ".1" to ".7" of the cleaned data;
# 1970 is left out because its columns carry no suffix and are read separately.
# NOTE: this rebinds the 8-entry `period_order` defined earlier in the notebook.
period_order = [1985,1980,2000,1995,1975,1990,2005]

In [114]:
cleaned_data.iloc[6,:3]

Participant Birthdate    1996
Mother Birthdate         1964
Father Birthdate         1951
Name: 6, dtype: object

In [130]:
# Recalled-song counts of the three age-gap participants (rows 6, 12 and 13),
# keyed by song period. The unsuffixed column belongs to the 1970 period.
p1, p2, p3 = defaultdict(dict), defaultdict(dict), defaultdict(dict)
recalled_1970 = cleaned_data["Recalled Songs"]
p1[1970], p2[1970], p3[1970] = recalled_1970[6], recalled_1970[12], recalled_1970[13]


In [131]:
# Columns "Recalled Songs.1" to ".7" belong to the seven periods after 1970.
# Fail loudly if a stale 8-entry `period_order` is still bound: zip() would
# otherwise silently pair the wrong period with each column and drop the last one.
assert len(period_order) == 7 and 1970 not in period_order, (
    "period_order must be the 7-entry list without 1970"
)
for year,i in zip(period_order,range(1,8)):
    p1[year] = cleaned_data[f"Recalled Songs.{i}"][6]
    p2[year] = cleaned_data[f"Recalled Songs.{i}"][12]
    p3[year] = cleaned_data[f"Recalled Songs.{i}"][13]

In [132]:
p1.keys()

dict_keys([1970, 1985, 1980, 2000, 1995, 1975, 1990])

In [133]:
list(p1.keys())

[1970, 1985, 1980, 2000, 1995, 1975, 1990]

In [134]:
def _recall_frame(recalled):
    """Return a one-column frame of recalled-song counts indexed by song period."""
    return pd.DataFrame(
        list(recalled.values()),
        index=list(recalled.keys()),
        columns=["Recalled Songs"],
    )


df_p1, df_p2, df_p3 = (_recall_frame(recalled) for recalled in (p1, p2, p3))

In [135]:
# Put each participant's periods in ascending order.
for frame in (df_p1, df_p2, df_p3):
    frame.sort_index(inplace=True)

In [138]:
# NOTE(review): the execution counts show this save ran after the cell further
# down that adds the birth-year columns; run top to bottom, the file would
# contain only "Recalled Songs".
df_p1.to_csv("AgeGap_1.csv")

In [139]:
# NOTE(review): like AgeGap_1.csv, these saves sit above the cell that adds the
# birth-year columns although they were executed after it.
df_p2.to_csv("AgeGap_2.csv")
df_p3.to_csv("AgeGap_3.csv")

In [136]:
# Repeat each participant's own and parents' birth years on every period row.
for frame, row in ((df_p1, 6), (df_p2, 12), (df_p3, 13)):
    frame["Participant"] = cleaned_data["Participant Birthdate"][row]
    frame["Parent 1 Birthdate"] = cleaned_data["Mother Birthdate"][row]
    frame["Parent 2 Birthdate"] = cleaned_data["Father Birthdate"][row]

In [137]:
df_p1

Unnamed: 0,Recalled Songs,Participant,Parent 1 Birthdate,Parent 2 Birthdate
1970,2,1996,1964,1951
1975,1,1996,1964,1951
1980,3,1996,1964,1951
1985,2,1996,1964,1951
1990,2,1996,1964,1951
1995,3,1996,1964,1951
2000,2,1996,1964,1951
