In [1]:
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# 1. DATA CLEAN-UP

In [352]:
# Load the raw form export. Its first data row carries the question wording
# (see the preview in the next cell), so real responses start at row 1.
raw_form_path = "MPC_Form_English.csv"
results = pd.read_csv(raw_form_path)

In [353]:
# Preview only the first row, which holds the question text for each column.
results.head(n=1)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,1,Unnamed: 5,"Indicate how many songs are in accordance with the following statements.""I remeber hearing those songs...",Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 66,8,Unnamed: 68,"Indicate how many songs are in accordance with the following statements.""I remeber hearing those songs....7",Unnamed: 70,Unnamed: 71,Unnamed: 72,Unnamed: 73,Unnamed: 74,Unnamed: 75
0,timestamps,Year in which you were born,Year of birth of mother / father:,Year of birth of his father / mother:,"Of the 10 songs, how many have recognized about?",Would you say that some / s of the songs he ha...,"...when I was a child""","...recently""","...with my parents""","...with other people who are not my parents""",...,"If you indicated yes, describe the memory in d...","Of the 10 songs, how many have recognized about?",Would you say that some / s of the songs he ha...,"...when I was a child""","...recently""","...with my parents""","...with other people who are not my parents""","...alone""",Would you say that your memories are clear / v...,"If you indicated yes, describe the memory in d..."


## 1.1. Separate Participant Info

In [354]:
# Row 0 holds the question wording, so responses start at row 1; the first
# four columns are the timestamp and the three birth-year answers.
# .copy() gives an independent frame, so later edits to participant_info
# never act on (or warn about) a slice of `results`.
participant_info = results.iloc[1:, :4].copy()

In [355]:
# Quick look at the first two participants.
participant_info.head(n=2)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3
1,24/11/2020 14:04:46,1993,1961,1961
2,24/11/2020 14:13:34,1998,1969,1965


In [356]:
# Drop the timestamp column and give the birth-year columns readable names.
# Built as a new frame (no inplace) so nothing is mutated through a slice of
# `results`; errors="ignore" lets the cell be re-run after the column is gone.
participant_info = (
    participant_info
    .drop(columns=["Unnamed: 0"], errors="ignore")
    .rename(columns={"Unnamed: 1": "Participant Birthdate",
                     "Unnamed: 2": "Mother Birthdate",
                     "Unnamed: 3": "Father Birthdate"})
)

In [358]:
# Check the renamed birth-year columns on the first four participants.
participant_info.head(n=4)

Unnamed: 0,Participant Birthdate,Mother Birthdate,Father Birthdate
1,1993,1961,1961
2,1998,1969,1965
3,1996,1964,1967
4,1998,1968,1970


In [308]:
# Save participant info without the row index.
# index=False is the documented way to omit the index (index=None only
# worked because it is falsy).
participant_info.to_csv("ParticipantInfo.csv", index=False)

## 1.2. Separate Song Periods

In [5]:
# Order of the song periods as they appear in the form columns:
# separate_periods pairs the i-th entry with the i-th block of nine columns.
period_order = [
    1970, 1985, 1980, 2000,
    1995, 1975, 1990, 2005,
]

In [263]:
# Remove the timestamp and birth-year columns so only the per-period answer
# blocks remain. participant_info must be extracted from `results` before
# this cell runs, because that step slices these same four columns.
# errors="ignore" keeps the cell re-runnable once the columns are gone.
results = results.drop(
    columns=["Unnamed: 0", "Unnamed: 1", "Unnamed: 2", "Unnamed: 3"],
    errors="ignore",
)

In [275]:
# Confirm the frame now starts with the first song-period question.
results.head(n=1)

Unnamed: 0,1,Unnamed: 5,"Indicate how many songs are in accordance with the following statements.""I remeber hearing those songs...",Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,4,...,Unnamed: 66,8,Unnamed: 68,"Indicate how many songs are in accordance with the following statements.""I remeber hearing those songs....7",Unnamed: 70,Unnamed: 71,Unnamed: 72,Unnamed: 73,Unnamed: 74,Unnamed: 75
0,"Of the 10 songs, how many have recognized about?",Would you say that some / s of the songs he ha...,"...when I was a child""","...recently""","...with my parents""","...with other people who are not my parents""","...alone""",Would you say that your memories are clear / v...,"If you indicated yes, describe the memory in d...","Of the 10 songs, how many have recognized about?",...,"If you indicated yes, describe the memory in d...","Of the 10 songs, how many have recognized about?",Would you say that some / s of the songs he ha...,"...when I was a child""","...recently""","...with my parents""","...with other people who are not my parents""","...alone""",Would you say that your memories are clear / v...,"If you indicated yes, describe the memory in d..."


In [297]:
def separate_periods(df, period_order, block_width=9):
    """Split the wide answer table into one CSV file per song period.

    The table is read as consecutive blocks of ``block_width`` columns. The
    first row of each block supplies the question text, which becomes the
    header and is then shortened to analysis-friendly names. Each block is
    written to "<period>_words.csv" in the working directory, without the
    row index.

    Parameters
    ----------
    df : pandas.DataFrame
        Answers only (participant columns already removed); row 0 holds the
        question text for every column.
    period_order : iterable
        Period labels in the order their blocks appear in ``df``. Labels
        beyond the number of available blocks are ignored.
    block_width : int, optional
        Number of columns per period (default 9).

    Returns
    -------
    None
    """
    short_names = {
        'Of the 10 songs, how many have recognized about?': 'Recalled Songs',
        'Would you say that some / s of the songs he has / have evoked personal memories?': 'Evoke Memory',
        '...when I was a child"': 'Childhood',
        '...recently"': 'Recent',
        '...with my parents"': 'Parents',
        '...with other people who are not my parents"': 'Other People',
        '...alone"': 'Alone',
        'Would you say that your memories are clear / vivid?': 'Vividness',
        'If you indicated yes, describe the memory in detail:': 'Memory Context',
    }

    block_starts = range(0, df.shape[1], block_width)
    for start, period in zip(block_starts, period_order):
        # Copy the block so relabelling acts on an independent frame rather
        # than on a slice of the caller's table.
        block = df.iloc[:, start:start + block_width].copy()
        block.columns = block.iloc[0]
        block = block.iloc[1:].rename(columns=short_names)
        block.to_csv("{}_words.csv".format(str(period)), index=False)
        

In [298]:
# Write one "<period>_words.csv" file per entry of period_order.
separate_periods(df=results, period_order=period_order)

In [306]:
def word_to_rank(songs_70_74, year):
    """Recode the answer words of one song-period table as integers and save it.

    Columns "Evoke Memory" and "Vividness" are recoded Yes=1 / No=0. Every
    other column is recoded on the quantity scale None=0, Some=1, Half=2,
    The majority=3, All=4. Cells matching none of these words are left
    unchanged. The frame is modified in place and then written to
    "<year>_rankings.csv" in the working directory, without the row index.

    Parameters
    ----------
    songs_70_74 : pandas.DataFrame
        Answers for a single song period (any period, despite the name).
    year : str or int
        Label used as the prefix of the output file name.

    Returns
    -------
    None
    """
    yes_no_ranks = {"No": 0, "Yes": 1}
    # "Some)" (with a stray parenthesis) is the only spelling this function
    # used to match; plain "Some" is accepted as well so that neither form is
    # silently left behind as text.
    quantity_ranks = {
        "None": 0,
        "Some": 1,
        "Some)": 1,
        "Half": 2,
        "The majority": 3,
        "All": 4,
    }

    # NOTE(review): any free-text column also goes through the quantity
    # scale, so a description that is exactly "None" or "All" is recoded too.
    for idx, column in enumerate(songs_70_74.columns):
        ranks = yes_no_ranks if column in ("Evoke Memory", "Vividness") else quantity_ranks
        answers = songs_70_74[column]

        # Locate every word before writing anything, so all masks describe
        # the original answers.
        hits = {word: answers.isin([word]).to_numpy() for word in ranks}
        hits = {word: mask for word, mask in hits.items() if mask.any()}
        if not hits:
            continue

        if answers.dtype != object:
            # Integer ranks cannot be stored in a string-typed column.
            songs_70_74[column] = answers.astype(object)

        for word, mask in hits.items():
            songs_70_74.iloc[mask, idx] = ranks[word]

    songs_70_74.to_csv("{}_rankings.csv".format(year), index=False)

In [307]:
# Recode each per-period words file (1970, 1975, ..., 2005) into integer
# rankings; word_to_rank writes the matching "<year>_rankings.csv".
for year in range(1970, 2010, 5):
    words_file = "{}_words.csv".format(year)
    df = pd.read_csv(words_file)
    word_to_rank(df, str(year))

# 2. DATA MERGING

In [313]:
# Display the period order that the merge loop below iterates over.
period_order

[1970, 1985, 1980, 2000, 1995, 1975, 1990, 2005]

In [359]:
# Append every period's rankings to the participant columns, side by side.
# NOTE(review): word_to_rank writes "<year>_rankings.csv" to the working
# directory, while this cell reads from "SongRatings/" - confirm the files
# are moved there (or align the two paths).
# NOTE: each period contributes the same column names, and re-running this
# cell appends all periods again because participant_info is overwritten.
period_frames = []
for filename in period_order:
    filepath = "SongRatings/{}_rankings.csv".format(filename)
    # Re-use the participant index so rows line up on concatenation.
    df = pd.read_csv(filepath).set_index(participant_info.index)
    df.insert(0, "Song Period", filename)
    period_frames.append(df)

# One concat at the end instead of growing the frame inside the loop.
participant_info = pd.concat([participant_info, *period_frames], axis=1)

In [363]:
# Persist the merged table without the row index. index=False is the
# documented way to omit it (index=None only worked because it is falsy).
participant_info.to_csv("MPC_Cleaned_Data.csv", index=False)

# 3. DATA ANALYSIS

### Boxplots?
### Count --> Percentage

In [54]:
# Reload the merged table written by the data-merging step.
df = pd.read_csv(filepath_or_buffer="MPC_Cleaned_Data.csv")

## 3.1. Get Descriptive Statistics (Mean, Std..)

In [375]:
# Nested mapping whose inner dicts are created on first access.
# Requires `from collections import defaultdict` in the imports cell;
# without it this line raises NameError on a fresh kernel.
# NOTE(review): not referenced by any of the cells visible here.
general_info = defaultdict(dict)

In [378]:
# Keep only the first three columns of the merged table (the birth-year
# columns that came from participant_info).
participant = df.iloc[:, 0:3]

In [383]:
# Summary statistics (count, mean, std, quartiles, ...) of the participant columns.
descriptive_stat = participant.describe()

In [384]:
# Keep the index here: it holds the statistic names (count, mean, std, ...).
descriptive_stat.to_csv(path_or_buf="Participant_Descriptive_Stat.csv")

In [33]:
# Descriptive statistics of song periods: summarise each period's rankings
# file and store the summary next to it in "SongRatings/".

for period in period_order:
    filepath = "SongRatings/{}_rankings.csv".format(period)
    df_stat = pd.read_csv(filepath).describe()
    df_stat.to_csv(f"SongRatings/{period}_rankings_statistics.csv")

In [43]:
# Collect the "std" row of every period's statistics file into one table,
# with one column per period.
# NOTE: despite the "mean" in the variable names, these are standard
# deviations; the names are kept because later cells refer to df_mean_all.
std_by_period = []
for period in period_order:
    filepath = "SongRatings/{}_rankings_statistics.csv".format(period)
    # "Unnamed: 0" is the saved describe() index (count, mean, std, ...).
    df_mean = pd.read_csv(filepath).set_index("Unnamed: 0")
    df_mean = df_mean.loc["std"].to_frame(name=str(period))
    std_by_period.append(df_mean)

# One concat at the end instead of growing the frame inside the loop.
df_mean_all = pd.concat(std_by_period, axis=1) if std_by_period else pd.DataFrame()

In [44]:
# Put the period columns in ascending order.
df_mean_all = df_mean_all.sort_index(axis=1)

In [45]:
# Flip to one row per period. NOTE: running this cell twice flips it back.
df_mean_all = df_mean_all.transpose()

In [46]:
# Save the per-period standard deviations; the index (period labels) is kept.
df_mean_all.to_csv(path_or_buf="SongPeriods_DependentVar_STD.csv")

In [47]:
# Show the table of standard deviations (rows: periods, columns: variables).
df_mean_all

Unnamed: 0,Recalled Songs,Evoke Memory,Childhood,Recent,Parents,Other People,Alone,Vividness
1970,0.951388,0.499011,1.071612,0.468807,1.176697,0.916875,0.650444,0.513553
1975,0.902347,0.499011,1.141139,0.892582,1.215739,0.744946,0.892582,0.468807
1980,0.790257,0.448978,1.172604,1.131111,1.032558,0.943242,1.124591,0.469668
1985,0.864312,0.34435,0.988087,0.944513,0.998683,0.812728,1.020836,0.48936
1990,1.083473,0.421741,1.017815,1.02262,0.923548,1.07861,0.785905,0.383482
1995,0.934622,0.448978,0.970143,0.866025,0.882843,1.0,1.014599,0.437237
2000,1.053884,0.387553,1.123903,0.830698,1.017393,1.067872,1.06513,0.452414
2005,1.057628,0.448978,1.390444,1.0,1.0,1.125463,0.966092,0.447214


## 3.2. Calculate the count of evoked memories and vividness

In [56]:
# Collect the "mean" row of every period's statistics file into one table,
# with one column per period (column labels are the period as a string).
mean_by_period = []

for period in period_order:
    filepath = "SongRatings/{}_rankings_statistics.csv".format(period)
    # "Unnamed: 0" is the saved describe() index (count, mean, std, ...).
    df_mean = pd.read_csv(filepath).set_index("Unnamed: 0")
    df_mean = df_mean.loc["mean"].to_frame(name=str(period))
    mean_by_period.append(df_mean)

# One concat at the end instead of growing the frame inside the loop.
df_count_all = pd.concat(mean_by_period, axis=1) if mean_by_period else pd.DataFrame()

In [63]:
# Mean of the "Evoke Memory" column for the 1970 period, as a percentage.
# Looked up by label: integer keys on a string-labelled Series (`series[1]`)
# rely on a deprecated positional fallback.
df_count_all.loc["Evoke Memory", "1970"] * 100

60.86956521739131