In [30]:
import pandas as pd

# Build the IVS variable dictionary: one row per survey question, carrying its
# variable label, category, collapsed value labels and per-year counts, and
# write it to 'ivs_dictionary.csv'.

# Load the value-label and variable-label CSVs
df_value_labels = pd.read_csv('value_labels.csv')
df_variable_labels = pd.read_csv('variable_labels.csv')

df_ivs_dictionary = pd.read_excel('F00011424-Common_EVS_WVS_Dictionary_IVS.xlsx', sheet_name='IVS_EVS_and_WVS_Variables')

df_countries_count = pd.read_csv('countries_count.csv')

# Attach value labels to variables: `vallab` on the variable side is matched
# against `lname` on the value-label side. A left join keeps variables that
# have no matching value labels.
df_labels = pd.merge(df_variable_labels, df_value_labels, left_on='vallab', right_on='lname', how='left')

# Helper key used only for de-duplication: the lower-cased label with the
# acute accent (´) replaced by a plain apostrophe, so spelling variants of the
# same label collapse together. regex=False makes the replacement literal
# regardless of the pandas version's default.
df_labels = df_labels.assign(
    label_lower=df_labels['label'].str.lower().str.replace("´", "'", regex=False)
)

# Keep one row per (variable, normalised label); the last occurrence wins
df_labels = df_labels.drop_duplicates(subset=['name', 'vallab', 'varlab', 'label_lower'], keep='last')

# "value: label" text for every remaining value label.
# .assign avoids setting a column on the frame returned by drop_duplicates.
df_labels = df_labels.assign(value_label=df_labels['value'] + ': ' + df_labels['label'])


def _join_value_labels(value_labels):
    """Join the non-missing "value: label" strings of one variable with ', '.

    Returns NaN when the variable has no value labels at all, so the cell
    stays empty in the exported CSV.
    """
    present = value_labels.dropna()
    return ', '.join(present) if len(present) else float('nan')


# Collapse to one row per variable.
# dropna=False is required: by default groupby discards every group whose key
# contains NaN, which would silently remove variables that have no value-label
# set (NaN `vallab`) or no variable label, leaving them without a
# variable_label after the merge with the dictionary below.
df_labels = (
    df_labels
    .groupby(['name', 'vallab', 'varlab'], dropna=False)['value_label']
    .agg(_join_value_labels)
    .reset_index()
)

# Rename varlab to variable_label
df_labels = df_labels.rename(columns={'varlab': 'variable_label'})

# Keep the 2nd to 4th columns of the dictionary sheet (positions 1-3; the
# column at position 0 is skipped), then name and reorder them
df_ivs_dictionary = df_ivs_dictionary.iloc[:, 1:4].copy()
df_ivs_dictionary.columns = ['category', 'name', 'label']
df_ivs_dictionary = df_ivs_dictionary[['name', 'label', 'category']]

# Add the collapsed labels to every dictionary entry, matched by variable name
df = pd.merge(df_ivs_dictionary, df_labels, on='name', how='left')

# Categories whose variables are not survey questions
categories_not_for_questions = ["Admin/protocol variables", "Respondent's parents (EVS)", "Respondent's partner (EVS)", "Socio demographics"]

# Strip surrounding whitespace so the category names compare equal to the list above
df['category'] = df['category'].str.strip()

df = df[~df['category'].isin(categories_not_for_questions)].reset_index(drop=True)

df = df[['name', 'variable_label', 'category', 'value_label']]

# Reshape the counts to wide format: one row per variable, one column per year
df_countries_count_wide = df_countries_count.pivot(index='var', columns='year', values='count').reset_index()

# Add the per-year counts to the dictionary. Because the key columns have
# different names, the result keeps both `name` and `var`; `var` is left in
# place so the exported CSV keeps its existing columns.
df = pd.merge(df, df_countries_count_wide, left_on='name', right_on='var', how='left')

df.to_csv('ivs_dictionary.csv', index=False)

df


Unnamed: 0,name,variable_label,category,value_label,var,1984,1993,1998,2001,2004,2010,2014,2021,2022
0,A001,Important in life: Family,Perceptions of life,"1: Very important, 2: Rather important, 3: Not...",A001,,43.0,54.0,33.0,39.0,81.0,60.0,36.0,64.0
1,A002,Important in life: Friends,Perceptions of life,"1: Very important, 2: Rather important, 3: Not...",A002,,43.0,54.0,33.0,39.0,81.0,60.0,36.0,64.0
2,A003,Important in life: Leisure time,Perceptions of life,"1: Very important, 2: Rather important, 3: Not...",A003,,43.0,54.0,33.0,39.0,81.0,60.0,36.0,64.0
3,A004,Important in life: Politics,Perceptions of life,"1: Very important, 2: Rather important, 3: Not...",A004,,43.0,54.0,33.0,39.0,81.0,60.0,36.0,64.0
4,A005,Important in life: Work,Perceptions of life,"1: Very important, 2: Rather important, 3: Not...",A005,,43.0,54.0,33.0,39.0,81.0,60.0,36.0,64.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
672,Y023C,DIVORLIB- Welzel choice-3: Divorce acceptable,Special Indexes,".a: Don't know, .b: No answer, .c: Not applica...",Y023C,8.0,18.0,53.0,,39.0,57.0,60.0,,64.0
673,Y024,VOICE.- Welzel voice sub-index,Special Indexes,".a: Don't know, .b: No answer, .c: Not applica...",Y024,7.0,18.0,55.0,,40.0,57.0,59.0,,64.0
674,Y024A,VOICE1- Welzel voice-1,Special Indexes,"-99: -99, 0: Very high, 1: Very low, .a: Don't...",Y024A,7.0,18.0,55.0,,40.0,57.0,59.0,,64.0
675,Y024B,VOICE2- Welzel voice-2,Special Indexes,"-99: -99, 0: Bajo, 1: Alto, .a: Don't know, .b...",Y024B,,18.0,53.0,,39.0,55.0,59.0,,64.0


Unnamed: 0,year,count,var
0,1984,24,studyno
1,1993,43,studyno
2,1998,55,studyno
3,2001,33,studyno
4,2004,40,studyno
...,...,...,...
3423,1998,53,Y024C
3424,2004,39,Y024C
3425,2010,55,Y024C
3426,2014,59,Y024C


year,1984,1993,1998,2001,2004,2010,2014,2021,2022
0,,43.0,54.0,33.0,39.0,81.0,60.0,36.0,64.0
1,,43.0,54.0,33.0,39.0,81.0,60.0,36.0,64.0
2,,43.0,54.0,33.0,39.0,81.0,60.0,36.0,64.0
3,,43.0,54.0,33.0,39.0,81.0,60.0,36.0,64.0
4,,43.0,54.0,33.0,39.0,81.0,60.0,36.0,64.0
...,...,...,...,...,...,...,...,...,...
833,24.0,43.0,55.0,33.0,40.0,82.0,60.0,36.0,64.0
834,24.0,43.0,55.0,33.0,40.0,82.0,60.0,36.0,64.0
835,24.0,43.0,55.0,33.0,40.0,82.0,60.0,36.0,64.0
836,24.0,43.0,55.0,33.0,40.0,82.0,60.0,36.0,64.0
