robustness check with reiprocity from 2010 and 2015

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import math

import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.formula.api import ols

In [2]:
# define path: insert the path where the SOEP data is stored on your computer here
from pathlib import Path
# Maxie: /Volumes/dohmen_soep/SOEP-CORE.v36eu_STATA/Stata/raw
# Max: C:/Users/max-admin/Desktop/Masterstudium/WiSe_22_23/Research_Module/SOEP-Data/Stata/raw
data_folder = Path("/Volumes/dohmen_soep/SOEP-CORE.v36eu_STATA/Stata/raw")
# define relevant subsets of SOEP-data
file_names = ['vp', 'bap', 'bfp']

file_paths = [data_folder / f"{file_name}.dta" for file_name in file_names]
# some controls are in gen data
file_paths_2 = [data_folder / f"{file_name}gen.dta" for file_name in file_names]

In [3]:
data05 = pd.read_stata(file_paths[0], columns=["pid","hid", "syear","vp12602", "vp12603", "vp12605"]).set_index(['pid', 'hid'])
df_05 = data05.rename(columns={ 'vp12602': 'take_revenge_05', 'vp12603': 'similar_problems_05', 'vp12605': 'insult_back_05'})


In [4]:
data10= pd.read_stata(file_paths[1], columns=["pid", "hid", "syear",'bap12402','bap12403','bap12405']).set_index(['pid', 'hid'])
df_10 = data10.rename(columns = {'bap12402': 'take_revenge_10','bap12403': 'similar_problems_10','bap12405': 'insult_back_10'})

In [5]:
data15= pd.read_stata(file_paths[2], columns=["pid", "hid", "syear",'bfp0602','bfp0603','bfp0605']).set_index(['pid', 'hid'])
df_15 = data15.rename(columns = {'bfp0602': 'take_revenge_15', 'bfp0603': 'similar_problems_15', 'bfp0605': 'insult_back_15'})


In [6]:
reciprocity_questions_mapping_05 = {
    '[1] Trifft ueberhaupt nicht zu': 1,
    '[2] Skala 1-7': 2,
    '[3] Skala 1-7': 3,
    '[4] Skala 1-7': 4,
    '[5] Skala 1-7': 5,
    '[6] Skala 1-7': 6,
    '[7] Trifft voll zu': 7,
    '[-1] keine Angabe': np.nan,

}

df_05[["similar_problems_05","take_revenge_05","insult_back_05"]] = df_05[["similar_problems_05","take_revenge_05","insult_back_05"]].apply(lambda x: x.map(reciprocity_questions_mapping_05))

reciprocity_questions_mapping_10 = {
    '[1] Trifft ueberhaupt nicht zu': 1,
    '[2] 2 auf Skala 1-7': 2,
    '[3] 3 auf Skala 1-7': 3,
    '[4] 4 auf Skala 1-7': 4,
    '[5] 5 auf Skala 1-7': 5,
    '[6] 6 auf Skala 1-7': 6,
    '[7] Trifft voll zu': 7,
    '[-1] keine Angabe': np.nan,
    '[-5] In Fragebogenversion nicht enthalten': np.nan,

}

df_10[["similar_problems_10","take_revenge_10","insult_back_10"]] = df_10[["similar_problems_10","take_revenge_10","insult_back_10"]].apply(lambda x: x.map(reciprocity_questions_mapping_10))

reciprocity_questions_mapping_15 = {
    '[1] 1 Trifft ueberhaupt nicht zu, (Skala 1-7)': 1,
    '[2] 2 auf Skala 1-7': 2,
    '[3] 3 auf Skala 1-7': 3,
    '[4] 4 auf Skala 1-7': 4,
    '[5] 5 auf Skala 1-7': 5,
    '[6] 6 auf Skala 1-7': 6,
    '[7] 7 Trifft voll zu, (Skala 1-7)': 7,
    '[-1] keine Angabe': -1,

}

df_15[["similar_problems_15","take_revenge_15","insult_back_15"]] = df_15[["similar_problems_15","take_revenge_15","insult_back_15"]].apply(lambda x: x.map(reciprocity_questions_mapping_15))


In [7]:
df = df_05.merge(df_10,how='left', left_on=['pid', 'hid'], right_on=['pid', 'hid'])
df_reciprocity = df.merge(df_15, how= 'left',left_on=['pid', 'hid'], right_on=['pid', 'hid'])
#categoricals need to be transoformed to integers, otherwise dopping negatives is not possible


In [8]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,syear_x,take_revenge_05,similar_problems_05,insult_back_05,syear_y,take_revenge_10,similar_problems_10,insult_back_10
pid,hid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
201,27,2005,1.0,5.0,1.0,,,,
203,60313,2005,2.0,3.0,2.0,,,,
602,60,2005,5.0,4.0,3.0,2010.0,4.0,3.0,4.0
901,94,2005,5.0,3.0,3.0,2010.0,3.0,2.0,5.0
1202,124,2005,1.0,1.0,1.0,,,,
...,...,...,...,...,...,...,...,...,...
8261302,826138,2005,1.0,1.0,1.0,,,,
8261502,826154,2005,4.0,4.0,2.0,,,,
8262002,826200,2005,3.0,2.0,6.0,2010.0,3.0,2.0,6.0
8262402,826243,2005,1.0,1.0,1.0,,,,


In [9]:
df_reciprocity['avg_rec'] = df_reciprocity[['take_revenge_05', 'similar_problems_05', 'insult_back_05', 'take_revenge_10', 'similar_problems_10', 'insult_back_10', 'take_revenge_10', 'similar_problems_10', 'insult_back_10']].mean(axis=1, skipna = True)

In [10]:
df_avg_rec = df_reciprocity.loc[:,['avg_rec']]
df_avg_rec

Unnamed: 0_level_0,Unnamed: 1_level_0,avg_rec
pid,hid,Unnamed: 2_level_1
201,27,2.333333
203,60313,2.333333
602,60,3.777778
901,94,3.444444
1202,124,1.000000
...,...,...
8261302,826138,1.000000
8261502,826154,3.333333
8262002,826200,3.666667
8262402,826243,1.000000


In [11]:
df_avg_rec.to_csv('/Users/maxieschulze/Documents/Dokumente - MacBook Pro von Maxie/5. Semester/Research Module/ResearchModule/src/data_management/rec_avgyears.csv')