In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline
from scipy import stats

### 1. Extract relevant columns

List of manifest and latent variables:

In [2]:
manifests = {'pa18i1':  {'max': 5,  'min': 1, 'inverted': True,  'desc': 'Partner finds it all right if I stand up for my own interests'}, 
             'pa18i2':  {'max': 5,  'min': 1, 'inverted': False, 'desc': 'Sometimes I am afraid that partner would rather spend time with others'}, 
             'pa18i4':  {'max': 5,  'min': 1, 'inverted': False, 'desc': 'Partner clings to me so much that I feel like I am suffocating'}, 
             'pa18i6':  {'max': 5,  'min': 1, 'inverted': True,  'desc': 'I can settle my personal matters by myself without conflicts'}, 
             'pa18i7':  {'max': 5,  'min': 1, 'inverted': False, 'desc': 'I have the feeling that I like partner more than he/she likes me'}, 
             'pa18i10': {'max': 5,  'min': 1, 'inverted': False, 'desc': 'Sometimes not sure if partner enjoys being with me as much as I'}, 
             'pa18i11': {'max': 5,  'min': 1, 'inverted': True,  'desc': 'I can usually do what I want'}, 
             'pa18i12': {'max': 5,  'min': 1, 'inverted': False, 'desc': 'Afraid partner will think I am silly/stupid if I make a mistake'}, 
             'pa18i14': {'max': 5,  'min': 1, 'inverted': False, 'desc': 'Partner clings to me so tightly that I cannot do what I want '}, 
             'pa18i15': {'max': 5,  'min': 1, 'inverted': False, 'desc': 'When I disappoint/annoy partner, I am afraid he/she will not like me'}, 
             'pa18i16': {'max': 5,  'min': 1, 'inverted': True,  'desc': 'I can follow own interests without partner getting upset'}, 
             'per1i2':  {'max': 5,  'min': 1, 'inverted': False, 'desc': 'Sometimes I believe that I am worthless'}, 
             'per1i7':  {'max': 5,  'min': 1, 'inverted': True,  'desc': 'I like myself just the way I am'}, 
             'per1i13': {'max': 5,  'min': 1, 'inverted': True,  'desc': 'All in all, I am pleased with myself'},                      
             
             'per1i6':  {'max': 5,  'min': 1, 'inverted': False, 'desc': 'I feel lonely'}, 
             'sat6':    {'max': 10, 'min': 0, 'inverted': False, 'desc': 'General satisfaction with life'}}

latents   = {'attAvd':  {'manifests': ['pa18i4', 'pa18i14', 'pa18i1', 'pa18i6', 'pa18i11', 'pa18i16'], 'desc': 'Attachment Anxiety'},
             'attAnx':  {'manifests': ['pa18i7', 'pa18i10', 'pa18i2', 'pa18i12', 'pa18i15', 'per1i2', 'per1i7', 'per1i13'], 'desc': 'Attachment Avoidance'}}

Mapping, which variables were observed at the different measurement occasions:

In [3]:
aval_cols = {1:  ['sat6', 'per1i6'] + latents['attAvd']['manifests'] + latents['attAnx']['manifests'],
             2:  ['sat6'],
             3:  ['sat6']           + latents['attAvd']['manifests'] + latents['attAnx']['manifests'],
             4:  ['sat6', 'per1i6'],
             5:  ['sat6', 'per1i6'] + latents['attAvd']['manifests'] + latents['attAnx']['manifests'],
             6:  ['sat6'],
             7:  ['sat6', 'per1i6'] + latents['attAvd']['manifests'] + latents['attAnx']['manifests'],
             8:  ['sat6', 'per1i6'],
             9:  ['sat6', 'per1i6'] + latents['attAvd']['manifests'] + latents['attAnx']['manifests'],
             10: ['sat6', 'per1i6'],
             11: ['sat6', 'per1i6'] + latents['attAvd']['manifests'] + latents['attAnx']['manifests']}

Load the data for the relevant columns:

In [4]:
data_anchor = {}
for wave in [1,2,3,4,5,6,7,8,9,10,11]:
    filepath = '../../data/pairfam_v11/Data/Stata/anchor'+str(wave)+'.dta'
    columns = ['sample', 'wave', 'id', 'pid', 'sex_gen', 'original_doby'] + aval_cols[wave]
    data_anchor[wave] = pd.read_stata(filepath, columns=columns, convert_categoricals=False)
    
data_partner = {}
for wave in [1,2,3,4,5,6,7,8,9,10,11]:
    filepath = '../../data/pairfam_v11/Data/Stata/partner'+str(wave)+'.dta'
    columns = ['sample', 'wave', 'id', 'pid', 'psex', 'pdoby'] + ['p'+col for col in aval_cols[wave]]
    data_partner[wave] = pd.read_stata(filepath, columns=columns, convert_categoricals=False)

### 2. Merge data from different waves

In [5]:
data_anchor  = pd.concat(data_anchor.values(), ignore_index=True)
data_partner = pd.concat(data_partner.values(), ignore_index=True)

### 3. Keep only the main pairfam sample

In [6]:
data_anchor  = data_anchor[data_anchor['sample']==1]
data_anchor.drop('sample', axis=1, inplace=True)
data_partner = data_partner[data_partner['sample']==1]
data_partner.drop('sample', axis=1, inplace=True)

In [7]:
data_partner[(data_partner['pper1i6']>0) & (data_partner['wave']==11)]

Unnamed: 0,wave,id,pid,psex,pdoby,psat6,pper1i6,ppa18i4,ppa18i14,ppa18i1,...,ppa18i11,ppa18i16,ppa18i7,ppa18i10,ppa18i2,ppa18i12,ppa18i15,pper1i2,pper1i7,pper1i13
24964,11,10250000,10250102,1,1981,8,1.0,4.0,2.0,3.0,...,2.0,3.0,1.0,1.0,1.0,2.0,2.0,1.0,4.0,4.0
24968,11,11132000,11132103,2,1984,9,1.0,1.0,1.0,4.0,...,4.0,5.0,1.0,1.0,1.0,1.0,1.0,3.0,3.0,3.0
24972,11,13404000,13404102,2,1991,8,2.0,2.0,2.0,4.0,...,4.0,4.0,1.0,1.0,1.0,2.0,2.0,3.0,4.0,4.0
24975,11,14425000,14425104,1,1991,10,1.0,1.0,1.0,4.0,...,3.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,5.0,4.0
24996,11,23561000,23561102,1,1972,10,1.0,1.0,1.0,3.0,...,4.0,4.0,1.0,1.0,2.0,1.0,2.0,2.0,4.0,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26305,11,717754000,717754103,2,1973,8,2.0,2.0,2.0,5.0,...,4.0,4.0,1.0,1.0,1.0,1.0,2.0,5.0,3.0,2.0
26318,11,727442000,727442101,2,1972,9,1.0,1.0,1.0,5.0,...,5.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,5.0
26333,11,732611000,732611103,1,1992,10,1.0,1.0,1.0,5.0,...,4.0,4.0,2.0,1.0,1.0,1.0,1.0,1.0,5.0,5.0
26338,11,734104000,734104102,1,1975,6,3.0,3.0,2.0,5.0,...,3.0,4.0,3.0,2.0,3.0,1.0,1.0,2.0,5.0,5.0


In [8]:
data_anchor[data_anchor.id==734104000]

Unnamed: 0,wave,id,pid,sex_gen,original_doby,sat6,per1i6,pa18i4,pa18i14,pa18i1,...,pa18i11,pa18i16,pa18i7,pa18i10,pa18i2,pa18i12,pa18i15,per1i2,per1i7,per1i13
12142,1,734104000,734104101.0,2,1973,9,1.0,1.0,1.0,5.0,...,3.0,5.0,1.0,1.0,2.0,2.0,1.0,1.0,4.0,4.0
21271,2,734104000,734104101.0,2,1973,9,,,,,...,,,,,,,,,,
29195,3,734104000,734104101.0,2,1973,8,,1.0,1.0,5.0,...,5.0,5.0,1.0,1.0,1.0,1.0,1.0,2.0,4.0,4.0
37392,4,734104000,734104101.0,2,1973,8,1.0,,,,...,,,,,,,,,,
44738,5,734104000,,2,1973,4,2.0,-3.0,-3.0,-3.0,...,-3.0,-3.0,-3.0,-3.0,-3.0,-3.0,-3.0,3.0,4.0,4.0
51444,6,734104000,,2,1973,7,,,,,...,,,,,,,,,,
57458,7,734104000,734104102.0,2,1973,7,1.0,5.0,1.0,4.0,...,3.0,5.0,1.0,1.0,2.0,2.0,1.0,3.0,4.0,4.0
62993,8,734104000,734104102.0,2,1973,3,4.0,,,,...,,,,,,,,,,
68158,9,734104000,734104102.0,2,1973,6,1.0,1.0,1.0,5.0,...,4.0,5.0,1.0,1.0,1.0,1.0,1.0,3.0,4.0,4.0
72967,10,734104000,734104102.0,2,1973,6,1.0,,,,...,,,,,,,,,,


Analyze sample size:

In [9]:
summary = pd.DataFrame(columns=['w1', 'w2', 'w3', 'w4', 'w5', 'w6', 'w7', 'w8', 'w9', 'w10', 'w11'])

for wave in range(1,12):
    N_all = sum(data_anchor.wave==wave)
    N_inRelationship = sum((data_anchor.wave==wave) & (data_anchor.pid > 0))
    N_partnerData = sum(data_anchor[data_anchor.wave==wave].pid.isin(data_partner[data_partner.wave==wave].pid))
    summary['w'+str(wave)] = [N_all, N_inRelationship, N_partnerData]

summary.index = ['Total number of subjects', 'among those: in a relationship', 'among those: partners participated']
summary

Unnamed: 0,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10,w11
Total number of subjects,12402,9069,7901,6999,6261,5696,5119,4727,4424,4102,3808
among those: in a relationship,7234,5408,4892,4556,4259,3930,3635,3427,3247,3066,2910
among those: partners participated,3743,2687,2362,2182,2039,1922,1792,1729,1626,1493,1412


### 4. Exclude singles

In [10]:
data_anchor.dropna(subset=['pid'], inplace=True)

### 5. Mask missing values

In [11]:
data_anchor[data_anchor<0] = np.nan
data_partner[data_partner<0] = np.nan

Analyze descriptives for anchors:

In [12]:
desc_anchor = pd.DataFrame()

for col in manifests:
    N = data_anchor[[col, 'wave']].groupby('wave').count().T.add_prefix('N_')
    m = np.round(data_anchor[col].mean(),3)
    sd = np.round(data_anchor[col].std(),3)
    desc_anchor = desc_anchor.append(pd.DataFrame({'desc': manifests[col]['desc'], 
                                                   'min': manifests[col]['min'], 'max': manifests[col]['max'], 
                                                   'mean': m, 'sd': sd}, index=[col]).join(N))   
desc_anchor

Unnamed: 0,desc,min,max,mean,sd,N_1,N_2,N_3,N_4,N_5,N_6,N_7,N_8,N_9,N_10,N_11
pa18i1,Partner finds it all right if I stand up for m...,1,5,3.871,0.927,7030,0,4803,0,4199,0,3591,0,3207,0,2872
pa18i2,Sometimes I am afraid that partner would rathe...,1,5,1.789,1.004,7074,0,4820,0,4228,0,3608,0,3223,0,2888
pa18i4,Partner clings to me so much that I feel like ...,1,5,1.625,0.939,7094,0,4828,0,4228,0,3607,0,3223,0,2891
pa18i6,I can settle my personal matters by myself wit...,1,5,3.678,0.988,7047,0,4813,0,4207,0,3602,0,3209,0,2875
pa18i7,I have the feeling that I like partner more th...,1,5,1.728,1.027,6950,0,4757,0,4159,0,3552,0,3174,0,2849
pa18i10,Sometimes not sure if partner enjoys being wit...,1,5,1.763,1.111,7001,0,4759,0,4179,0,3567,0,3176,0,2858
pa18i11,I can usually do what I want,1,5,3.485,1.045,7076,0,4819,0,4215,0,3606,0,3212,0,2890
pa18i12,Afraid partner will think I am silly/stupid if...,1,5,1.682,0.973,7104,0,4825,0,4219,0,3605,0,3211,0,2887
pa18i14,Partner clings to me so tightly that I cannot ...,1,5,1.471,0.853,7120,0,4826,0,4231,0,3606,0,3216,0,2891
pa18i15,"When I disappoint/annoy partner, I am afraid h...",1,5,1.687,0.966,7083,0,4820,0,4223,0,3598,0,3213,0,2883


Analyze descriptives for partners:

In [13]:
desc_partner = pd.DataFrame()

for manifest in manifests:
    col='p'+manifest
    N = data_partner[[col, 'wave']].groupby('wave').count().T.add_prefix('w')
    m = np.round(data_partner[col].mean(),3)
    sd = np.round(data_partner[col].std(),3)
    desc_partner = desc_partner.append(pd.DataFrame({'desc': manifests[manifest]['desc'], 
                                                     'min': manifests[manifest]['min'], 'max': manifests[manifest]['max'], 
                                                     'mean': m, 'sd': sd}, index=[col]).join(N))   
    
desc_partner

Unnamed: 0,desc,min,max,mean,sd,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10,w11
ppa18i1,Partner finds it all right if I stand up for m...,1,5,3.811,1.006,3552,0,2346,0,2033,0,1786,0,1613,0,1407
ppa18i2,Sometimes I am afraid that partner would rathe...,1,5,1.84,1.061,3662,0,2350,0,2031,0,1788,0,1615,0,1407
ppa18i4,Partner clings to me so much that I feel like ...,1,5,1.465,0.84,3662,0,2345,0,2035,0,1785,0,1611,0,1405
ppa18i6,I can settle my personal matters by myself wit...,1,5,3.679,1.056,3590,0,2347,0,2035,0,1784,0,1615,0,1407
ppa18i7,I have the feeling that I like partner more th...,1,5,1.765,1.078,3520,0,2345,0,2034,0,1783,0,1613,0,1401
ppa18i10,Sometimes not sure if partner enjoys being wit...,1,5,1.817,1.124,3537,0,2343,0,2032,0,1786,0,1614,0,1400
ppa18i11,I can usually do what I want,1,5,3.404,1.104,3607,0,2348,0,2028,0,1786,0,1614,0,1401
ppa18i12,Afraid partner will think I am silly/stupid if...,1,5,1.728,1.044,3646,0,2348,0,2029,0,1789,0,1614,0,1405
ppa18i14,Partner clings to me so tightly that I cannot ...,1,5,1.366,0.761,3661,0,2346,0,2031,0,1785,0,1614,0,1406
ppa18i15,"When I disappoint/annoy partner, I am afraid h...",1,5,1.695,0.997,3628,0,2349,0,2030,0,1786,0,1613,0,1405


### 6. Invert variable scales

In [14]:
for manifest in manifests: 
    if(manifests[manifest]['inverted']): 
        data_anchor[manifest] = manifests[manifest]['max'] - data_anchor[manifest] + manifests[manifest]['min']
        data_partner['p'+manifest] = manifests[manifest]['max'] - data_partner['p'+manifest] + manifests[manifest]['min']

Compute Cronbach alpha's for the different scales:

In [15]:
scales = {'Engulfment anxiety     ': ['pa18i4', 'pa18i14'],
          'Autonomy               ': ['pa18i1', 'pa18i6', 'pa18i11', 'pa18i16'],
          'Ambivalence            ': ['pa18i7', 'pa18i10'],
          'Fear of love withdrawal': ['pa18i2', 'pa18i12', 'pa18i15'],
          'Self-esteem            ': ['per1i2', 'per1i7', 'per1i13'],
          
          'Attachment Anxiety     ': ['pa18i4', 'pa18i14', 'pa18i1', 'pa18i6', 'pa18i11', 'pa18i16'],
          'Attachment Avoidance   ': ['pa18i7', 'pa18i10', 'pa18i2', 'pa18i12', 'pa18i15', 'per1i2', 'per1i7', 'per1i13']}

In [16]:
for scale in scales:
    N = len(scales[scale])
    mean_r = np.mean((data_anchor[scales[scale]].corr().sum()-1)/(N-1))
    cronbach_alpha = (N * mean_r) / (1 + (N - 1) * mean_r)
    print(scale + ':\t' + str(round(cronbach_alpha,3)))

Engulfment anxiety     :	0.741
Autonomy               :	0.75
Ambivalence            :	0.611
Fear of love withdrawal:	0.685
Self-esteem            :	0.76
Attachment Anxiety     :	0.746
Attachment Avoidance   :	0.778


### 7. Increase sample size:

Check t-statistic:

In [27]:
summary = pd.DataFrame(columns = ['ID', 'Description', 't-value', 'p-value', 'mean (anchor)', 'mean (partner)', 'sd (anchor)', 'sd (partner)'])

for manifest in manifests:
    val_a = data_anchor[manifest]
    val_p = data_partner['p'+manifest]
    t     = np.round(stats.ttest_ind(val_a, val_p, nan_policy='omit')[0],7)
    p     = np.round(stats.ttest_ind(val_a, val_p, nan_policy='omit')[1],7)
    m_a   = np.round(val_a.mean(),3)
    m_p   = np.round(val_p.mean(),3)
    sd_a  = np.round(val_a.std(),3)
    sd_p  = np.round(val_p.std(),3)
    summary.loc[len(summary)] = [manifest, manifests[manifest]['desc'], t, p, m_a, m_p, sd_a, sd_p]

pd.set_option('display.max_colwidth', None)
summary

Unnamed: 0,ID,Description,t-value,p-value,mean (anchor),mean (partner),sd (anchor),sd (partner)
0,pa18i1,Partner finds it all right if I stand up for my own interests,-5.812391,0.0,2.129,2.189,0.927,1.006
1,pa18i2,Sometimes I am afraid that partner would rather spend time with others,-4.616188,4e-06,1.789,1.84,1.004,1.061
2,pa18i4,Partner clings to me so much that I feel like I am suffocating,16.311184,0.0,1.625,1.465,0.939,0.84
3,pa18i6,I can settle my personal matters by myself without conflicts,0.069911,0.944265,2.322,2.321,0.988,1.056
4,pa18i7,I have the feeling that I like partner more than he/she likes me,-3.184129,0.001453,1.728,1.765,1.027,1.078
5,pa18i10,Sometimes not sure if partner enjoys being with me as much as I,-4.493605,7e-06,1.763,1.817,1.111,1.124
6,pa18i11,I can usually do what I want,-7.02628,0.0,2.515,2.596,1.045,1.104
7,pa18i12,Afraid partner will think I am silly/stupid if I make a mistake,-4.265536,2e-05,1.682,1.728,0.973,1.044
8,pa18i14,Partner clings to me so tightly that I cannot do what I want,11.8183,0.0,1.471,1.366,0.853,0.761
9,pa18i15,"When I disappoint/annoy partner, I am afraid he/she will not like me",-0.806019,0.420237,1.687,1.695,0.966,0.997


In [21]:
manifests[manifest]

{'max': 5,
 'min': 1,
 'inverted': True,
 'desc': 'Partner finds it all right if I stand up for my own interests'}

Make labels in both datasets match each other:

In [47]:
data_partner = data_partner.rename(columns={'pid': 'pid', 'id': 'ppid'}) # Rename IDs so that next row works for them too
data_partner.columns = data_partner.columns.str.replace('^p', '') # Remove prefix 'p'
data_anchor = data_anchor.rename(columns={'sex_gen': 'sex', 'original_doby': 'doby'}) # Rename column labels so they match

Standardize variables:

In [48]:
data_anchor[list(manifests.keys())] = (data_anchor[manifests]-data_anchor[manifests].mean())/data_anchor[manifests].std()
data_partner[list(manifests.keys())] = (data_partner[manifests]-data_partner[manifests].mean())/data_partner[manifests].std()

Append the two dataframes:

In [49]:
data_anchor['subj_type'] = 'anchor'
data_partner['subj_type'] = 'partner'
data = pd.concat([data_anchor, data_partner], ignore_index=True)

### 8. Calculate latent variables:

In [50]:
for lat in latents:
    data[lat] = data[latents[lat]['manifests']].mean(axis=1, skipna=False)
    #data.drop(latents[lat]['manifests'], axis=1, inplace=True)

### 9. Calculate age column

In [51]:
data[data.id==907000].T

Unnamed: 0,1,7235,12643,17535,22091,26350,30279,33914
wave,1,2,3,4,5,6,7,8
id,907000,907000,907000,907000,907000,907000,907000,907000
pid,907101,907101,907101,907101,907101,907101,907101,907101
sex,2,2,2,2,2,2,2,2
doby,1981,1981,1981,1981,1981,1981,1981,1981
sat6,-3.69284,-1.76566,0.161513,1.4463,0.161513,1.4463,1.4463,0.161513
per1i6,0.299824,,,-0.704533,-0.704533,,-0.704533,0.299824
pa18i4,1.46426,,-0.665372,,-0.665372,,-0.665372,
pa18i14,1.79261,,-0.552786,,-0.552786,,-0.552786,
pa18i1,0.939143,,-1.21785,,-1.21785,,-1.21785,


In [52]:
data['age'] = 2009-data['doby']+data['wave']
data.drop('doby', axis=1, inplace=True)

### 10. Combine anchor and partner data:

In [53]:
data_join = data.copy()
for col in data_join.columns.difference(['wave','id','pid']):
    data_join.rename(columns={col:'p'+col}, inplace=True)
    
data = data.merge(data_join, left_on=['wave','id'], right_on=['wave','pid'], how='left', suffixes=['', '_p']) 
data.drop(['id_p', 'pid_p'], axis=1, inplace=True)

### Export data

In [55]:
data.to_csv('../../data/samples/data8.csv', index=False)