# How much agency do we have over our relationships? 
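i.e., do structural or internal factors have a greater effect on divorce?

Structural factors here are the circumstances surrounding a couple (e.g. age at marriage, income, education, employment); internal factors are the dynamics within the relationship (e.g. communication, conflict, trust, infidelity).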


## Importing libraries

In [155]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm

## Loading the dataset

In [156]:
# Read the raw divorce dataset; the path is resolved relative to the working directory.
divorce = pd.read_csv(filepath_or_buffer='divorce_df.csv')

In [157]:
# Show every column name in the raw `divorce` frame; the structural and internal
# factor frames further down are built by selecting subsets of these columns.
divorce.columns

Index(['age_at_marriage', 'marriage_duration_years', 'num_children',
       'education_level', 'employment_status', 'combined_income',
       'religious_compatibility', 'cultural_background_match',
       'communication_score', 'conflict_frequency',
       'conflict_resolution_style', 'financial_stress_level',
       'mental_health_issues', 'infidelity_occurred', 'counseling_attended',
       'social_support', 'shared_hobbies_count', 'marriage_type',
       'pre_marital_cohabitation', 'domestic_violence_history', 'trust_score',
       'divorced'],
      dtype='object')

## Creating dataframes for structural factors and internal factors

In [168]:
# Structural factors: the columns describing the couple's circumstances, plus the
# `divorced` outcome. `.copy()` gives an independent frame, so the edits made to it
# in later cells do not write back into `divorce`.
structural_columns = [
    'age_at_marriage',
    'num_children',
    'education_level',
    'employment_status',
    'marriage_type',
    'combined_income',
    'religious_compatibility',
    'cultural_background_match',
    'mental_health_issues',
    'social_support',
    'divorced',
]
structural_factors = divorce[structural_columns].copy()

structural_factors.head()

Unnamed: 0,age_at_marriage,num_children,education_level,employment_status,marriage_type,combined_income,religious_compatibility,cultural_background_match,mental_health_issues,social_support,divorced
0,30,1,Bachelor,Full-time,Love,64001,Different Religion,1,0,8.428183,1
1,27,2,Master,Full-time,Love,86221,Same Religion,1,0,5.297221,1
2,31,0,High School,Part-time,Arranged,69441,Same Religion,0,0,5.887066,1
3,35,2,Bachelor,Full-time,Love,69513,Not Religious,1,0,5.263555,0
4,26,2,No Formal Education,Full-time,Love,63986,Different Religion,1,0,5.771259,1


## Dealing with categorical variables

### Structural Factors

In [169]:
# Collapse religious compatibility into a binary flag:
#   1 = the spouses follow different religions
#   0 = same religion, or not religious
# NOTE: despite the column name, 1 therefore marks a religious *mismatch*.
religious_mapping = {'Not Religious': 0, 'Same Religion': 0, 'Different Religion': 1}

# Map from the untouched `divorce` frame rather than from the column being overwritten,
# so re-running this cell is safe (mapping the 0/1 codes a second time would turn
# every value into NaN). Assignment aligns on the index shared by both frames.
raw_religion = divorce['religious_compatibility']

# `.map` silently converts labels missing from the dict into NaN; fail loudly instead.
unexpected_labels = set(raw_religion.dropna().unique()) - set(religious_mapping)
assert not unexpected_labels, (
    f"Unmapped religious_compatibility values: {sorted(map(str, unexpected_labels))}"
)

structural_factors['religious_compatibility'] = raw_religion.map(religious_mapping)

In [170]:
# One-hot encode the nominal columns: employment_status, marriage_type and education_level.
# (cultural_background_match already shows up as 0/1 in the preview, so it is not encoded here.)
# NOTE(review): every level gets its own dummy column (no `drop_first`), so each group of
# dummies sums to 1 - confirm this is intended before fitting a model with an intercept.
nominal_columns = ['employment_status', 'marriage_type', 'education_level']
structural_factors = pd.get_dummies(structural_factors, columns=nominal_columns)
structural_factors.head(10)

Unnamed: 0,age_at_marriage,num_children,combined_income,religious_compatibility,cultural_background_match,mental_health_issues,social_support,divorced,employment_status_Full-time,employment_status_Homemaker,employment_status_Part-time,employment_status_Unemployed,marriage_type_Arranged,marriage_type_Love,marriage_type_Other,education_level_Bachelor,education_level_High School,education_level_Master,education_level_No Formal Education,education_level_PhD
0,30,1,64001,1,1,0,8.428183,1,True,False,False,False,False,True,False,True,False,False,False,False
1,27,2,86221,0,1,0,5.297221,1,True,False,False,False,False,True,False,False,False,True,False,False
2,31,0,69441,0,0,0,5.887066,1,False,False,True,False,True,False,False,False,True,False,False,False
3,35,2,69513,0,1,0,5.263555,0,True,False,False,False,False,True,False,True,False,False,False,False
4,26,2,63986,1,1,0,5.771259,1,True,False,False,False,False,True,False,False,False,False,True,False
5,26,0,44605,0,0,0,6.90935,0,True,False,False,False,False,True,False,False,True,False,False,False
6,35,2,73454,1,1,0,7.351375,0,True,False,False,False,False,True,False,False,True,False,False,False
7,31,1,59491,0,1,0,8.709813,1,False,False,False,True,False,True,False,False,True,False,False,False
8,25,1,40944,0,0,0,8.727489,0,True,False,False,False,True,False,False,True,False,False,False,False
9,30,0,46819,0,1,1,5.939285,1,True,False,False,False,True,False,False,False,False,False,False,True


### Internal Factors

In [171]:
# Internal factors: the columns describing dynamics within the relationship, plus the
# `divorced` outcome. `.copy()` gives an independent frame, so the edits made to it
# in later cells do not write back into `divorce`.
internal_columns = [
    'communication_score',
    'conflict_frequency',
    'conflict_resolution_style',
    'infidelity_occurred',
    'counseling_attended',
    'pre_marital_cohabitation',
    'domestic_violence_history',
    'trust_score',
    'divorced',
]
internal_factors = divorce[internal_columns].copy()
internal_factors.head()

Unnamed: 0,communication_score,conflict_frequency,conflict_resolution_style,infidelity_occurred,counseling_attended,pre_marital_cohabitation,domestic_violence_history,trust_score,divorced
0,5.536016,3,Collaborative,0,0,1,0,6.262411,1
1,5.810172,3,Aggressive,1,0,1,0,6.769384,1
2,6.088146,3,Collaborative,0,0,1,0,5.532866,1
3,6.212046,3,Aggressive,0,0,1,0,3.491264,0
4,4.826262,1,Passive,0,1,1,0,10.0,1


In [172]:
# One-hot encode conflict_resolution_style, the only text column among the internal factors.
# NOTE(review): every level gets its own dummy column (no `drop_first`) - confirm this is
# intended before fitting a model with an intercept.
style_columns = ['conflict_resolution_style']
internal_factors = pd.get_dummies(internal_factors, columns=style_columns)
internal_factors.head()

Unnamed: 0,communication_score,conflict_frequency,infidelity_occurred,counseling_attended,pre_marital_cohabitation,domestic_violence_history,trust_score,divorced,conflict_resolution_style_Aggressive,conflict_resolution_style_Avoidant,conflict_resolution_style_Collaborative,conflict_resolution_style_Passive
0,5.536016,3,0,0,1,0,6.262411,1,False,False,True,False
1,5.810172,3,1,0,1,0,6.769384,1,True,False,False,False
2,6.088146,3,0,0,1,0,5.532866,1,False,False,True,False
3,6.212046,3,0,0,1,0,3.491264,0,True,False,False,False
4,4.826262,1,0,1,1,0,10.0,1,False,False,False,True


## Normalisation / Scaling


In [173]:
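# z-score scaling of the continuous and ordinal columns: z = (x - mean) / sd,
# so each scaled column has mean = 0 and sd = 1.
# The mean and sd should be estimated on the training dataset only.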

In [164]:
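# Open question: do we need to split the data into train and test first?
# If a model will be evaluated on held-out rows, yes: split first and fit the scaler on the
# training rows only, otherwise the test rows influence the mean and sd used for scaling.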

In [174]:
from sklearn.preprocessing import StandardScaler

# Standardise the numeric structural columns to mean 0 / sd 1; the 0/1 flags and the
# one-hot dummies are left untouched. The scaler is fitted on every row of the frame.
structural_numeric = ['age_at_marriage', 'num_children', 'social_support', 'combined_income']
scaler = StandardScaler()
scaled_structural = scaler.fit_transform(structural_factors[structural_numeric])
structural_factors[structural_numeric] = scaled_structural
structural_factors.head()

Unnamed: 0,age_at_marriage,num_children,combined_income,religious_compatibility,cultural_background_match,mental_health_issues,social_support,divorced,employment_status_Full-time,employment_status_Homemaker,employment_status_Part-time,employment_status_Unemployed,marriage_type_Arranged,marriage_type_Love,marriage_type_Other,education_level_Bachelor,education_level_High School,education_level_Master,education_level_No Formal Education,education_level_PhD
0,0.497663,-0.443161,0.192314,1,1,0,1.25239,1,True,False,False,False,False,True,False,True,False,False,False,False
1,-0.120194,0.358505,1.322521,0,1,0,-0.340726,1,True,False,False,False,False,True,False,False,False,True,False,False
2,0.703615,-1.244827,0.469017,0,0,0,-0.040598,1,False,False,True,False,True,False,False,False,True,False,False,False
3,1.527425,0.358505,0.472679,0,1,0,-0.357857,0,True,False,False,False,False,True,False,True,False,False,False,False
4,-0.326146,0.358505,0.191551,1,1,0,-0.099523,1,True,False,False,False,False,True,False,False,False,False,True,False


In [175]:
# Standardise the numeric internal columns to mean 0 / sd 1.
# `fit_transform` re-fits the shared `scaler`, so after this cell it holds the
# internal-factor statistics rather than the structural ones.
internal_numeric = ['communication_score', 'conflict_frequency', 'trust_score']
scaled_internal = scaler.fit_transform(internal_factors[internal_numeric])
internal_factors[internal_numeric] = scaled_internal
internal_factors.head()

Unnamed: 0,communication_score,conflict_frequency,infidelity_occurred,counseling_attended,pre_marital_cohabitation,domestic_violence_history,trust_score,divorced,conflict_resolution_style_Aggressive,conflict_resolution_style_Avoidant,conflict_resolution_style_Collaborative,conflict_resolution_style_Passive
0,-0.25698,0.702445,0,0,1,0,0.119978,1,False,False,True,False
1,-0.117329,0.702445,1,0,1,0,0.382288,1,True,False,False,False
2,0.024267,0.702445,0,0,1,0,-0.25749,1,False,False,True,False
3,0.087379,0.702445,0,0,1,0,-1.313822,0,True,False,False,False
4,-0.618518,-0.713775,0,1,1,0,2.053819,1,False,False,False,True
