# How much agency do we have over our relationships? 
i.e. Do structural or internal factors have a greater effect on divorce?


## Importing libraries

In [61]:
import pandas as pd
import numpy as np

## Uploading Dataset

In [62]:
divorce = pd.read_csv('divorce_df.csv')

In [63]:
divorce.head(20)

Unnamed: 0,age_at_marriage,marriage_duration_years,num_children,education_level,employment_status,combined_income,religious_compatibility,cultural_background_match,communication_score,conflict_frequency,...,mental_health_issues,infidelity_occurred,counseling_attended,social_support,shared_hobbies_count,marriage_type,pre_marital_cohabitation,domestic_violence_history,trust_score,divorced
0,30,1,1,Bachelor,Full-time,64001,Different Religion,1,5.536016,3,...,0,0,0,8.428183,5,Love,1,0,6.262411,1
1,27,2,2,Master,Full-time,86221,Same Religion,1,5.810172,3,...,0,1,0,5.297221,1,Love,1,0,6.769384,1
2,31,6,0,High School,Part-time,69441,Same Religion,0,6.088146,3,...,0,0,0,5.887066,1,Arranged,1,0,5.532866,1
3,35,3,2,Bachelor,Full-time,69513,Not Religious,1,6.212046,3,...,0,0,0,5.263555,5,Love,1,0,3.491264,0
4,26,2,2,No Formal Education,Full-time,63986,Different Religion,1,4.826262,1,...,0,0,1,5.771259,4,Love,1,0,10.0,1
5,26,10,0,High School,Full-time,44605,Same Religion,0,6.825964,3,...,0,0,0,6.90935,3,Love,1,0,4.628149,0
6,35,10,2,High School,Full-time,73454,Different Religion,1,3.775491,1,...,0,0,0,7.351375,1,Love,1,0,5.753932,0
7,31,17,1,High School,Unemployed,59491,Not Religious,1,6.613021,4,...,0,0,0,8.709813,2,Love,0,0,7.010559,1
8,25,5,1,Bachelor,Full-time,40944,Not Religious,0,2.963345,0,...,0,0,0,8.727489,3,Arranged,1,0,2.428385,0
9,30,1,0,PhD,Full-time,46819,Same Religion,1,3.612109,1,...,1,0,0,5.939285,1,Arranged,1,0,8.750676,1


In [64]:
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm

## Creating dataframes for structural factors and internal factors

In [65]:
# remove employment_status, 

In [66]:
# make new df with structural variables
structural_variables = divorce[['age_at_marriage', 'num_children', 'education_level', 'combined_income', 'religious_compatibility', 'cultural_background_match', 
                        'marriage_type', 'mental_health_issues', 'divorced']].copy()

structural_variables.head()

Unnamed: 0,age_at_marriage,num_children,education_level,combined_income,religious_compatibility,cultural_background_match,marriage_type,mental_health_issues,divorced
0,30,1,Bachelor,64001,Different Religion,1,Love,0,1
1,27,2,Master,86221,Same Religion,1,Love,0,1
2,31,0,High School,69441,Same Religion,0,Arranged,0,1
3,35,2,Bachelor,69513,Not Religious,1,Love,0,0
4,26,2,No Formal Education,63986,Different Religion,1,Love,0,1


In [67]:
# making categorical variables numeric
education_mapping = {'No Formal Education': 0, 'High School': 1, 'Bachelor': 2, 'Master': 3, 'PhD': 4}
structural_variables['education_level'] = structural_variables['education_level'].map(education_mapping)
religious_mapping = {'Not Religious': 0, 'Same Religion': 0, 'Different Religion': 1}
structural_variables['religious_compatibility'] = structural_variables['religious_compatibility'].map(religious_mapping)
marriage_type_mapping = {'Love': 0, 'Arranged': 1}
structural_variables['marriage_type'] = structural_variables['marriage_type'].map(marriage_type_mapping)
structural_variables.head()

Unnamed: 0,age_at_marriage,num_children,education_level,combined_income,religious_compatibility,cultural_background_match,marriage_type,mental_health_issues,divorced
0,30,1,2,64001,1,1,0.0,0,1
1,27,2,3,86221,0,1,0.0,0,1
2,31,0,1,69441,0,0,1.0,0,1
3,35,2,2,69513,0,1,0.0,0,0
4,26,2,0,63986,1,1,0.0,0,1


In [68]:
# make new df with internal factors
internal_factors = divorce[['communication_score', 'conflict_frequency', 'conflict_resolution_style', 'infidelity_occurred', 'counseling_attended', 'social_support',
                    'pre_marital_cohabitation','domestic_violence_history', 'trust_score', 'divorced']].copy()
internal_factors.head()

Unnamed: 0,communication_score,conflict_frequency,conflict_resolution_style,infidelity_occurred,counseling_attended,social_support,pre_marital_cohabitation,domestic_violence_history,trust_score,divorced
0,5.536016,3,Collaborative,0,0,8.428183,1,0,6.262411,1
1,5.810172,3,Aggressive,1,0,5.297221,1,0,6.769384,1
2,6.088146,3,Collaborative,0,0,5.887066,1,0,5.532866,1
3,6.212046,3,Aggressive,0,0,5.263555,1,0,3.491264,0
4,4.826262,1,Passive,0,1,5.771259,1,0,10.0,1


In [69]:
conflict_resolution_mapping = {'Avoidant': 0, 'Aggressive': 1, 'Passive': 2, 'Collaborative': 3}
internal_factors['conflict_resolution_style'] = internal_factors['conflict_resolution_style'].map(conflict_resolution_mapping)
internal_factors.head()

Unnamed: 0,communication_score,conflict_frequency,conflict_resolution_style,infidelity_occurred,counseling_attended,social_support,pre_marital_cohabitation,domestic_violence_history,trust_score,divorced
0,5.536016,3,3,0,0,8.428183,1,0,6.262411,1
1,5.810172,3,1,1,0,5.297221,1,0,6.769384,1
2,6.088146,3,3,0,0,5.887066,1,0,5.532866,1
3,6.212046,3,1,0,0,5.263555,1,0,3.491264,0
4,4.826262,1,2,0,1,5.771259,1,0,10.0,1


## Correlation Matrix

In [None]:
# hypothesis testing 
# logistic regression for internal factors
from statsmodels.formula.api import logit
internal_model = logit('divorced ~ communication_score + conflict_frequency + infidelity_occurred + counseling_attended + social_support + pre_marital_cohabitation + domestic_violence_history + trust_score', data=internal_factors).fit()
print(internal_model.summary())


Optimization terminated successfully.
         Current function value: 0.661097
         Iterations 4
                           Logit Regression Results                           
Dep. Variable:               divorced   No. Observations:                 5000
Model:                          Logit   Df Residuals:                     4991
Method:                           MLE   Df Model:                            8
Date:                Mon, 08 Dec 2025   Pseudo R-squ.:                 0.01663
Time:                        17:42:23   Log-Likelihood:                -3305.5
converged:                       True   LL-Null:                       -3361.4
Covariance Type:            nonrobust   LLR p-value:                 1.639e-20
                                coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------------
Intercept                     0.5300      0.169      3.135      0.002       0.

In [None]:
# hypothesis testing 
# logistic regression for structural variables
structural_model = logit('divorced ~ age_at_marriage + num_children + education_level + employment_status + combined_income + religious_compatibility + cultural_background_match + marriage_type', data=structural_variables).fit()
print(structural_model.summary())

Optimization terminated successfully.
         Current function value: 0.670714
         Iterations 4
                           Logit Regression Results                           
Dep. Variable:               divorced   No. Observations:                 5000
Model:                          Logit   Df Residuals:                     4984
Method:                           MLE   Df Model:                           15
Date:                Mon, 08 Dec 2025   Pseudo R-squ.:                0.002322
Time:                        17:40:59   Log-Likelihood:                -3353.6
converged:                       True   LL-Null:                       -3361.4
Covariance Type:            nonrobust   LLR p-value:                    0.4083
                                               coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------------------------
Intercept                                   -0.5