# Import libraries

In [None]:
import pandas as pd
import seaborn as sns

# Read data from csv file

In [None]:
# Load the semicolon-separated divorce dataset and preview its first rows.
csv_path = '../input/divorce-prediction/divorce_data.csv'
meta = pd.read_csv(csv_path, sep=';')
meta.head()

# Check shape of the data

In [None]:
# Dimensions of the loaded DataFrame as a (n_rows, n_columns) tuple.
meta.shape

# Correlation of features

In [None]:
# Configure the figure size BEFORE drawing: sns.set() only changes the
# defaults used by figures created afterwards, so calling it after
# heatmap() would leave this plot at the default (small) size.
sns.set(rc={'figure.figsize': (30, 15)})

# Pairwise correlation matrix of all columns, rendered as a heatmap.
sns.heatmap(meta.corr())

### Hence, every feature column is reasonably well correlated with 'Divorce', our target column

# Distribution of Divorce categories

In [None]:
# Apply the font scale before the axes are created; label and title font
# sizes are fixed at axes creation, so setting it afterwards has no effect
# on this figure.
sns.set(font_scale=1.5)

# Take the bar labels and the bar heights from the SAME Series.
# unique() returns classes in order of first appearance while value_counts()
# sorts by descending frequency, so pairing the two can attach a count to the
# wrong class. sort_index() gives a stable 0, 1 ordering.
divorce_counts = meta['Divorce'].value_counts().sort_index()

ax = sns.barplot(x=divorce_counts.index, y=divorce_counts.values, palette='mako')
ax.set(xlabel='Divorce', ylabel='Count')
ax.set(title='Distribution of Divorced and Non-Divorced couples (0 = Not Divorced, 1 = Divorced)')

### Hence, the dataset is not skewed with respect to the target (approximately the same count of 0's and 1's).

# Divide dataset into Features and Target

In [None]:
# Separate features from the target by column NAME instead of by position.
# This avoids the hard-coded feature count (54) and guarantees the target
# can never leak into X if columns are added or reordered.
# NOTE(review): assumes 'Divorce' is the last column and the only non-feature
# column, as the original positional slicing implied - confirm against the CSV.
X = meta.drop(columns='Divorce')
y = meta['Divorce']

# Split the dataset into train and test sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing (scikit-learn's default when neither
# test_size nor train_size is given); the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Import classification models and metrics for evaluation

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier
from lightgbm import LGBMClassifier

from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

# Random Forest Classifier

In [None]:
# Fit a 200-tree random forest on the training split, then score it on the
# held-out test split.
RFC = RandomForestClassifier(random_state=42, n_estimators=200)
RFC.fit(X_train, y_train)
RFC_preds = RFC.predict(X_test)

# Per-class precision/recall/F1 and overall accuracy (as a percentage).
rfc_report = classification_report(y_test, RFC_preds)
rfc_accuracy_pct = accuracy_score(y_test, RFC_preds) * 100
print('Classification Report:\n', rfc_report)
print('Accuracy Score:', rfc_accuracy_pct)

# XGBoost Classifier

In [None]:
# Gradient-boosted trees: 200 rounds, depth-6 trees, small learning rate.
# random_state is pinned to 42 like the other models in this notebook so the
# run stays reproducible even if row/column subsampling is enabled later.
XGB = XGBClassifier(
    n_estimators=200,
    max_depth=6,
    learning_rate=0.01,
    random_state=42,
)
XGB.fit(X_train, y_train)
XGB_preds = XGB.predict(X_test)

# Per-class precision/recall/F1 and overall accuracy (as a percentage).
print('Classification Report:\n', classification_report(y_test, XGB_preds))
print('Accuracy Score:', accuracy_score(y_test, XGB_preds) * 100)

# AdaBoost Classifier

In [None]:
# Train AdaBoost with 200 boosting rounds and evaluate it on the test split.
ADC = AdaBoostClassifier(random_state=42, n_estimators=200)
ADC.fit(X_train, y_train)
ADC_preds = ADC.predict(X_test)

# Print each metric as "<label> <value>", report first, then accuracy in percent.
adc_metrics = (
    ('Classification Report:\n', classification_report(y_test, ADC_preds)),
    ('Accuracy Score:', accuracy_score(y_test, ADC_preds) * 100),
)
for metric_label, metric_value in adc_metrics:
    print(metric_label, metric_value)

# LGBM Classifier

In [None]:
# LightGBM with default hyper-parameters and a fixed seed; fit() returns the
# estimator itself, so construction and training are chained.
LGBM = LGBMClassifier(random_state=42).fit(X_train, y_train)
LGBM_preds = LGBM.predict(X_test)

# Per-class precision/recall/F1 and overall accuracy (as a percentage).
lgbm_report = classification_report(y_test, LGBM_preds)
lgbm_accuracy_pct = accuracy_score(y_test, LGBM_preds) * 100
print('Classification Report:\n', lgbm_report)
print('Accuracy Score:', lgbm_accuracy_pct)

## Every model seems to have similar accuracy on the test dataset