## Imports

In [None]:
import pandas as pd
from sdv.single_table import CTGANSynthesizer
from sdv.metadata import SingleTableMetadata

## Data loading and overview

In [96]:
# Load QA_encoded.csv into the working frame `df`; the relative path is
# resolved against the kernel's current working directory.
df = pd.read_csv(filepath_or_buffer="QA_encoded.csv")

In [97]:
# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 135 entries, 0 to 134
Data columns (total 44 columns):
 #   Column                                 Non-Null Count  Dtype 
---  ------                                 --------------  ----- 
 0   id                                     135 non-null    int64 
 1   Age                                    135 non-null    int64 
 2   Gender                                 135 non-null    int64 
 3   Current_residence                      135 non-null    int64 
 4   District                               135 non-null    int64 
 5   Parent_occupation                      135 non-null    int64 
 6   Displacement                           135 non-null    int64 
 7   Displaced_months                       135 non-null    int64 
 8   Parent_current_employment              135 non-null    int64 
 9   Experiencing_bombs                     135 non-null    int64 
 10  Family_member_loss                     135 non-null    int64 
 11  House_destruction  

## Oversampling using CT-GAN

In [98]:
# Split `df` into one frame per Mental_health_category label:
#   minor  -> 'Needs help', medium -> 'Stable', major -> 'Thriving'.
# NOTE(review): the variable names presumably reflect class frequency
# (minor = rarest, major = most common) -- confirm against value_counts().
minor, medium, major = (
    df.loc[df['Mental_health_category'].eq(label)]
    for label in ('Needs help', 'Stable', 'Thriving')
)

In [99]:
# Generate synthetic rows for the `minor` subset with CT-GAN.

# Column types are inferred from the subset itself; `id` is then declared
# as an identifier column and registered as the table's primary key.
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data=minor)
metadata.update_column('id', sdtype='id')
metadata.set_primary_key('id')

# The synthesizer is trained on the `minor` rows only.
model = CTGANSynthesizer(metadata=metadata)
model.fit(data=minor)

# Request 90 synthetic rows (the count is hard-coded in this cell).
# NOTE(review): no random seed is set anywhere in the notebook, so a re-run
# presumably yields different synthetic rows -- confirm if that matters.
new_minor = model.sample(num_rows=90)



In [100]:
# Generate synthetic rows for the `medium` subset with CT-GAN.

# Column types are inferred from the subset itself; `id` is then declared
# as an identifier column and registered as the table's primary key.
# `metadata` and `model` are rebound here, replacing any earlier objects
# bound to those names.
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data=medium)
metadata.update_column('id', sdtype='id')
metadata.set_primary_key('id')

# The synthesizer is trained on the `medium` rows only.
model = CTGANSynthesizer(metadata=metadata)
model.fit(data=medium)

# Request 100 synthetic rows (the count is hard-coded in this cell).
new_medium = model.sample(num_rows=100)



In [101]:
# Display the `major` subset (rows labelled 'Thriving'); it is used as-is,
# without synthetic augmentation, in the concatenation below.
major

Unnamed: 0,id,Age,Gender,Current_residence,District,Parent_occupation,Displacement,Displaced_months,Parent_current_employment,Experiencing_bombs,...,Attend_school_during,Attend_school_currently,Academic_performance_after,Participation_in_social_activities,Current_need_of_psychological_support,Current_need_of_educational_materials,Current_need_of_safe_housing,Current_need_of_medical_care,Current_need_of_food/water,Mental_health_category
0,0,8,1,1,1,3,2,0,3,2,...,1,1,2,1,3,2,2,2,2,Thriving
1,1,8,2,1,4,1,1,2,1,1,...,2,1,1,1,2,2,2,2,2,Thriving
3,3,8,1,1,4,1,1,2,1,1,...,4,1,1,1,1,2,2,2,2,Thriving
6,6,9,1,1,4,1,1,4,1,1,...,4,1,1,1,2,2,2,2,2,Thriving
7,7,8,2,2,4,1,1,5,1,1,...,2,2,2,2,3,2,2,2,2,Thriving
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126,126,10,1,1,4,1,1,3,1,2,...,1,1,2,2,2,2,2,2,2,Thriving
128,128,9,1,1,4,1,1,2,1,1,...,2,1,1,1,2,2,2,2,2,Thriving
129,129,10,2,1,4,1,1,1,1,1,...,3,1,1,2,2,2,2,2,2,Thriving
130,130,8,2,1,4,1,1,2,1,1,...,4,1,1,1,2,2,2,2,2,Thriving


In [102]:
# Stack the real `major` rows on top of the synthetic `medium` and `minor`
# rows, renumbering the index from 0.
# NOTE(review): the real `medium` and `minor` rows are not part of this list,
# so those two classes consist of synthetic samples only -- confirm this is
# intended rather than an omission.
augmented_df = pd.concat(
    objs=[major, new_medium, new_minor],
    axis=0,
    ignore_index=True,
)
augmented_df

Unnamed: 0,id,Age,Gender,Current_residence,District,Parent_occupation,Displacement,Displaced_months,Parent_current_employment,Experiencing_bombs,...,Attend_school_during,Attend_school_currently,Academic_performance_after,Participation_in_social_activities,Current_need_of_psychological_support,Current_need_of_educational_materials,Current_need_of_safe_housing,Current_need_of_medical_care,Current_need_of_food/water,Mental_health_category
0,0,8,1,1,1,3,2,0,3,2,...,1,1,2,1,3,2,2,2,2,Thriving
1,1,8,2,1,4,1,1,2,1,1,...,2,1,1,1,2,2,2,2,2,Thriving
2,3,8,1,1,4,1,1,2,1,1,...,4,1,1,1,1,2,2,2,2,Thriving
3,6,9,1,1,4,1,1,4,1,1,...,4,1,1,1,2,2,2,2,2,Thriving
4,7,8,2,2,4,1,1,5,1,1,...,2,2,2,2,3,2,2,2,2,Thriving
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
289,13994480,10,2,1,6,1,2,0,3,1,...,2,1,1,2,1,3,2,2,2,Needs help
290,1252651,8,2,1,2,1,2,0,1,1,...,3,1,1,2,1,2,2,2,2,Needs help
291,15540153,9,2,1,3,1,2,0,1,1,...,1,1,2,2,1,2,2,2,2,Needs help
292,7301618,10,2,1,1,1,2,0,3,1,...,1,1,1,1,1,3,2,2,2,Needs help


In [103]:
# Remove the `id` column from the combined frame. Like the in-place form,
# this raises KeyError if the cell is run a second time, because `id` is
# already gone.
augmented_df = augmented_df.drop(columns="id")

## Feature engineering

In [104]:
# Build two summary features, each the row-wise mean of six symptom items
# rounded to 2 decimals:
#   index_before -- symptoms before the war
#   index_after  -- symptoms after the war
# Per the original author's note, each item ranges from 0 (lowest) to
# 5 (highest); that range is not checked here.
# NOTE(review): the two item lists are not exact counterparts
# ('Sleeping_before' vs 'Hypervigilance_after') -- confirm this is intended.
symptom_items = {
    'index_before': [
        'Sleeping_before', 'Nightmares_before', 'Worry/fear_before',
        'Aggressive_behavior_before', 'Family/friends_withdrawal_before',
        'Mood swings_before',
    ],
    'index_after': [
        'Hypervigilance_after', 'Nightmares_after', 'Worry/fear_after',
        'Aggressive_behavior_after', 'Family/friends_withdrawal_after',
        'Mood_swings_after',
    ],
}
for index_name, items in symptom_items.items():
    augmented_df[index_name] = augmented_df[items].mean(axis=1).round(2)

In [105]:
# Drop the twelve per-symptom item columns that were averaged into
# index_before / index_after.
symptoms_before = [
    'Sleeping_before', 'Nightmares_before', 'Worry/fear_before',
    'Aggressive_behavior_before', 'Family/friends_withdrawal_before',
    'Mood swings_before',
]
symptoms_after = [
    'Hypervigilance_after', 'Nightmares_after', 'Worry/fear_after',
    'Aggressive_behavior_after', 'Family/friends_withdrawal_after',
    'Mood_swings_after',
]
cols = symptoms_before + symptoms_after
augmented_df = augmented_df.drop(columns=cols)

In [106]:
# Display the frame after feature engineering: the per-symptom item columns
# are gone and index_before / index_after appear as the last two columns.
augmented_df

Unnamed: 0,Age,Gender,Current_residence,District,Parent_occupation,Displacement,Displaced_months,Parent_current_employment,Experiencing_bombs,Family_member_loss,...,Academic_performance_after,Participation_in_social_activities,Current_need_of_psychological_support,Current_need_of_educational_materials,Current_need_of_safe_housing,Current_need_of_medical_care,Current_need_of_food/water,Mental_health_category,index_before,index_after
0,8,1,1,1,3,2,0,3,2,2,...,2,1,3,2,2,2,2,Thriving,2.17,2.17
1,8,2,1,4,1,1,2,1,1,2,...,1,1,2,2,2,2,2,Thriving,2.33,3.33
2,8,1,1,4,1,1,2,1,1,2,...,1,1,1,2,2,2,2,Thriving,1.67,2.33
3,9,1,1,4,1,1,4,1,1,2,...,1,1,2,2,2,2,2,Thriving,1.00,2.17
4,8,2,2,4,1,1,5,1,1,1,...,2,2,3,2,2,2,2,Thriving,1.33,2.67
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
289,10,2,1,6,1,2,0,3,1,2,...,1,2,1,3,2,2,2,Needs help,1.17,2.00
290,8,2,1,2,1,2,0,1,1,2,...,1,2,1,2,2,2,2,Needs help,1.50,3.17
291,9,2,1,3,1,2,0,1,1,2,...,2,2,1,2,2,2,2,Needs help,1.50,3.00
292,10,2,1,1,1,2,0,3,1,2,...,1,1,1,3,2,2,2,Needs help,1.33,2.50


## Outcome saved in a .csv

In [107]:
# Write the final frame to Model_input.csv in the working directory;
# index=False keeps the row index out of the file.
augmented_df.to_csv(path_or_buf="Model_input.csv", index=False)