In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

In [2]:
from src.core import DataProcessor

In [3]:
# Build the processor around the BMK_2018.csv file.
data_processor = DataProcessor(data="BMK_2018.csv")
# Bare trailing expression: echoes the processor object as the cell output.
data_processor

<class 'str'> BMK_2018.csv

In [4]:
# Ask the processor for its data (presumably a DataFrame — confirm in DataProcessor).
# Note: `df` is rebound further down to the result of pivot_rater_data().
df = data_processor.get_data()

In [5]:
# (rows, columns) of the frame held on the processor, before any filtering is applied.
data_processor.df.shape

(227959, 215)

In [6]:
# Show the rater-type column name and the id key the processor is configured with,
# one per line.
print(data_processor.rater_col_name, data_processor.id_key, sep="\n")

RaterType
ESI_Key


In [7]:
# Summary of rater counts indexed by RaterType (medians, going by the method name).
data_processor.median_rater_counts()

RaterType
boss             1.0
direct report    3.0
other            1.0
peer             4.0
superior         1.0
dtype: float64

In [8]:
# Filter the processor's data; this updates data_processor.df (in the recorded run the
# row count drops from 227959 to 166814 while the 215 columns are kept).
# NOTE(review): presumably keeps only participants that have every rater type — confirm
# in DataProcessor.filter_data_with_all_raters.
data_processor.filter_data_with_all_raters()
# Shape after filtering.
data_processor.df.shape

(166814, 215)

In [9]:
# Distinct rater types still present after the filtering step.
data_processor.df.RaterType.unique()

array(['boss', 'direct report', 'peer', 'self'], dtype=object)

In [10]:
# Inspect the item columns currently set on the processor
# (an empty list in the recorded run; they are assigned in the next cell).
data_processor.items_cols

[]

In [11]:
# Demographic columns handed to the processor.
demo_cols = ["Age", "Race", "OrganizationSector", "OrganizationType",
             "RaterType", "OrganizationLevel", "Gender"]

# Key columns (id + rater type) followed by the 16 BMK_* item columns.
items_cols = [
    "ESI_Key",
    "RaterType",
    "BMK_S01_Strategic",
    "BMK_S02_Quick",
    "BMK_S03_Decisive",
    "BMK_S04_Change",
    "BMK_S05_Leading",
    "BMK_S06_Confront",
    "BMK_S07_Participative",
    "BMK_S08_Build",
    "BMK_S09_Compassion",
    "BMK_S10_Putting",
    "BMK_S11_Respect",
    "BMK_D01_Interper",
    "BMK_D02_DiffBuild",
    "BMK_D03_DiffChange",
    "BMK_D04_Failure",
    "BMK_D05_Narrow",
]

# Register both lists on the processor (items first, then demographics).
data_processor.items_cols = items_cols
data_processor.demo_cols = demo_cols

In [12]:
# Pivot the rater data via the processor; this rebinds `df`, replacing the get_data() result.
# NOTE(review): skip=2 presumably skips the two leading key columns
# (ESI_Key, RaterType) of items_cols — confirm in DataProcessor.pivot_rater_data.
df = data_processor.pivot_rater_data(skip=2)

In [13]:
# The 16 BMK item columns, in questionnaire order.
bmk_item_names = [
    'BMK_S01_Strategic', 'BMK_S02_Quick', 'BMK_S03_Decisive', 'BMK_S04_Change',
    'BMK_S05_Leading', 'BMK_S06_Confront', 'BMK_S07_Participative', 'BMK_S08_Build',
    'BMK_S09_Compassion', 'BMK_S10_Putting', 'BMK_S11_Respect',
    'BMK_D01_Interper', 'BMK_D02_DiffBuild', 'BMK_D03_DiffChange',
    'BMK_D04_Failure', 'BMK_D05_Narrow',
]

# Suffixes appended to each item for the per-rater-type columns.
rater_suffixes = ('boss', 'direct_report', 'peer')

# Plain item columns first, then every item expanded as <item>_<rater>
# (items in the outer loop, rater suffixes in the inner loop).
column_names = list(bmk_item_names)
for item_name in bmk_item_names:
    column_names.extend(f'{item_name}_{suffix}' for suffix in rater_suffixes)

In [14]:
from src.core import SynthLeader

In [15]:
# Wrap the pivoted frame in a SynthLeader labelled "BMK_2018"
# (the recorded run printed "Cuda: True" at this point).
synth = SynthLeader(df=df, name="BMK_2018")

Cuda: True


In [16]:
# Display the metadata held by the SynthLeader (primary key and per-column sdtype
# entries in the recorded output).
synth.metadata

{
    "primary_key": "ESI_Key",
    "METADATA_SPEC_VERSION": "SINGLE_TABLE_V1",
    "columns": {
        "ESI_Key": {
            "sdtype": "id"
        },
        "RaterType": {
            "sdtype": "categorical"
        },
        "BMK_S01_Strategic": {
            "sdtype": "numerical"
        },
        "BMK_S02_Quick": {
            "sdtype": "numerical"
        },
        "BMK_S03_Decisive": {
            "sdtype": "numerical"
        },
        "BMK_S04_Change": {
            "sdtype": "numerical"
        },
        "BMK_S05_Leading": {
            "sdtype": "numerical"
        },
        "BMK_S06_Confront": {
            "sdtype": "numerical"
        },
        "BMK_S07_Participative": {
            "sdtype": "numerical"
        },
        "BMK_S08_Build": {
            "sdtype": "numerical"
        },
        "BMK_S09_Compassion": {
            "sdtype": "numerical"
        },
        "BMK_S10_Putting": {
            "sdtype": "numerical"
        },
        "BMK_S11_Respect":

## Real Dataset

In [17]:
# Correlation matrix of the real (pivoted) data, kept as `real_corr` for later export.
real_corr = synth.generate_corr_matrix(df=df)
# Styled rendering of that matrix as the cell output.
synth.style_correlation_matrix(real_corr)

Unnamed: 0,ESI_Key,BMK_S01_Strategic,BMK_S02_Quick,BMK_S03_Decisive,BMK_S04_Change,BMK_S05_Leading,BMK_S06_Confront,BMK_S07_Participative,BMK_S08_Build,BMK_S09_Compassion,BMK_S10_Putting,BMK_S11_Respect,BMK_D01_Interper,BMK_D02_DiffBuild,BMK_D03_DiffChange,BMK_D04_Failure,BMK_D05_Narrow,BMK_S01_Strategic_boss,BMK_S01_Strategic_direct_report,BMK_S01_Strategic_peer,BMK_S02_Quick_boss,BMK_S02_Quick_direct_report,BMK_S02_Quick_peer,BMK_S03_Decisive_boss,BMK_S03_Decisive_direct_report,BMK_S03_Decisive_peer,BMK_S04_Change_boss,BMK_S04_Change_direct_report,BMK_S04_Change_peer,BMK_S05_Leading_boss,BMK_S05_Leading_direct_report,BMK_S05_Leading_peer,BMK_S06_Confront_boss,BMK_S06_Confront_direct_report,BMK_S06_Confront_peer,BMK_S07_Participative_boss,BMK_S07_Participative_direct_report,BMK_S07_Participative_peer,BMK_S08_Build_boss,BMK_S08_Build_direct_report,BMK_S08_Build_peer,BMK_S09_Compassion_boss,BMK_S09_Compassion_direct_report,BMK_S09_Compassion_peer,BMK_S10_Putting_boss,BMK_S10_Putting_direct_report,BMK_S10_Putting_peer,BMK_S11_Respect_boss,BMK_S11_Respect_direct_report,BMK_S11_Respect_peer,BMK_D01_Interper_boss,BMK_D01_Interper_direct_report,BMK_D01_Interper_peer,BMK_D02_DiffBuild_boss,BMK_D02_DiffBuild_direct_report,BMK_D02_DiffBuild_peer,BMK_D03_DiffChange_boss,BMK_D03_DiffChange_direct_report,BMK_D03_DiffChange_peer,BMK_D04_Failure_boss,BMK_D04_Failure_direct_report,BMK_D04_Failure_peer,BMK_D05_Narrow_boss,BMK_D05_Narrow_direct_report,BMK_D05_Narrow_peer
ESI_Key,1.0,0.054991,0.015142,0.025911,0.043287,0.058062,0.029063,0.049497,0.053077,0.038487,0.036943,0.009083,-0.023299,-0.031172,-0.015627,-0.005586,-0.008601,0.082718,0.091968,0.10951,0.03719,0.043971,0.055676,0.047977,0.072111,0.081249,0.085992,0.090181,0.112627,0.086505,0.08969,0.107221,0.070602,0.085385,0.089749,0.076506,0.081978,0.102685,0.09412,0.083913,0.106267,0.062698,0.059735,0.081546,0.039344,0.052481,0.065289,0.039266,0.062846,0.060808,-0.063672,-0.034593,-0.065071,-0.080814,-0.044903,-0.08538,-0.06458,-0.03706,-0.069983,-0.040614,-0.029332,-0.056909,-0.068249,-0.040053,-0.082688
BMK_S01_Strategic,0.054991,1.0,0.524717,0.54934,0.675164,0.657132,0.512309,0.541989,0.603105,0.448001,0.362729,0.418968,-0.225238,-0.400401,-0.421448,-0.425241,-0.414343,0.196438,0.16454,0.184291,0.142772,0.111914,0.12191,0.181183,0.160723,0.186307,0.154751,0.126994,0.137833,0.147783,0.115821,0.125488,0.162198,0.117466,0.143662,0.09651,0.080538,0.079622,0.099081,0.090092,0.085135,0.095772,0.079293,0.074455,0.053152,0.045612,0.051146,0.076722,0.075862,0.075592,-0.005192,0.00221,0.005128,-0.094241,-0.055887,-0.075948,-0.08577,-0.054483,-0.07158,-0.093161,-0.05871,-0.07923,-0.127132,-0.083972,-0.101984
BMK_S02_Quick,0.015142,0.524717,1.0,0.430088,0.471387,0.434066,0.31709,0.347168,0.382463,0.301314,0.244349,0.315575,-0.098661,-0.236144,-0.242769,-0.324646,-0.313347,0.051384,0.094396,0.06974,0.165658,0.1855,0.178526,0.086154,0.120175,0.112767,0.025963,0.06829,0.041486,0.013531,0.050866,0.031118,0.036126,0.060872,0.052339,-0.036336,0.021781,-0.013304,-0.039212,0.025498,-0.007688,-0.015509,0.019351,-0.013476,-0.050762,-0.014397,-0.038103,0.004069,0.041716,0.014078,0.080313,0.037274,0.069967,0.012966,-0.005124,0.004727,0.033432,-0.002681,0.02356,-0.001786,-0.026045,-0.015668,-0.023439,-0.048985,-0.042812
BMK_S03_Decisive,0.025911,0.54934,0.430088,1.0,0.600144,0.556867,0.580658,0.381431,0.387927,0.294825,0.176501,0.32557,-0.109301,-0.354129,-0.333328,-0.426766,-0.391355,0.080584,0.089751,0.07125,0.084422,0.066015,0.058621,0.22847,0.210693,0.228672,0.067638,0.064064,0.048923,0.063664,0.059148,0.039101,0.1537,0.119363,0.139367,-0.037401,-0.007174,-0.042376,-0.040591,-0.011034,-0.041138,-0.008201,-0.013275,-0.032582,-0.068989,-0.053954,-0.070972,-0.017661,-0.002691,-0.021805,0.117148,0.088669,0.124979,-0.015974,-0.010411,0.003819,0.015913,0.005221,0.0229,-0.025338,-0.026119,-0.013523,-0.047728,-0.059114,-0.039335
BMK_S04_Change,0.043287,0.675164,0.471387,0.600144,1.0,0.749803,0.627199,0.747747,0.672728,0.586817,0.405134,0.535835,-0.35848,-0.515481,-0.494925,-0.448662,-0.414728,0.088095,0.127308,0.117377,0.053698,0.072627,0.060096,0.120441,0.146803,0.151752,0.11975,0.138741,0.13484,0.113885,0.128294,0.124962,0.142686,0.133308,0.142463,0.072763,0.108038,0.09316,0.07187,0.109301,0.094404,0.085801,0.098996,0.097369,0.05548,0.088405,0.084634,0.06714,0.093133,0.085583,-0.031118,-0.041472,-0.037001,-0.080358,-0.076928,-0.088176,-0.064724,-0.074539,-0.076914,-0.041586,-0.055879,-0.059072,-0.062391,-0.079083,-0.080191
BMK_S05_Leading,0.058062,0.657132,0.434066,0.556867,0.749803,1.0,0.634796,0.708748,0.627562,0.62686,0.44997,0.517291,-0.352023,-0.555379,-0.414922,-0.411686,-0.374195,0.063571,0.147553,0.099536,0.030241,0.070228,0.039913,0.109774,0.171995,0.144996,0.098987,0.169888,0.119063,0.159846,0.199727,0.157263,0.145037,0.169687,0.14549,0.076592,0.15672,0.096294,0.039466,0.132123,0.074745,0.121039,0.148489,0.111207,0.047252,0.128832,0.077476,0.074794,0.118181,0.083471,-0.008751,-0.064086,-0.017824,-0.102888,-0.126151,-0.098836,-0.025564,-0.082911,-0.0434,-0.012847,-0.069312,-0.038974,-0.03671,-0.090184,-0.060577
BMK_S06_Confront,0.029063,0.512309,0.31709,0.580658,0.627199,0.634796,1.0,0.461457,0.425013,0.400383,0.230828,0.351757,-0.202419,-0.457747,-0.360868,-0.379359,-0.365789,0.040455,0.10394,0.061533,-0.003615,0.027466,-0.003845,0.142439,0.179122,0.16102,0.078829,0.113779,0.075632,0.090438,0.116353,0.083068,0.234518,0.20401,0.209018,0.008563,0.068884,0.021719,-0.01626,0.040821,0.000374,0.039465,0.052782,0.028424,-0.024646,0.008643,-0.011601,0.02302,0.041872,0.016034,0.060847,0.020822,0.061248,-0.056175,-0.073476,-0.046432,-0.003656,-0.043285,-0.008525,-0.00322,-0.045247,-0.010449,-0.031955,-0.076287,-0.035686
BMK_S07_Participative,0.049497,0.541989,0.347168,0.381431,0.747747,0.708748,0.461457,1.0,0.675143,0.66093,0.477122,0.566365,-0.475765,-0.491662,-0.447668,-0.368064,-0.310073,0.048672,0.106029,0.105068,0.010381,0.048625,0.038372,0.021604,0.080242,0.068129,0.099239,0.151142,0.142587,0.112432,0.150638,0.152571,0.075536,0.111536,0.102196,0.12932,0.176943,0.168902,0.107909,0.156387,0.156331,0.131564,0.157572,0.161864,0.106013,0.161801,0.157537,0.092549,0.122119,0.129826,-0.103404,-0.125341,-0.138607,-0.094542,-0.115397,-0.12443,-0.070631,-0.102457,-0.112326,-0.0332,-0.067923,-0.079406,-0.032815,-0.069828,-0.08906
BMK_S08_Build,0.053077,0.603105,0.382463,0.387927,0.672728,0.627562,0.425013,0.675143,1.0,0.556333,0.564771,0.475827,-0.45303,-0.435358,-0.464669,-0.376675,-0.3529,0.103581,0.140145,0.170536,0.038066,0.076778,0.083724,0.054217,0.100069,0.112392,0.143259,0.162344,0.192995,0.129378,0.155789,0.18047,0.077954,0.106214,0.117692,0.163064,0.169069,0.206518,0.218544,0.215102,0.256495,0.153033,0.168418,0.183795,0.189082,0.201468,0.238831,0.116744,0.142878,0.1654,-0.165121,-0.151929,-0.202763,-0.116633,-0.116847,-0.163468,-0.128632,-0.135853,-0.182995,-0.063784,-0.091235,-0.128205,-0.092137,-0.100213,-0.144924
BMK_S09_Compassion,0.038487,0.448001,0.301314,0.294825,0.586817,0.62686,0.400383,0.66093,0.556333,1.0,0.511719,0.56088,-0.409709,-0.446598,-0.36981,-0.307685,-0.268594,0.01056,0.082381,0.065302,-0.014132,0.033491,0.01358,-0.009798,0.056942,0.035711,0.050201,0.126433,0.097823,0.08131,0.140277,0.126406,0.050208,0.097521,0.075784,0.081663,0.148317,0.125466,0.062837,0.13881,0.114853,0.168262,0.217088,0.192221,0.109731,0.189877,0.15902,0.100791,0.142389,0.129034,-0.062919,-0.105658,-0.098566,-0.066467,-0.104189,-0.102096,-0.026838,-0.076225,-0.067379,0.007109,-0.042397,-0.041102,0.006951,-0.045038,-0.044376


In [18]:
# Plain (unstyled) view of the same correlation matrix.
real_corr

Unnamed: 0,ESI_Key,BMK_S01_Strategic,BMK_S02_Quick,BMK_S03_Decisive,BMK_S04_Change,BMK_S05_Leading,BMK_S06_Confront,BMK_S07_Participative,BMK_S08_Build,BMK_S09_Compassion,...,BMK_D02_DiffBuild_peer,BMK_D03_DiffChange_boss,BMK_D03_DiffChange_direct_report,BMK_D03_DiffChange_peer,BMK_D04_Failure_boss,BMK_D04_Failure_direct_report,BMK_D04_Failure_peer,BMK_D05_Narrow_boss,BMK_D05_Narrow_direct_report,BMK_D05_Narrow_peer
ESI_Key,1.000000,0.054991,0.015142,0.025911,0.043287,0.058062,0.029063,0.049497,0.053077,0.038487,...,-0.085380,-0.064580,-0.037060,-0.069983,-0.040614,-0.029332,-0.056909,-0.068249,-0.040053,-0.082688
BMK_S01_Strategic,0.054991,1.000000,0.524717,0.549340,0.675164,0.657132,0.512309,0.541989,0.603105,0.448001,...,-0.075948,-0.085770,-0.054483,-0.071580,-0.093161,-0.058710,-0.079230,-0.127132,-0.083972,-0.101984
BMK_S02_Quick,0.015142,0.524717,1.000000,0.430088,0.471387,0.434066,0.317090,0.347168,0.382463,0.301314,...,0.004727,0.033432,-0.002681,0.023560,-0.001786,-0.026045,-0.015668,-0.023439,-0.048985,-0.042812
BMK_S03_Decisive,0.025911,0.549340,0.430088,1.000000,0.600144,0.556867,0.580658,0.381431,0.387927,0.294825,...,0.003819,0.015913,0.005221,0.022900,-0.025338,-0.026119,-0.013523,-0.047728,-0.059114,-0.039335
BMK_S04_Change,0.043287,0.675164,0.471387,0.600144,1.000000,0.749803,0.627199,0.747747,0.672728,0.586817,...,-0.088176,-0.064724,-0.074539,-0.076914,-0.041586,-0.055879,-0.059072,-0.062391,-0.079083,-0.080191
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
BMK_D04_Failure_direct_report,-0.029332,-0.058710,-0.026045,-0.026119,-0.055879,-0.069312,-0.045247,-0.067923,-0.091235,-0.042397,...,0.301761,0.230737,0.861327,0.284107,0.261638,1.000000,0.326801,0.245475,0.854583,0.294784
BMK_D04_Failure_peer,-0.056909,-0.079230,-0.015668,-0.013523,-0.059072,-0.038974,-0.010449,-0.079406,-0.128205,-0.041102,...,0.775289,0.324451,0.292349,0.837771,0.348174,0.326801,1.000000,0.335550,0.295196,0.820753
BMK_D05_Narrow_boss,-0.068249,-0.127132,-0.023439,-0.047728,-0.062391,-0.036710,-0.031955,-0.032815,-0.092137,0.006951,...,0.267229,0.718035,0.239196,0.322571,0.739775,0.245475,0.335550,1.000000,0.261749,0.368590
BMK_D05_Narrow_direct_report,-0.040053,-0.083972,-0.048985,-0.059114,-0.079083,-0.090184,-0.076287,-0.069828,-0.100213,-0.045038,...,0.290864,0.212983,0.826469,0.271278,0.229244,0.854583,0.295196,0.261749,1.000000,0.316130


## Train Copula GAN Synthesizer

In [None]:
# Train the Copula GAN synthesizer, but only when a CUDA device is available.
# `torch` must be imported in the notebook's import cell; it was previously missing,
# so this cell failed with NameError on a fresh kernel.
# NOTE(review): model_name is presumably the file the trained model is saved to or
# loaded from — confirm in SynthLeader.train_copula_gan_synthesizer.
if torch.cuda.is_available():
    copula_gan_synthesizer = synth.train_copula_gan_synthesizer(
        model_name='bmk2018_copula_gan.pkl')
else:
    # Training is skipped on CPU on purpose. `copula_gan_synthesizer` stays unbound
    # in this branch, so cells that use it cannot run.
    print("Cuda is not available. Running this model will take several hours on CPU.")

In [None]:
# Number of synthetic rows to draw from the trained Copula GAN.
n_synthetic_rows = 15000

# Sample from the synthesizer, compute the correlation matrix of the synthetic data,
# and show it with the same styling used for the real data.
copula_gan_data = synth.generate_synthetic_sample(copula_gan_synthesizer, n_synthetic_rows)
copula_gan_corr = synth.generate_corr_matrix(df=copula_gan_data)
synth.style_correlation_matrix(copula_gan_corr)

In [None]:
# Persist the correlation matrices computed in this notebook.
real_corr.to_csv('./data/real_data_corr.csv')

# The Copula GAN matrix is bound to `copula_gan_corr`; the name `copula_corr` used here
# before is never defined in this notebook and raised NameError. The output path is
# kept unchanged so existing consumers of the file still find it.
copula_gan_corr.to_csv('./data/copula_corr.csv')

# TODO: no CTGAN synthesizer is trained in this notebook, so there is no `ctgan_corr`
# to export. Restore the './data/ctgan_corr.csv' export once a CTGAN section defines it.