In [1]:
import warnings
warnings.filterwarnings('ignore')

# Synthesize models

## Models

### GaussianCopula Model

In [2]:
from sdv.tabular import GaussianCopula

#### What is GaussianCopula?
In mathematical terms, a Gaussian [copula](https://en.wikipedia.org/wiki/Copula_%28probability_theory%29) is a distribution over the unit cube $[0,1]^d$ which is constructed from a multivariate normal distribution over $\mathbb{R}^d$ by using the probability integral transform. Intuitively, a copula is a mathematical function that allows us to describe the joint distribution of multiple random variables by analyzing the dependencies between their marginal distributions.

#### Fitting model

In [3]:
from sdv.demo import load_tabular_demo

# Demo table used by every model below; it carries a `student_id` column and
# a free-text `address` column alongside the numeric/categorical features.
data = load_tabular_demo('student_placements_pii')

# DataFrame.info() writes its summary directly to stdout and returns None,
# so it is called bare: wrapping it in print() would emit a stray "None".
data.info()
# Trailing expression: the notebook renders a random 5-row preview.
data.sample(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 215 entries, 0 to 214
Data columns (total 18 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   student_id          215 non-null    int64         
 1   address             215 non-null    object        
 2   gender              215 non-null    object        
 3   second_perc         215 non-null    float64       
 4   high_perc           215 non-null    float64       
 5   high_spec           215 non-null    object        
 6   degree_perc         215 non-null    float64       
 7   degree_type         215 non-null    object        
 8   work_experience     215 non-null    bool          
 9   experience_years    215 non-null    int64         
 10  employability_perc  215 non-null    float64       
 11  mba_spec            215 non-null    object        
 12  mba_perc            215 non-null    float64       
 13  salary              148 non-null    float64       

Unnamed: 0,student_id,address,gender,second_perc,high_perc,high_spec,degree_perc,degree_type,work_experience,experience_years,employability_perc,mba_spec,mba_perc,salary,placed,start_date,end_date,duration
121,17385,767 Katherine Turnpike Apt. 168\nPort Makaylav...,F,64.0,67.0,Science,69.6,Sci&Tech,True,1,55.67,Mkt&HR,71.49,25000.0,True,2020-06-30,2020-10-06,3.0
19,17283,"87765 William Mews Suite 474\nMichaelhaven, IL...",M,60.0,67.0,Arts,70.0,Comm&Mgmt,True,1,50.48,Mkt&Fin,77.89,23600.0,True,2020-01-24,2021-01-08,12.0
82,17346,Unit 7455 Box 2916\nDPO AE 28179,M,63.0,67.0,Commerce,74.0,Comm&Mgmt,False,0,82.0,Mkt&Fin,60.44,,False,NaT,NaT,
17,17281,"36924 Jamie Cliffs\nPhillipmouth, TX 21393",F,55.0,67.0,Commerce,64.0,Comm&Mgmt,False,0,60.0,Mkt&Fin,67.28,,False,NaT,NaT,
21,17285,"1350 Tyler Hollow\nNew Jacquelineport, OH 59348",F,79.0,76.0,Commerce,85.0,Comm&Mgmt,False,0,95.0,Mkt&Fin,69.06,39300.0,True,2020-01-18,2020-04-08,3.0


##### without Primary Key of the table, without Anonymizing Personally Identifiable Information (No PII)

In [4]:
# Baseline: default GaussianCopula, with no primary key and no anonymization
# configured.
model = GaussianCopula()
model.fit(data)

synthesize = model.sample(200)

# Occurrences of the most frequent synthetic student_id (1 means all unique).
max_id_repeats = synthesize["student_id"].value_counts().max()
# Number of synthetic addresses that also appear in the real table.
leaked_addresses = synthesize["address"].isin(data["address"]).sum()

print(f"synthesize student_id max values count: {max_id_repeats}")
print(f"synthesize address is in original dataset:{leaked_addresses}")

synthesize student_id max values count: 5
synthesize address is in original dataset:200


##### with Primary Key of the table

In [5]:
# Same model as before, but `student_id` is now declared as the primary key.
GCkwargs = dict(primary_key="student_id")

model = GaussianCopula(**GCkwargs)
model.fit(data)

synthesize = model.sample(200)

# Per-id frequency table and a boolean mask of addresses found in `data`.
id_counts = synthesize["student_id"].value_counts()
reused_addresses = synthesize["address"].isin(data["address"])

print(f"synthesize student_id max values count: {id_counts.max()}")
print(f"synthesize address is in original dataset:{reused_addresses.sum()}")
synthesize.head()

synthesize student_id max values count: 1
synthesize address is in original dataset:200


Unnamed: 0,student_id,address,gender,second_perc,high_perc,high_spec,degree_perc,degree_type,work_experience,experience_years,employability_perc,mba_spec,mba_perc,salary,placed,start_date,end_date,duration
0,0,"65737 Meyer Junction Suite 154\nWest Steven, N...",M,73.43,62.2,Science,63.59,Comm&Mgmt,False,0,73.75,Mkt&HR,54.99,27000.0,True,2020-02-15,2020-08-24,6.0
1,1,"3534 Martinez Parks Suite 682\nLake Anthony, N...",M,84.08,68.59,Science,70.03,Comm&Mgmt,False,0,58.75,Mkt&Fin,61.86,27100.0,True,2020-07-26,2020-10-19,4.0
2,2,"47565 Davis Expressway\nDenisemouth, ID 54974",M,58.66,64.2,Science,64.24,Comm&Mgmt,False,1,95.36,Mkt&Fin,51.21,27000.0,True,2020-02-14,2021-01-06,10.0
3,3,"1729 Thomas Islands Apt. 583\nSanchezview, ND ...",M,77.84,75.68,Science,65.37,Comm&Mgmt,False,1,93.1,Mkt&Fin,51.77,28500.0,True,2020-03-09,2020-09-02,3.0
4,4,"99895 Jorge Manor Apt. 381\nWest Christine, FL...",M,82.96,65.53,Science,65.65,Comm&Mgmt,False,0,63.51,Mkt&HR,64.41,31200.0,False,2020-02-06,2020-07-31,5.0


##### With Anonymizing Personally Identifiable Information (PII)
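The values in `anonymize_fields` name a [Faker provider](https://faker.readthedocs.io/en/master/providers.html) method (here `address`) that is used to generate replacement values for the field. (TODO: check in the SDV code where Faker is called: when preparing the training data, or at sampling time.)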

In [6]:
GCkwargs = dict(
    primary_key="student_id",
    # Column `address` -> category `address` (presumably the Faker provider
    # name, per the note above this cell).
    anonymize_fields=dict(address="address"),
)

model = GaussianCopula(**GCkwargs)
model.fit(data)

synthesize = model.sample(200)

# Number of synthetic addresses that also occur in the real table.
leaked_addresses = synthesize["address"].isin(data["address"]).sum()
print(f"synthesize address is in original dataset:{leaked_addresses}")
synthesize.head()

synthesize address is in original dataset:0


Unnamed: 0,student_id,address,gender,second_perc,high_perc,high_spec,degree_perc,degree_type,work_experience,experience_years,employability_perc,mba_spec,mba_perc,salary,placed,start_date,end_date,duration
0,0,"354 Joshua Centers Apt. 848\nNew Justinville, ...",F,70.93,77.09,Commerce,70.27,Comm&Mgmt,False,0,51.76,Mkt&HR,57.39,27400.0,True,2020-01-12,2020-06-21,5.0
1,1,"290 Carolyn Key\nMarybury, NJ 12636",M,50.22,64.9,Commerce,60.12,Comm&Mgmt,False,1,63.98,Mkt&Fin,62.5,,False,NaT,NaT,
2,2,65113 Shaw Plaza Suite 585\nEast Kimberlyshire...,F,76.12,66.92,Science,64.98,Comm&Mgmt,False,1,67.52,Mkt&HR,75.02,,False,NaT,NaT,
3,3,"10669 Vincent Forge\nWest Timothy, CT 34954",M,84.68,73.03,Science,82.8,Sci&Tech,False,1,95.52,Mkt&Fin,75.56,26400.0,True,2020-05-05,2020-07-26,4.0
4,4,"266 Brown Court\nJessicamouth, IN 48249",M,60.84,90.58,Commerce,61.99,Comm&Mgmt,False,0,56.18,Mkt&Fin,67.34,,False,NaT,NaT,


##### Field_distributions

GaussianCopula lets us indicate which distribution to use for each column of the table, which is useful when the automatically selected distribution fits a column poorly. To do this, we pass a `field_distributions` argument: a dict that maps each column name to the distribution we want to use for it.

Possible values for the distribution argument are:
- `univariate`: Let copulas select the optimal univariate distribution. This may result in non-parametric models being used.
- `parametric`: Let copulas select the optimal univariate distribution, but restrict the selection to parametric distributions only.
- `bounded`: Let copulas select the optimal univariate distribution, but restrict the selection to bounded distributions only. This may result in non-parametric models being used.
- `semi_bounded`: Let copulas select the optimal univariate distribution, but restrict the selection to semi-bounded distributions only. This may result in non-parametric models being used.
- `parametric_bounded`: Let copulas select the optimal univariate distribution, but restrict the selection to parametric and bounded distributions only.
- `parametric_semi_bounded`: Let copulas select the optimal univariate distribution, but restrict the selection to parametric and semi-bounded distributions only.
- `gaussian`: Use a Gaussian distribution.
- `gamma`: Use a Gamma distribution.
- `beta`: Use a Beta distribution.
- `student_t`: Use a Student T distribution.
- `gaussian_kde`: Use a GaussianKDE distribution. This model is non-parametric, so using this will make get_parameters unusable.
- `truncated_gaussian`: Use a Truncated Gaussian distribution.

In [7]:
GCkwargs = dict(
    primary_key="student_id",
    anonymize_fields=dict(address="address"),
    min_value=None,
    max_value=None,
    rounding=2,
    # Column name -> marginal distribution, chosen from the list above.
    field_distributions=dict(
        experience_years="gamma",
        second_perc="beta",
    ),
)

model = GaussianCopula(**GCkwargs)
model.fit(data)

synthesize = model.sample(200)

# Number of synthetic addresses that also occur in the real table.
leaked_addresses = synthesize["address"].isin(data["address"]).sum()
print(f"synthesize address is in original dataset:{leaked_addresses}")
synthesize.head()

synthesize address is in original dataset:0


Unnamed: 0,student_id,address,gender,second_perc,high_perc,high_spec,degree_perc,degree_type,work_experience,experience_years,employability_perc,mba_spec,mba_perc,salary,placed,start_date,end_date,duration
0,0,"12317 Gary Walks\nWangland, CA 08927",M,88.0,77.77,Science,67.44,Sci&Tech,False,0,80.08,Mkt&Fin,77.88,50563.21,True,2020-05-21,2020-05-22,0.24
1,1,"1468 Rose Station\nBuchananberg, VA 24610",M,85.17,97.86,Commerce,77.34,Comm&Mgmt,False,0,95.57,Mkt&Fin,59.38,36812.15,True,2020-01-12,2020-05-30,4.7
2,2,"481 Darin Manor Suite 577\nJaneburgh, NV 35064",M,69.68,64.47,Science,70.37,Comm&Mgmt,False,0,78.37,Mkt&Fin,61.12,30710.23,True,2020-01-11,2020-08-22,7.71
3,3,"435 Jeremy Prairie Suite 799\nPort Misty, MO 6...",F,62.79,67.15,Commerce,70.18,Comm&Mgmt,False,0,86.05,Mkt&Fin,62.27,29080.74,True,2020-01-16,2020-11-03,10.36
4,4,"3079 Jeffrey Walk Apt. 658\nKathrynton, TN 45973",M,69.15,81.2,Science,59.54,Sci&Tech,False,0,88.21,Mkt&Fin,62.66,35032.6,True,2020-01-28,2020-06-21,4.57


### CTGAN

In [8]:
from sdv.tabular import CTGAN

#### Fitting model

##### without Primary Key of the table, without Anonymizing Personally Identifiable Information (No PII)

In [9]:
# Baseline: default CTGAN, with no primary key and no anonymization
# configured.
model = CTGAN()
model.fit(data)

synthesize = model.sample(200)

# Occurrences of the most frequent synthetic student_id (1 means all unique).
max_id_repeats = synthesize["student_id"].value_counts().max()
# Number of synthetic addresses that also appear in the real table.
leaked_addresses = synthesize["address"].isin(data["address"]).sum()

print(f"synthesize student_id max values count: {max_id_repeats}")
print(f"synthesize address is in original dataset:{leaked_addresses}")

synthesize student_id max values count: 86
synthesize address is in original dataset:200


##### with Primary Key of the table

In [10]:
# Declare `student_id` as the primary key of the table.
CTGANkwargs = dict(primary_key="student_id")

model = CTGAN(**CTGANkwargs)
model.fit(data)

synthesize = model.sample(200)

# Per-id frequency table and a boolean mask of addresses found in `data`.
id_counts = synthesize["student_id"].value_counts()
reused_addresses = synthesize["address"].isin(data["address"])

print(f"synthesize student_id max values count: {id_counts.max()}")
print(f"synthesize address is in original dataset:{reused_addresses.sum()}")

synthesize student_id max values count: 1
synthesize address is in original dataset:200


##### With Anonymizing Personally Identifiable Information (PII)

In [11]:
CTGANkwargs = dict(
    primary_key="student_id",
    # Column `address` is replaced using the `address` anonymization category.
    anonymize_fields=dict(address="address"),
)

model = CTGAN(**CTGANkwargs)
model.fit(data)

synthesize = model.sample(200)

# Occurrences of the most frequent synthetic student_id (1 means all unique).
max_id_repeats = synthesize["student_id"].value_counts().max()
# Number of synthetic addresses that also appear in the real table.
leaked_addresses = synthesize["address"].isin(data["address"]).sum()

print(f"synthesize student_id max values count: {max_id_repeats}")
print(f"synthesize address is in original dataset:{leaked_addresses}")
synthesize.sample(10)

synthesize student_id max values count: 1
synthesize address is in original dataset:0


Unnamed: 0,student_id,address,gender,second_perc,high_perc,high_spec,degree_perc,degree_type,work_experience,experience_years,employability_perc,mba_spec,mba_perc,salary,placed,start_date,end_date,duration
119,119,"2816 Miller Underpass\nWest Jamiestad, LA 53703",M,82.91,76.02,Science,72.98,Comm&Mgmt,True,0,67.94,Mkt&Fin,55.56,31600.0,True,NaT,NaT,3.0
24,24,"284 Jeffrey Forks Apt. 907\nGatesburgh, IN 40579",F,50.53,64.08,Science,50.0,Comm&Mgmt,True,0,98.0,Mkt&HR,51.21,27500.0,True,NaT,2020-10-21,6.0
168,168,"00737 Fuentes Port\nPort Meganhaven, NM 03313",F,70.56,63.7,Science,71.28,Comm&Mgmt,False,0,50.47,Mkt&HR,74.04,32900.0,True,2020-05-06,2020-10-24,
137,137,"560 Samuel Courts Apt. 750\nBradfordborough, U...",M,71.1,59.66,Commerce,63.94,Comm&Mgmt,False,1,76.84,Mkt&Fin,51.21,,True,2020-01-11,NaT,
32,32,"76719 Thompson Common Suite 150\nNew Alexis, H...",F,45.33,58.87,Commerce,50.0,Comm&Mgmt,False,0,64.72,Mkt&HR,63.21,,True,2020-01-14,2020-10-27,
100,100,65248 Harrison Walk Suite 753\nSouth Dianafort...,M,51.98,68.14,Science,50.0,Others,False,0,69.95,Mkt&HR,60.9,30100.0,True,2020-02-05,2020-07-12,
34,34,"17435 Brown Track\nPort Kyle, WA 05198",M,53.19,78.35,Science,50.0,Comm&Mgmt,True,0,60.17,Mkt&HR,63.93,,True,2020-02-29,2020-08-10,
99,99,USNS Douglas\nFPO AP 53623,M,86.03,56.36,Commerce,50.31,Comm&Mgmt,True,0,84.49,Mkt&HR,76.83,,True,2020-01-19,NaT,
114,114,"3182 Christopher Loop Suite 298\nJoshuaville, ...",M,75.12,58.18,Commerce,58.42,Comm&Mgmt,False,0,98.0,Mkt&HR,55.05,24300.0,True,NaT,NaT,
46,46,"591 Douglas Parkway\nPort Joseph, UT 22137",F,72.46,64.14,Science,62.78,Comm&Mgmt,False,0,51.26,Mkt&HR,71.09,,True,2020-06-26,2020-10-02,12.0


##### Modify CTGAN Hyperparameters

In [12]:
CTGANkwargs = dict(
    # --- table-level options ---
    primary_key="student_id",
    anonymize_fields=dict(address="address"),
    min_value=None,
    max_value=None,
    rounding=2,
    verbose=False,
    # --- GAN training options ---
    epochs=300,
    batch_size=500,
    # Affects how the model processes the frequencies of the categorical
    # values.
    log_frequency=True,
    generator_dim=(256, 256),
    embedding_dim=128,
    discriminator_dim=(256, 256),
    generator_lr=2e-4,
    discriminator_lr=2e-4,
    generator_decay=1e-6,
    discriminator_decay=1e-6,
    # Number of discriminator updates to do for each generator update
    # (from WGAN).
    discriminator_steps=1,
    cuda=True,
)

model = CTGAN(**CTGANkwargs)
model.fit(data)

In [13]:
# Draw 200 rows from the model fitted in the previous cell and re-run the
# uniqueness / address-leak checks.
synthesize = model.sample(200)

max_id_repeats = synthesize["student_id"].value_counts().max()
leaked_addresses = synthesize["address"].isin(data["address"]).sum()

print(f"synthesize student_id max values count: {max_id_repeats}")
print(f"synthesize address is in original dataset:{leaked_addresses}")

synthesize student_id max values count: 1
synthesize address is in original dataset:0


##### Conditional sampling

In [14]:
# Goal: synthetic rows whose `second_perc` is greater than 50.
#
# SDV's `conditions` argument only matches exact column values, and it is not
# the second positional parameter of `sample()`, so a call such as
# `model.sample(200, {'second_perc': '>50'})` applies no filter at all.
# The range constraint is therefore enforced by rejection: oversample, keep
# the qualifying rows, and truncate to the requested size.
num_rows = 200
candidates = model.sample(2 * num_rows)

# May hold fewer than `num_rows` rows if too few candidates qualify.
synthesize = (
    candidates[candidates['second_perc'] > 50]
    .head(num_rows)
    .reset_index(drop=True)
)
synthesize.head()

Unnamed: 0,student_id,address,gender,second_perc,high_perc,high_spec,degree_perc,degree_type,work_experience,experience_years,employability_perc,mba_spec,mba_perc,salary,placed,start_date,end_date,duration
0,0,"454 Maldonado Keys Apt. 185\nBrittanychester, ...",M,81.05,61.49,Commerce,81.66,Comm&Mgmt,True,0,56.94,Mkt&Fin,54.9,25247.96,False,2020-07-31,2020-07-18,6.22
1,1,"797 Harrison Fields Apt. 481\nBakerside, LA 99665",M,76.01,63.23,Science,77.29,Sci&Tech,False,0,67.17,Mkt&Fin,62.64,25606.17,True,2019-12-20,NaT,5.88
2,2,"5865 Gill Plains Suite 026\nKatherinestad, AL ...",M,64.24,62.84,Commerce,82.36,Comm&Mgmt,False,0,97.09,Mkt&HR,68.08,30420.6,True,NaT,NaT,
3,3,"1329 Smith Greens\nPort Nicoleborough, NV 14030",F,84.7,59.51,Science,60.55,Comm&Mgmt,False,0,89.83,Mkt&Fin,67.36,32112.7,False,NaT,2020-05-24,6.58
4,4,"3551 Banks Trail Suite 505\nElizabethburgh, MD...",F,78.27,73.54,Science,76.92,Others,False,0,74.03,Mkt&Fin,58.6,27898.72,False,NaT,NaT,5.89


### TVAE

In [15]:
from sdv.tabular import TVAE

#### Fitting model

##### without Primary Key of the table, without Anonymizing Personally Identifiable Information (No PII)

In [16]:
# Baseline: default TVAE, with no primary key and no anonymization configured.
model = TVAE()
model.fit(data)

synthesize = model.sample(200)

# Occurrences of the most frequent synthetic student_id (1 means all unique).
max_id_repeats = synthesize["student_id"].value_counts().max()
# Number of synthetic addresses that also appear in the real table.
leaked_addresses = synthesize["address"].isin(data["address"]).sum()

print(f"synthesize student_id max values count: {max_id_repeats}")
print(f"synthesize address is in original dataset:{leaked_addresses}")

synthesize student_id max values count: 7
synthesize address is in original dataset:200


##### with Primary Key of the table

In [17]:
# Declare `student_id` as the primary key of the table.
TVAEkwargs = dict(primary_key="student_id")

model = TVAE(**TVAEkwargs)
model.fit(data)

synthesize = model.sample(200)

# Per-id frequency table and a boolean mask of addresses found in `data`.
id_counts = synthesize["student_id"].value_counts()
reused_addresses = synthesize["address"].isin(data["address"])

print(f"synthesize student_id max values count: {id_counts.max()}")
print(f"synthesize address is in original dataset:{reused_addresses.sum()}")

synthesize student_id max values count: 1
synthesize address is in original dataset:200


##### With Anonymizing Personally Identifiable Information (PII)

In [18]:
TVAEkwargs = dict(
    primary_key="student_id",
    # Column `address` is replaced using the `address` anonymization category.
    anonymize_fields=dict(address="address"),
)

model = TVAE(**TVAEkwargs)
model.fit(data)

synthesize = model.sample(200)

# Occurrences of the most frequent synthetic student_id (1 means all unique).
max_id_repeats = synthesize["student_id"].value_counts().max()
# Number of synthetic addresses that also appear in the real table.
leaked_addresses = synthesize["address"].isin(data["address"]).sum()

print(f"synthesize student_id max values count: {max_id_repeats}")
print(f"synthesize address is in original dataset:{leaked_addresses}")
synthesize.sample(10)

synthesize student_id max values count: 1
synthesize address is in original dataset:0


Unnamed: 0,student_id,address,gender,second_perc,high_perc,high_spec,degree_perc,degree_type,work_experience,experience_years,employability_perc,mba_spec,mba_perc,salary,placed,start_date,end_date,duration
166,166,"317 Potts Roads\nNew Benjamin, HI 10538",M,57.4,61.79,Commerce,51.14,Comm&Mgmt,False,0,57.01,Mkt&HR,57.56,,False,NaT,NaT,
38,38,"2248 West Brooks Apt. 125\nLake Kathleenfurt, ...",M,72.41,48.33,Commerce,63.29,Comm&Mgmt,False,0,57.03,Mkt&HR,53.33,25000.0,True,2020-07-12,2020-09-22,3.0
117,117,"87158 Sarah Ramp Suite 492\nJordanmouth, KY 54114",M,59.15,62.04,Commerce,61.02,Comm&Mgmt,False,0,60.44,Mkt&HR,55.77,27400.0,True,2020-01-07,2020-08-10,6.0
158,158,"01001 Rodriguez Dam Suite 553\nMichaelton, CA ...",M,78.63,49.84,Commerce,58.24,Comm&Mgmt,False,0,70.23,Mkt&Fin,59.78,22400.0,True,2020-01-16,2020-04-16,3.0
145,145,"23193 Angela Ferry Apt. 639\nLeslieton, VA 02022",M,80.6,72.58,Commerce,64.15,Comm&Mgmt,True,0,93.04,Mkt&Fin,63.71,27900.0,True,2020-01-13,2020-10-16,3.0
180,180,"82325 Regina Summit\nPort Miguel, WV 66042",M,51.53,63.04,Commerce,59.61,Comm&Mgmt,False,0,63.82,Mkt&HR,55.63,,False,NaT,NaT,
100,100,"824 Helen Hill\nHardinton, MO 23253",M,78.92,93.57,Commerce,63.25,Comm&Mgmt,True,0,69.48,Mkt&Fin,61.25,25400.0,True,2020-01-11,2020-10-28,3.0
17,17,"203 Kristi Lock\nPort Anthonychester, KS 83517",M,84.47,73.36,Commerce,65.25,Comm&Mgmt,False,0,56.73,Mkt&HR,59.06,25900.0,True,2020-01-24,2020-12-14,3.0
83,83,"6611 Dean Manor\nAnnetteview, GA 98638",M,58.77,60.48,Commerce,65.45,Comm&Mgmt,True,0,54.78,Mkt&Fin,57.99,29300.0,True,2020-01-24,2020-08-22,5.0
35,35,"985 Jonathan Spur Apt. 311\nMunozland, CO 33628",M,79.15,74.36,Commerce,66.53,Comm&Mgmt,False,0,91.3,Mkt&Fin,65.79,24300.0,True,2020-01-25,2020-07-23,6.0


##### Modify TVAE Hyperparameters

In [20]:
TVAEkwargs = dict(
    # --- table-level options ---
    primary_key="student_id",
    anonymize_fields=dict(address="address"),
    min_value=None,
    max_value=None,
    rounding=2,
    # --- training schedule ---
    epochs=300,
    batch_size=500,
    # --- network layout ---
    compress_dims=(256, 256),
    embedding_dim=128,
    decompress_dims=(256, 256),
    # --- loss / optimisation ---
    l2scale=1e-5,
    # Scale factor applied to the reconstruction loss.
    loss_factor=2,
    cuda=True,
)

model = TVAE(**TVAEkwargs)
model.fit(data)

synthesize = model.sample(200)

# Occurrences of the most frequent synthetic student_id (1 means all unique).
max_id_repeats = synthesize["student_id"].value_counts().max()
# Number of synthetic addresses that also appear in the real table.
leaked_addresses = synthesize["address"].isin(data["address"]).sum()

print(f"synthesize student_id max values count: {max_id_repeats}")
print(f"synthesize address is in original dataset:{leaked_addresses}")
synthesize.sample(10)

synthesize student_id max values count: 1
synthesize address is in original dataset:0


Unnamed: 0,student_id,address,gender,second_perc,high_perc,high_spec,degree_perc,degree_type,work_experience,experience_years,employability_perc,mba_spec,mba_perc,salary,placed,start_date,end_date,duration
114,114,"2404 Summers Spurs Suite 584\nNorth David, MD ...",M,54.8,60.27,Commerce,54.42,Comm&Mgmt,False,0,60.66,Mkt&HR,56.92,,False,NaT,NaT,
54,54,Unit 0887 Box 7516\nDPO AE 78899,M,77.31,73.89,Commerce,68.27,Comm&Mgmt,False,0,63.12,Mkt&Fin,63.45,19431.99,True,2020-01-15,2020-04-24,2.97
34,34,"453 Travis Mountains\nJeffreymouth, NY 56786",M,62.92,68.03,Commerce,66.55,Comm&Mgmt,False,0,65.07,Mkt&Fin,57.56,29147.15,True,2020-07-18,2020-10-25,2.94
91,91,280 Patricia Manor Suite 176\nNorth Barbaralan...,M,67.5,67.91,Commerce,66.19,Comm&Mgmt,False,0,64.14,Mkt&HR,61.87,25078.18,True,2020-01-19,2020-05-02,2.93
93,93,"222 Douglas Fields\nWeissshire, GA 40357",M,62.96,57.51,Commerce,72.14,Comm&Mgmt,True,0,46.05,Mkt&Fin,54.74,29015.84,True,2020-01-18,2020-11-28,3.07
199,199,"887 Richard Grove Apt. 265\nWest Michael, UT 6...",M,56.4,53.15,Commerce,58.2,Comm&Mgmt,False,0,57.47,Mkt&HR,51.18,,False,NaT,NaT,
78,78,"2554 Kenneth Prairie Apt. 011\nAmbershire, AR ...",M,61.71,60.73,Commerce,62.66,Comm&Mgmt,False,0,57.57,Mkt&Fin,54.97,19019.58,True,2020-01-15,2020-10-22,2.94
28,28,USNV Chambers\nFPO AP 52335,M,61.32,60.66,Commerce,62.83,Comm&Mgmt,False,0,61.79,Mkt&Fin,55.0,30553.55,True,2020-01-19,2020-08-03,5.63
198,198,"01879 Miller Branch Apt. 124\nWest Monica, AZ ...",M,66.31,65.5,Commerce,65.93,Comm&Mgmt,False,0,66.2,Mkt&Fin,55.28,30837.72,True,2020-07-07,2020-10-16,2.92
162,162,"84329 Moyer Locks\nWillisfort, VA 14961",M,62.63,59.72,Science,59.71,Comm&Mgmt,True,0,58.21,Mkt&Fin,56.09,28275.02,True,2020-01-16,2020-08-28,5.46
