In [1]:
import pandas
import seaborn
import numpy

import sklearn.preprocessing

%matplotlib inline

# Instructions

- Read both training and test data from CSV files
- Then, identify which columns are numerical and which columns are categorical
    - Note that some numerical features are actually categorical features
- Assess whether the data is balanced or imbalanced

In [2]:
# Read the training split, using the `id` column directly as the row index.
data_train = pandas.read_csv('../data/credit.train.csv', index_col='id')
data_train

Unnamed: 0_level_0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,label
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
253,no checking,24,delayed previously,furniture/equipment,4151,100<=X<500,1<=X<4,2,male single,none,...,life insurance,35,none,own,2,skilled,1,none,yes,good
667,no checking,48,all paid,business,3609,<100,1<=X<4,1,female div/dep/mar,none,...,real estate,27,stores,own,1,skilled,1,none,yes,good
85,no checking,12,critical/other existing credit,business,1412,<100,1<=X<4,4,female div/dep/mar,guarantor,...,real estate,29,none,own,2,high qualif/self emp/mgmt,1,yes,yes,good
969,<0,11,critical/other existing credit,new car,3939,<100,1<=X<4,1,male single,none,...,real estate,40,none,own,2,unskilled resident,2,none,yes,good
75,<0,12,critical/other existing credit,used car,1526,<100,>=7,4,male single,none,...,no known property,66,none,for free,2,high qualif/self emp/mgmt,1,none,yes,good
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
835,<0,12,no credits/all paid,new car,1082,<100,1<=X<4,4,male single,none,...,car,48,bank,own,2,skilled,1,none,yes,bad
192,0<=X<200,27,existing paid,business,3915,<100,1<=X<4,4,male single,none,...,car,36,none,own,1,skilled,2,yes,yes,bad
629,no checking,9,existing paid,education,3832,no known savings,>=7,1,male single,none,...,real estate,64,none,own,1,unskilled resident,1,none,yes,good
559,0<=X<200,18,critical/other existing credit,furniture/equipment,1928,<100,<1,2,male single,none,...,real estate,31,none,own,2,unskilled resident,1,none,yes,bad


In [3]:
# Read the test split, using the `id` column directly as the row index.
data_test = pandas.read_csv('../data/credit.test.csv', index_col='id')
data_test

Unnamed: 0_level_0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,label
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
993,<0,36,existing paid,furniture/equipment,3959,<100,unemployed,4,male single,none,...,life insurance,30,none,own,1,high qualif/self emp/mgmt,1,yes,yes,good
859,no checking,9,existing paid,new car,3577,100<=X<500,1<=X<4,1,male single,guarantor,...,real estate,26,none,rent,1,skilled,2,none,no,good
298,no checking,18,existing paid,furniture/equipment,2515,<100,1<=X<4,3,male single,none,...,real estate,43,none,own,1,skilled,1,yes,yes,good
553,0<=X<200,12,critical/other existing credit,new car,1995,100<=X<500,<1,4,male single,none,...,car,27,none,own,1,skilled,1,none,yes,good
672,no checking,60,existing paid,new car,10366,<100,>=7,2,male single,none,...,life insurance,42,none,own,1,high qualif/self emp/mgmt,1,yes,yes,good
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
462,0<=X<200,12,existing paid,furniture/equipment,3017,<100,<1,3,female div/dep/mar,none,...,real estate,34,none,rent,1,high qualif/self emp/mgmt,1,none,yes,good
356,no checking,12,critical/other existing credit,radio/tv,2331,no known savings,>=7,1,male single,co applicant,...,real estate,49,none,own,1,skilled,1,yes,yes,good
2,no checking,12,critical/other existing credit,education,2096,<100,4<=X<7,2,male single,none,...,real estate,49,none,own,1,unskilled resident,2,none,yes,good
478,0<=X<200,12,existing paid,business,1037,100<=X<500,4<=X<7,3,male single,none,...,real estate,39,none,own,1,unskilled resident,1,none,yes,good


In [4]:
# Show the dtype pandas inferred for every column: `object` columns hold
# strings, `int64` columns hold integers (some of which are discrete codes).
data_train.dtypes

checking_status           object
duration                   int64
credit_history            object
purpose                   object
credit_amount              int64
savings_status            object
employment                object
installment_commitment     int64
personal_status           object
other_parties             object
residence_since            int64
property_magnitude        object
age                        int64
other_payment_plans       object
housing                   object
existing_credits           int64
job                       object
num_dependents             int64
own_telephone             object
foreign_worker            object
label                     object
dtype: object

In [5]:
# Features treated as categorical: every string-typed (`object`) column plus
# the integer-typed columns that only take a few discrete values.
categorical_features = [
    # string-valued columns
    'checking_status',
    'credit_history',
    'purpose',
    'savings_status',
    'employment',
    'personal_status',
    'other_parties',
    'property_magnitude',
    'other_payment_plans',
    'housing',
    'job',
    'own_telephone',
    'foreign_worker',
    # integer-coded columns
    'installment_commitment',
    'residence_since',
    'existing_credits',
    'num_dependents',
]

# Pad names to the longest feature name so the printed columns stay aligned;
# a fixed width of 20 is too narrow for 'installment_commitment' (22 chars).
name_width = max(len(feature) for feature in categorical_features)

for feature in categorical_features:
    # unique() returns the distinct values in order of first appearance.
    possible_values = data_train[feature].unique()
    print(f"{feature:{name_width}s} {possible_values.size:2d} {possible_values}")

checking_status       4 ['no checking' '<0' '0<=X<200' '>=200']
credit_history        5 ['delayed previously' 'all paid' 'critical/other existing credit'
 'existing paid' 'no credits/all paid']
purpose              10 ['furniture/equipment' 'business' 'new car' 'used car' 'radio/tv'
 'domestic appliance' 'repairs' 'other' 'education' 'retraining']
savings_status        5 ['100<=X<500' '<100' '500<=X<1000' '>=1000' 'no known savings']
employment            5 ['1<=X<4' '>=7' '<1' '4<=X<7' 'unemployed']
personal_status       4 ['male single' 'female div/dep/mar' 'male mar/wid' 'male div/sep']
other_parties         3 ['none' 'guarantor' 'co applicant']
property_magnitude    4 ['life insurance' 'real estate' 'no known property' 'car']
other_payment_plans   3 ['none' 'stores' 'bank']
housing               3 ['own' 'for free' 'rent']
job                   4 ['skilled' 'high qualif/self emp/mgmt' 'unskilled resident'
 'unemp/unskilled non res']
own_telephone         2 ['none' 'yes']
foreign_wo

In [6]:
# Count how many training rows carry each label value to judge class balance.
data_train['label'].value_counts()

label
good    524
bad     226
Name: count, dtype: int64

In [7]:
# Count how many test rows carry each label value to judge class balance.
data_test['label'].value_counts()

label
good    176
bad      74
Name: count, dtype: int64

# Instructions
- Investigate whether the dataset contains any missing data

In [8]:
# Number of missing entries per column in the training data
# (axis='index' sums down the rows, giving one total per column).
data_train.isna().sum(axis='index')

checking_status           0
duration                  0
credit_history            0
purpose                   0
credit_amount             0
savings_status            0
employment                0
installment_commitment    0
personal_status           0
other_parties             0
residence_since           0
property_magnitude        0
age                       0
other_payment_plans       0
housing                   0
existing_credits          0
job                       0
num_dependents            0
own_telephone             0
foreign_worker            0
label                     0
dtype: int64

In [9]:
# Number of missing entries per column in the test data
# (axis='index' sums down the rows, giving one total per column).
data_test.isna().sum(axis='index')

checking_status           0
duration                  0
credit_history            0
purpose                   0
credit_amount             0
savings_status            0
employment                0
installment_commitment    0
personal_status           0
other_parties             0
residence_since           0
property_magnitude        0
age                       0
other_payment_plans       0
housing                   0
existing_credits          0
job                       0
num_dependents            0
own_telephone             0
foreign_worker            0
label                     0
dtype: int64

# Instructions
- Create 3 `pandas.DataFrame`s
    - `pandas.DataFrame` of only numerical features. Also, apply `sklearn.preprocessing.StandardScaler`.
    - `pandas.DataFrame` of only ordinal features encoded using `sklearn.preprocessing.OrdinalEncoder`
    - `pandas.DataFrame` of labels encoded using `sklearn.preprocessing.OrdinalEncoder` 
       - Map {`good`, `bad`} to {0, 1} respectively
- Note that the scaler and encoders are fitted on the training data only; the same **fitted parameters** should be applied to the test data as well
- Then, combine the 3 `pandas.DataFrame` to obtain the fully processed training and test data
  - Hint: use `.join`


## For numerical features
- Define and select a subset of numerical features for training and test data
- Use `sklearn.preprocessing.StandardScaler` to create a standard scaler, then fit using the training data
- Create a new `pandas.DataFrame` of only numerically encoded features for both training and test data

In [10]:
# Integer-typed columns that are kept as numerical features (not treated as categories).
numerical_features = ['duration', 'credit_amount', 'age']

Unnamed: 0_level_0,duration,credit_amount,age
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
253,24,4151,35
667,48,3609,27
85,12,1412,29
969,11,3939,40
75,12,1526,66
...,...,...,...
835,12,1082,48
192,27,3915,36
629,9,3832,64
559,18,1928,31


Unnamed: 0_level_0,duration,credit_amount,age
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
993,36,3959,30
859,9,3577,26
298,18,2515,43
553,12,1995,27
672,60,10366,42
...,...,...,...
462,12,3017,34
356,12,2331,49
2,12,2096,49
478,12,1037,39


Unnamed: 0_level_0,duration,credit_amount,age
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
253,0.277814,0.320904,-0.080669
667,2.289342,0.130313,-0.765274
85,-0.727949,-0.642249,-0.594123
969,-0.811763,0.246356,0.347209
75,-0.727949,-0.602162,2.572174
...,...,...,...
835,-0.727949,-0.758292,1.031813
192,0.529255,0.237916,0.004906
629,-0.979390,0.208730,2.401022
559,-0.225068,-0.460801,-0.422972


Unnamed: 0_level_0,duration,credit_amount,age
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
993,1.283578,0.253389,-0.508547
859,-0.979390,0.119061,-0.850849
298,-0.225068,-0.254385,0.603935
553,-0.727949,-0.437240,-0.765274
672,3.295105,2.506373,0.518360
...,...,...,...
462,-0.727949,-0.077860,-0.166245
356,-0.727949,-0.319088,1.117389
2,-0.727949,-0.401724,1.117389
478,-0.727949,-0.774116,0.261633


## For ordinal features
- Define a subset of ordinal features
- Define a list of ordinal maps which sort the feature values in appropriate orders
- Use `sklearn.preprocessing.OrdinalEncoder` to create an ordinal encoder, then fit using the training data
- Create a new `pandas.DataFrame` of only ordinally encoded features for both training and test data

In [17]:
# Columns to be ordinally encoded. `ordinal_mapping` lists its category
# orderings in this same column order, so keep the two in sync.
ordinal_features = [
    # string-valued columns
    'checking_status', 'credit_history', 'purpose', 'savings_status',
    'employment', 'personal_status', 'other_parties', 'property_magnitude',
    'other_payment_plans', 'housing', 'job', 'own_telephone', 'foreign_worker',
    # integer-coded columns
    'installment_commitment', 'residence_since', 'existing_credits', 'num_dependents',
]

In [18]:
# Category order for each ordinal feature, one list per feature, given in the
# same order as `ordinal_features`. A value's position in its list is the code
# it is expected to receive (first entry -> 0, second -> 1, ...).
# NOTE(review): several features (e.g. purpose, personal_status) look nominal,
# so their order here is a convention rather than a meaningful ranking.
ordinal_mapping = [
    # checking_status
    ['no checking', '<0', '0<=X<200', '>=200'],
    # credit_history
    ['delayed previously', 'all paid', 'critical/other existing credit', 'existing paid', 'no credits/all paid'],
    # purpose
    ['furniture/equipment', 'business', 'new car', 'used car', 'radio/tv', 'domestic appliance', 'repairs', 'education', 'retraining', 'other'],
    # savings_status
    ['no known savings', '<100', '100<=X<500', '500<=X<1000', '>=1000'],
    # employment
    ['unemployed', '<1', '1<=X<4', '4<=X<7', '>=7'],
    # personal_status
    ['male single', 'female div/dep/mar', 'male mar/wid', 'male div/sep'],
    # other_parties
    ['none', 'guarantor', 'co applicant'],
    # property_magnitude
    ['no known property', 'life insurance', 'car', 'real estate'],
    # other_payment_plans
    ['none', 'stores', 'bank'],
    # housing
    ['for free', 'rent', 'own'],
    # job
    ['unemp/unskilled non res', 'unskilled resident', 'skilled', 'high qualif/self emp/mgmt'],
    # own_telephone
    ['none', 'yes'],
    # foreign_worker
    ['no', 'yes'],
    # The remaining columns are int64 in the data, so their categories are
    # written as ints (not floats) to match the column dtype.
    # installment_commitment
    [1, 2, 3, 4],
    # residence_since
    [1, 2, 3, 4],
    # existing_credits
    [1, 2, 3, 4],
    # num_dependents
    [1, 2],
]

Unnamed: 0_level_0,checking_status,credit_history,purpose,savings_status,employment,personal_status,other_parties,property_magnitude,other_payment_plans,housing,job,own_telephone,foreign_worker,installment_commitment,residence_since,existing_credits,num_dependents
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
253,0.0,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,2.0,2.0,0.0,1.0,1.0,2.0,1.0,0.0
667,0.0,1.0,1.0,1.0,2.0,1.0,0.0,3.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0
85,0.0,2.0,1.0,1.0,2.0,1.0,1.0,3.0,0.0,2.0,3.0,1.0,1.0,3.0,1.0,1.0,0.0
969,1.0,2.0,2.0,1.0,2.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0
75,1.0,2.0,3.0,1.0,4.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,1.0,3.0,3.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
835,1.0,4.0,2.0,1.0,2.0,0.0,0.0,2.0,2.0,2.0,2.0,0.0,1.0,3.0,3.0,1.0,0.0
192,2.0,3.0,1.0,1.0,2.0,0.0,0.0,2.0,0.0,2.0,2.0,1.0,1.0,3.0,1.0,0.0,1.0
629,0.0,3.0,7.0,0.0,4.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0,1.0,0.0,3.0,0.0,0.0
559,2.0,2.0,0.0,1.0,1.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0


Unnamed: 0_level_0,checking_status,credit_history,purpose,savings_status,employment,personal_status,other_parties,property_magnitude,other_payment_plans,housing,job,own_telephone,foreign_worker,installment_commitment,residence_since,existing_credits,num_dependents
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
993,1.0,3.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,2.0,3.0,1.0,1.0,3.0,2.0,0.0,0.0
859,0.0,3.0,2.0,2.0,2.0,0.0,1.0,3.0,0.0,1.0,2.0,0.0,0.0,0.0,1.0,0.0,1.0
298,0.0,3.0,0.0,1.0,2.0,0.0,0.0,3.0,0.0,2.0,2.0,1.0,1.0,2.0,3.0,0.0,0.0
553,2.0,2.0,2.0,2.0,1.0,0.0,0.0,2.0,0.0,2.0,2.0,0.0,1.0,3.0,0.0,0.0,0.0
672,0.0,3.0,2.0,1.0,4.0,0.0,0.0,1.0,0.0,2.0,3.0,1.0,1.0,1.0,3.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
462,2.0,3.0,0.0,1.0,1.0,1.0,0.0,3.0,0.0,1.0,3.0,0.0,1.0,2.0,0.0,0.0,0.0
356,0.0,2.0,4.0,0.0,4.0,0.0,2.0,3.0,0.0,2.0,2.0,1.0,1.0,0.0,3.0,0.0,0.0
2,0.0,2.0,7.0,1.0,3.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0,1.0,1.0,2.0,0.0,1.0
478,2.0,3.0,1.0,2.0,3.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0,1.0,2.0,3.0,0.0,0.0


## For ordinal labels
- Select the `label` column for both training and test data
- Use `sklearn.preprocessing.OrdinalEncoder` to create an ordinal encoder, then fit using the training data
  - Map {`good`, `bad`} to {0, 1} respectively
- Create a new `pandas.DataFrame` of ordinally encoded labels for both training and test data

Unnamed: 0_level_0,label
id,Unnamed: 1_level_1
253,0.0
667,0.0
85,0.0
969,0.0
75,0.0
...,...
835,1.0
192,1.0
629,0.0
559,1.0


Unnamed: 0_level_0,label
id,Unnamed: 1_level_1
993,0.0
859,0.0
298,0.0
553,0.0
672,0.0
...,...
462,0.0
356,0.0
2,0.0
478,0.0


## Reconstruction of data sets
- Combine the 3 `pandas.DataFrame` to obtain the fully processed training and test data
  - Hint: use `.join`

Unnamed: 0_level_0,duration,credit_amount,age,checking_status,credit_history,purpose,savings_status,employment,personal_status,other_parties,...,other_payment_plans,housing,job,own_telephone,foreign_worker,installment_commitment,residence_since,existing_credits,num_dependents,label
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
253,0.277814,0.320904,-0.080669,0.0,0.0,0.0,2.0,2.0,0.0,0.0,...,0.0,2.0,2.0,0.0,1.0,1.0,2.0,1.0,0.0,0.0
667,2.289342,0.130313,-0.765274,0.0,1.0,1.0,1.0,2.0,1.0,0.0,...,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
85,-0.727949,-0.642249,-0.594123,0.0,2.0,1.0,1.0,2.0,1.0,1.0,...,0.0,2.0,3.0,1.0,1.0,3.0,1.0,1.0,0.0,0.0
969,-0.811763,0.246356,0.347209,1.0,2.0,2.0,1.0,2.0,0.0,0.0,...,0.0,2.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0
75,-0.727949,-0.602162,2.572174,1.0,2.0,3.0,1.0,4.0,0.0,0.0,...,0.0,0.0,3.0,0.0,1.0,3.0,3.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
835,-0.727949,-0.758292,1.031813,1.0,4.0,2.0,1.0,2.0,0.0,0.0,...,2.0,2.0,2.0,0.0,1.0,3.0,3.0,1.0,0.0,1.0
192,0.529255,0.237916,0.004906,2.0,3.0,1.0,1.0,2.0,0.0,0.0,...,0.0,2.0,2.0,1.0,1.0,3.0,1.0,0.0,1.0,1.0
629,-0.979390,0.208730,2.401022,0.0,3.0,7.0,0.0,4.0,0.0,0.0,...,0.0,2.0,1.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0
559,-0.225068,-0.460801,-0.422972,2.0,2.0,0.0,1.0,1.0,0.0,0.0,...,0.0,2.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0


Unnamed: 0_level_0,duration,credit_amount,age,checking_status,credit_history,purpose,savings_status,employment,personal_status,other_parties,...,other_payment_plans,housing,job,own_telephone,foreign_worker,installment_commitment,residence_since,existing_credits,num_dependents,label
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
993,1.283578,0.253389,-0.508547,1.0,3.0,0.0,1.0,0.0,0.0,0.0,...,0.0,2.0,3.0,1.0,1.0,3.0,2.0,0.0,0.0,0.0
859,-0.979390,0.119061,-0.850849,0.0,3.0,2.0,2.0,2.0,0.0,1.0,...,0.0,1.0,2.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
298,-0.225068,-0.254385,0.603935,0.0,3.0,0.0,1.0,2.0,0.0,0.0,...,0.0,2.0,2.0,1.0,1.0,2.0,3.0,0.0,0.0,0.0
553,-0.727949,-0.437240,-0.765274,2.0,2.0,2.0,2.0,1.0,0.0,0.0,...,0.0,2.0,2.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0
672,3.295105,2.506373,0.518360,0.0,3.0,2.0,1.0,4.0,0.0,0.0,...,0.0,2.0,3.0,1.0,1.0,1.0,3.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
462,-0.727949,-0.077860,-0.166245,2.0,3.0,0.0,1.0,1.0,1.0,0.0,...,0.0,1.0,3.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0
356,-0.727949,-0.319088,1.117389,0.0,2.0,4.0,0.0,4.0,0.0,2.0,...,0.0,2.0,2.0,1.0,1.0,0.0,3.0,0.0,0.0,0.0
2,-0.727949,-0.401724,1.117389,0.0,2.0,7.0,1.0,3.0,0.0,0.0,...,0.0,2.0,1.0,0.0,1.0,1.0,2.0,0.0,1.0,0.0
478,-0.727949,-0.774116,0.261633,2.0,3.0,1.0,2.0,3.0,0.0,0.0,...,0.0,2.0,1.0,0.0,1.0,2.0,3.0,0.0,0.0,0.0


# Instructions
- Write the train data set to `./data/features.train.csv`
- Write the test data set to `./data/features.test.csv`