In [116]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

census = pd.read_csv('census_data.csv')

census.head()

Unnamed: 0,age,workclass,education,education_num,marital_status,occupation,relationship,race,gender,capital_gain,capital_loss,hours_per_week,native_country,income_bracket
0,39,State-gov,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


In [97]:
census['income_bracket'].unique()

array([' <=50K', ' >50K'], dtype=object)

In [98]:
def label_fix(label):
    if label == ' <=50K':
        return 0
    else:
        return 1

census['income_bracket'] = census['income_bracket'].apply(label_fix)

In [99]:
x_data = census.drop('income_bracket',axis=1)
y_label = census['income_bracket']

In [100]:
x_train, x_test, y_train, y_test = train_test_split(x_data, y_label, test_size=0.33, random_state=42)

In [101]:
census.head(100)

Unnamed: 0,age,workclass,education,education_num,marital_status,occupation,relationship,race,gender,capital_gain,capital_loss,hours_per_week,native_country,income_bracket
0,39,State-gov,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,0
1,50,Self-emp-not-inc,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,0
2,38,Private,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,0
3,53,Private,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,0
4,28,Private,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,0
5,37,Private,Masters,14,Married-civ-spouse,Exec-managerial,Wife,White,Female,0,0,40,United-States,0
6,49,Private,9th,5,Married-spouse-absent,Other-service,Not-in-family,Black,Female,0,0,16,Jamaica,0
7,52,Self-emp-not-inc,HS-grad,9,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,45,United-States,1
8,31,Private,Masters,14,Never-married,Prof-specialty,Not-in-family,White,Female,14084,0,50,United-States,1
9,42,Private,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,5178,0,40,United-States,1


In [102]:
workclass = tf.feature_column.categorical_column_with_hash_bucket('workclass',hash_bucket_size=10)
education = tf.feature_column.categorical_column_with_hash_bucket('education',hash_bucket_size=10)
marital_status = tf.feature_column.categorical_column_with_hash_bucket('marital_status',hash_bucket_size=10)
occupation = tf.feature_column.categorical_column_with_hash_bucket('occupation',hash_bucket_size=10)
relationship = tf.feature_column.categorical_column_with_hash_bucket('relationship',hash_bucket_size=10)
gender = tf.feature_column.categorical_column_with_hash_bucket('gender',hash_bucket_size=10)
native_country = tf.feature_column.categorical_column_with_hash_bucket('native_country',hash_bucket_size=10)
race = tf.feature_column.categorical_column_with_hash_bucket('race',hash_bucket_size=10)


In [103]:
age = tf.feature_column.numeric_column('age')
education_num = tf.feature_column.numeric_column('education_num')
capital_loss = tf.feature_column.numeric_column('capital_loss')
capital_gain = tf.feature_column.numeric_column('capital_gain')
hours_per_week = tf.feature_column.numeric_column('hours_per_week')
census.columns

Index(['age', 'workclass', 'education', 'education_num', 'marital_status',
       'occupation', 'relationship', 'race', 'gender', 'capital_gain',
       'capital_loss', 'hours_per_week', 'native_country', 'income_bracket'],
      dtype='object')

In [104]:
feat_cols = [age, workclass, education, education_num, marital_status,
       occupation, relationship, race, gender, capital_gain,
       capital_loss, hours_per_week, native_country]

In [105]:
model = tf.estimator.LinearClassifier(feature_columns=feat_cols)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpjdk83dfj', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f80c1ce05c0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [106]:
input_func = tf.estimator.inputs.pandas_input_fn(x=x_train,y=y_train,batch_size=100,num_epochs=None,shuffle=True)
model.train(input_fn=input_func,steps=10000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpjdk83dfj/model.ckpt.
INFO:tensorflow:loss = 69.31472, step = 1
INFO:tensorflow:global_step/sec: 142.421
INFO:tensorflow:loss = 907.8637, step = 101 (0.703 sec)
INFO:tensorflow:global_step/sec: 261.026
INFO:tensorflow:loss = 306.81628, step = 201 (0.383 sec)
INFO:tensorflow:global_step/sec: 262.576
INFO:tensorflow:loss = 463.83304, step = 301 (0.382 sec)
INFO:tensorflow:global_step/sec: 263.206
INFO:tensorflow:loss = 152.8155, step = 401 (0.378 sec)
INFO:tensorflow:global_step/sec: 268.957
INFO:tensorflow:loss = 66.75195, step = 501 (0.372 sec)
INFO:tensorflow:global_step/sec: 253.675
INFO:tensorflow:loss = 600.45264, step = 601 (0.397 sec)
INFO:tensorflow:global_step/sec: 248.111
INFO:tensorflow:loss

INFO:tensorflow:global_step/sec: 258.955
INFO:tensorflow:loss = 30.525944, step = 8101 (0.387 sec)
INFO:tensorflow:global_step/sec: 250.415
INFO:tensorflow:loss = 51.32714, step = 8201 (0.399 sec)
INFO:tensorflow:global_step/sec: 259.588
INFO:tensorflow:loss = 24.299564, step = 8301 (0.385 sec)
INFO:tensorflow:global_step/sec: 256.399
INFO:tensorflow:loss = 74.524826, step = 8401 (0.390 sec)
INFO:tensorflow:global_step/sec: 253.152
INFO:tensorflow:loss = 81.45721, step = 8501 (0.396 sec)
INFO:tensorflow:global_step/sec: 247.289
INFO:tensorflow:loss = 29.812998, step = 8601 (0.406 sec)
INFO:tensorflow:global_step/sec: 253.931
INFO:tensorflow:loss = 42.728226, step = 8701 (0.398 sec)
INFO:tensorflow:global_step/sec: 254.383
INFO:tensorflow:loss = 36.72488, step = 8801 (0.387 sec)
INFO:tensorflow:global_step/sec: 245.327
INFO:tensorflow:loss = 36.23736, step = 8901 (0.408 sec)
INFO:tensorflow:global_step/sec: 258.824
INFO:tensorflow:loss = 27.515293, step = 9001 (0.393 sec)
INFO:tensorflo

<tensorflow.python.estimator.canned.linear.LinearClassifier at 0x7f80c1ce0898>

In [107]:
eval_input_func = tf.estimator.inputs.pandas_input_fn(x=x_test,y=y_test,batch_size=100,num_epochs=None,shuffle=False)
model.evaluate(eval_input_func,steps=1000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-07-13-17:22:34
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpjdk83dfj/model.ckpt-10000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [100/1000]
INFO:tensorflow:Evaluation [200/1000]
INFO:tensorflow:Evaluation [300/1000]
INFO:tensorflow:Evaluation [400/1000]
INFO:tensorflow:Evaluation [500/1000]
INFO:tensorflow:Evaluation [600/1000]
INFO:tensorflow:Evaluation [700/1000]
INFO:tensorflow:Evaluation [800/1000]
INFO:tensorflow:Evaluation [900/1000]
INFO:tensorflow:Evaluation [1000/1000]
INFO:tensorflow:Finished evaluation at 2018-07-13-17:22:39
INFO:tensorflow:Saving dict for global step 10000: accuracy = 0.8349, accuracy_baseline = 0.76242, auc = 0.8853708, auc_precision_recall = 0.7220057, average_loss = 0.35393175, global_step = 10000, label/mean = 0.23758, loss = 35.39

{'accuracy': 0.8349,
 'accuracy_baseline': 0.76242,
 'auc': 0.8853708,
 'auc_precision_recall': 0.7220057,
 'average_loss': 0.35393175,
 'label/mean': 0.23758,
 'loss': 35.393177,
 'precision': 0.7265,
 'prediction/mean': 0.20612812,
 'recall': 0.48926678,
 'global_step': 10000}

In [123]:
predict_input_func = tf.estimator.inputs.pandas_input_fn(x_test,shuffle=False)
prediction = model.predict(predict_input_func)

In [124]:
preds = list(prediction)
final_predictions = [pred['class_ids'][0] for pred in preds]

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpjdk83dfj/model.ckpt-10000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [125]:
print(classification_report(y_test,final_predictions))

             precision    recall  f1-score   support

          0       0.86      0.94      0.90      8196
          1       0.73      0.49      0.58      2550

avg / total       0.82      0.83      0.82     10746

