In [1]:
import numpy as np
import pandas as pd
from recomm.features import index_single_feature
from recomm.estimator import ClassifierNN

In [2]:
CSV_COLUMNS = [
    "age", "workclass", "fnlwgt", "education", "education_num",
    "marital_status", "occupation", "relationship", "race", "gender",
    "capital_gain", "capital_loss", "hours_per_week", "native_country",
    "income_bracket"
]

In [3]:
train_data = pd.read_csv("data/adult_train.csv", 
                         names=CSV_COLUMNS, 
                         skiprows=1)
test_data = pd.read_csv("data/adult_test.csv", 
                        names=CSV_COLUMNS, 
                        skiprows=1)

To try the case when features are labeled by indices:

In [8]:
indiced_single_feature_data = pd.DataFrame(train_data.copy())
for col in ["workclass", "education", "marital_status", "occupation", "relationship", "race", "gender", "native_country", "income_bracket"]:
    indiced_single_feature_data.loc[:, col] = index_single_feature(indiced_single_feature_data[col].values)[0]
indiced_single_feature_test = pd.DataFrame(test_data.copy())
for col in ["workclass", "education", "marital_status", "occupation", "relationship", "race", "gender", "native_country", "income_bracket"]:
    indiced_single_feature_test.loc[:, col] = index_single_feature(indiced_single_feature_test[col].values)[0]

To rescale features

In [9]:
indiced_single_feature_data = indiced_single_feature_data.apply(lambda x: (x - x.min()) / (x.max() - x.min()))
indiced_single_features = indiced_single_feature_data[indiced_single_feature_data.columns[:-1]].values
indiced_single_labels = indiced_single_feature_data[indiced_single_feature_data.columns[-1]].values
indiced_single_feature_test = indiced_single_feature_test.apply(lambda x: (x - x.min()) / (x.max() - x.min()))
indiced_single_features_test = indiced_single_feature_test[indiced_single_feature_test.columns[:-1]].values
indiced_single_labels_test = indiced_single_feature_test[indiced_single_feature_test.columns[-1]].values

In [6]:
indiced_single_features_estimator = ClassifierNN(indiced_single_features, indiced_single_labels)\
    .optimize(learning_rate=0.1)\
    .estimate()

In [7]:
import tensorflow as tf

In [18]:
test_label, = indiced_single_features_estimator._sess.run([indiced_single_features_estimator.estimated_labels],
                                                        feed_dict={indiced_single_features_estimator.sample_features: indiced_single_features_test})

In [19]:
test_label

array([[-2416.75854492],
       [-2974.71606445],
       [-3010.95532227],
       ..., 
       [-5823.18554688],
       [-3483.34863281],
       [-4899.68066406]], dtype=float32)