<h1>Political Party prediction</h1>

In [1]:
import pandas as pd

In [2]:
feature_names = [
   "party",
   "handicapped-infants",
   "water-project-cost-sharing",
   "adoption-of-the-budget-resolution",
   "physician-fee-freeze",
   "el-salvador-aid",
   "religious-groups-in-schools",
   "anti-satellite-test-ban",
   "aid-to-nicaraguan-contras",
   "mx-missile",
   "immigration",
   "synfuels-corporation-cutback",
   "education-spending",
   "superfund-right-to-sue",
   "crime",
   "duty-free-exports",
   "export-administration-act-south-africa"]
voting_data= pd.read_csv("./politic_parties/house-votes-84.data.txt", na_values=['?'], names=feature_names)
voting_data.head()
voting_data.describe()

Unnamed: 0,party,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missile,immigration,synfuels-corporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa
count,435,423,387,424,424,420,424,421,420,413,428,414,404,410,418,407,331
unique,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
top,democrat,n,y,y,n,y,y,y,y,y,y,n,n,y,y,n,y
freq,267,236,195,253,247,212,272,239,242,207,216,264,233,209,248,233,269


Dealing with missing data, and making it <i>Keras-readable</i>:
    <ul>1) Deleting missing values rows, may be biased</ul>

In [3]:
# delete missing data
voting_data.dropna(inplace=True)
voting_data.describe()


Unnamed: 0,party,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missile,immigration,synfuels-corporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa
count,232,232,232,232,232,232,232,232,232,232,232,232,232,232,232,232,232
unique,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
top,democrat,n,n,y,n,y,y,y,y,n,y,n,n,y,y,n,y
freq,124,136,125,123,119,128,149,124,119,119,128,152,124,127,149,146,189


<ul>2) Turn y & n into 1 & 0 respectively</ul>

In [4]:
# to numbers
voting_data.replace(('y', 'n'), (1, 0), inplace=True)
voting_data.replace(('democrat', 'republican'), (1, 0), inplace=True)


voting_data.head()


Unnamed: 0,party,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missile,immigration,synfuels-corporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa
5,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,1,1
8,0,0,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1
19,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1
23,1,1,1,1,0,0,0,1,1,1,0,0,0,0,0,1,1
25,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,1,1


extract features and labels

In [5]:
all_features = voting_data[feature_names].values
all_classes = voting_data['party'].values




<h2>Model construction</h2>

In [17]:
from keras.layers import Dense, Dropout
from keras.models import Sequential
from sklearn.model_selection import cross_val_score

def create_model():
    model = Sequential()
    model.add(Dense(32, input_dim=17, kernel_initializer='normal', activation='sigmoid'))
    model.add(Dense(16, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model

from keras.wrappers.scikit_learn import KerasClassifier

# scikit_learn estimator 
estimator = KerasClassifier(build_fn=create_model, nb_epoch=100, verbose=0)
cv_scores = cross_val_score(estimator, all_features, all_classes, cv=10)
cv_scores.mean()

0.5550724595785141