# Predict political party affiliation based on the votes.

We will use a public dataset to see how US congressmen voted on 16 different issues in 1984, and try to figure out their political affiliations based on their votes alone.

In [1]:
import pandas as pd

# Column labels applied to the data file: the party label first, then one column per recorded vote.
feature_names = [
    'party',
    'handicapped-infants',
    'water-project-cost-sharing',
    'adoption-of-the-budget-resolution',
    'physician-fee-freeze',
    'el-salvador-aid',
    'religious-groups-in-schools',
    'anti-satellite-test-ban',
    'aid-to-nicaraguan-contras',
    'mx-missle',
    'immigration',
    'synfuels-corporation-cutback',
    'education-spending',
    'superfund-right-to-sue',
    'crime',
    'duty-free-exports',
    'export-administration-act-south-africa',
]

# '?' entries in the raw file are read as missing values (NaN).
data_path = '/content/drive/My Drive/ML Course materials/house-votes-84.data.txt'
voting_data = pd.read_csv(data_path, names=feature_names, na_values=['?'])
voting_data.head()

Unnamed: 0,party,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missle,immigration,synfuels-corporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa
0,republican,n,y,n,y,y,y,n,n,n,y,,y,y,y,n,y
1,republican,n,y,n,y,y,y,n,n,n,n,n,y,y,y,n,
2,democrat,,y,y,,y,y,n,n,n,n,y,n,y,y,n,n
3,democrat,n,y,y,n,,y,n,n,n,n,y,n,y,n,n,y
4,democrat,y,y,y,n,y,y,n,n,n,n,y,,y,y,y,y


In [2]:
# Summarise every column (count / unique / top / freq); a count below the number of rows reveals missing votes.
voting_data.describe()

Unnamed: 0,party,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missle,immigration,synfuels-corporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa
count,435,423,387,424,424,420,424,421,420,413,428,414,404,410,418,407,331
unique,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
top,democrat,n,y,y,n,y,y,y,y,y,y,n,n,y,y,n,y
freq,267,236,195,253,247,212,272,239,242,207,216,264,233,209,248,233,269


As we can see, a lot of the data is missing. To fix this we simply drop the rows that contain missing values. Because this removes a large share of the rows (435 before, 232 after), we should keep in mind that it could introduce bias into our classification.

In [3]:
# Keep only the rows that have a value in every column, then summarise again.
voting_data = voting_data.dropna()
voting_data.describe()

Unnamed: 0,party,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missle,immigration,synfuels-corporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa
count,232,232,232,232,232,232,232,232,232,232,232,232,232,232,232,232,232
unique,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
top,democrat,n,n,y,n,y,y,y,y,n,y,n,n,y,y,n,y
freq,124,136,125,123,119,128,149,124,119,119,128,152,124,127,149,146,189


Now we need to normalise the data: we can't feed characters to the NN, only numbers. So let's replace all the y's and n's with 1's and 0's, and represent the party as 1's and 0's as well.

In [4]:
# Encode every categorical value as an integer:
#   votes: 'y' -> 1, 'n' -> 0
#   party: 'democrat' -> 1, 'republican' -> 0
value_codes = {'y': 1, 'n': 0, 'democrat': 1, 'republican': 0}

# `replace` alone leaves the integer conversion to pandas' implicit downcasting of
# object columns, which newer pandas versions deprecate. Casting explicitly
# guarantees numeric columns, and fails loudly if any value was left unencoded.
voting_data = voting_data.replace(value_codes).astype('int64')
voting_data.head()

Unnamed: 0,party,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missle,immigration,synfuels-corporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa
5,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,1,1
8,0,0,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1
19,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1
23,1,1,1,1,0,0,0,1,1,1,0,0,0,0,0,1,1
25,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,1,1


Now let's extract the features and labels in the form Keras wants.

In [0]:
# Inputs: every column except the party label, as a plain array.
vote_columns = [name for name in feature_names if name != 'party']
features = voting_data[vote_columns].values

# Target: the party column, which is what the classifier predicts.
classes = voting_data['party'].values

Let's now build our model.

In [27]:
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score #cross val score to evaluate our model

# Model factory, handed to the scikit-learn wrapper as its `build_fn`.
def create_model(input_dim=16):
  """Build and compile the binary classifier network.

  Architecture: Dense(32, relu) -> Dropout(0.5) -> Dense(32, relu) ->
  Dropout(0.5) -> Dense(1, sigmoid).

  Args:
    input_dim: Number of input features per sample. Defaults to 16, the
      number of vote columns in this dataset.

  Returns:
    A compiled Sequential model that tracks accuracy as a metric.
  """
  model = Sequential()
  # First hidden layer: 32 ReLU units fed by the `input_dim` input features.
  model.add(Dense(32, input_dim=input_dim, kernel_initializer='normal', activation='relu'))
  model.add(Dropout(0.5))
  # Second hidden layer: another 32 ReLU units.
  model.add(Dense(32, kernel_initializer='normal', activation='relu'))
  model.add(Dropout(0.5))
  # Output layer: a single sigmoid unit for the two-class decision.
  model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

  # Compile with binary cross-entropy as the loss and RMSprop as the optimizer.
  model.compile(loss='binary_crossentropy', optimizer='RMSprop', metrics=['accuracy'])
  return model

# Wrap the Keras model factory in an estimator that scikit-learn can drive.
estimat = KerasClassifier(
    build_fn=create_model,
    epochs=100,
    verbose=0,
)

# Evaluate with 10-fold cross-validation and report the mean of the fold scores.
cv_scores = cross_val_score(estimator=estimat, X=features, y=classes, cv=10)
cv_scores.mean()

0.9525362372398376