In [None]:
# Import our dependencies
import pandas as pd
import matplotlib as plt
import sklearn as skl
import tensorflow as tf
import numpy as np
import tensorflow.keras as keras
from sklearn.preprocessing import StandardScaler

In [None]:
# Read the Leg_District4 voter CSV into a DataFrame and preview the first 10 rows
file_path = "/Users/joshzaragoza/Arizona_Election_Project/Resources/Voter Data/Leg_District4.csv"
df_voters = pd.read_csv(filepath_or_buffer=file_path)
df_voters.head(n=10)

In [None]:
# Discard the ZIP code, the three name fields and the partisanship score;
# every other column is kept as-is.
df_voters = df_voters.drop(
    columns=[
        "Zip",
        "LastName",
        "FirstName",
        "MiddleName",
        "Partisanship Score",
    ]
)
df_voters

In [None]:
# Number of non-null entries in each column
df_voters.notna().sum()

In [None]:
# Show the dtype pandas assigned to each column of the voter frame
df_voters.dtypes

In [None]:
# Count missing values per column. Displaying the raw boolean frame is
# truncated by the notebook and cannot show whether (or where) nulls occur.
df_voters.isnull().sum()

In [None]:
# Drop every row that contains a null/NaN in any column.
# `how` and `thresh` are mutually exclusive in pandas: supplying both (even
# thresh=None) raises TypeError on pandas >= 2.0, so only `how` is passed.
# subset is left at its default (all columns are checked).
df_voters = df_voters.dropna(axis=0, how="any")
df_voters

In [None]:
# Cast the four float-valued score/count columns to integer dtype
df_voters = df_voters.astype(
    dict.fromkeys(
        ['Voter Score', 'Turnout Score', 'Kids in HH', 'Liberal Ideology'],
        'int',
    )
)
df_voters

In [None]:
# Shorten the 'PartyName' column label to 'Party'
df_voters = df_voters.rename(columns={'PartyName': 'Party'})
df_voters.head()

In [None]:
# Flag swing voters from the 'Voter Score' column: scores in the middle band
# (35-65) are labelled 'Yes'; scores of 34 or less and scores from 66 to 100
# are labelled 'No'.
conditions = [
    (df_voters['Voter Score'] <= 34),
    (df_voters['Voter Score'] >= 35) & (df_voters['Voter Score'] <= 65),
    (df_voters['Voter Score'] >= 66) & (df_voters['Voter Score'] <= 100)
    ]

# Labels assigned to the conditions above, in the same order
values = ['No', 'Yes', 'No']

# Create the new column with np.select. Rows that match no condition (scores
# above 100) receive `default`. The implicit default is the integer 0, which
# cannot be combined with string labels: recent NumPy releases raise TypeError,
# and older ones emit a stray '0' label that would later be one-hot encoded as
# an extra 'Swing Voter_0' column. Such rows are outside the swing band, so
# they are labelled 'No' explicitly.
df_voters['Swing Voter'] = np.select(conditions, values, default='No')

# Display updated DataFrame
df_voters

In [None]:
# Remove the raw 'Voter Score' column from the frame
df_voters = df_voters.drop(columns=["Voter Score"])
df_voters

In [None]:
# Bucket the numeric 'Age' column into labelled age ranges.
# NOTE: this overwrites 'Age' with string labels, so the cell can only be run
# once per freshly loaded frame.
conditions = [
    (df_voters['Age'] >= 18) & (df_voters['Age'] <= 24),
    (df_voters['Age'] >= 25) & (df_voters['Age'] <= 34),
    (df_voters['Age'] >= 35) & (df_voters['Age'] <= 44),
    (df_voters['Age'] >= 45) & (df_voters['Age'] <= 54),
    (df_voters['Age'] >= 55) & (df_voters['Age'] <= 64),
    (df_voters['Age'] >= 65),
    ]

# Labels assigned to the conditions above, in the same order
values = ['18-24', '25-34', '35-44', '45-54', '55-64', '65+']

# Replace 'Age' with its bucket label. Rows matching no condition (ages below
# 18, or values that fall between two integer bounds) receive `default`. The
# implicit default is the integer 0, which cannot be combined with string
# labels: recent NumPy releases raise TypeError and older ones emit a stray '0'
# label. An explicit 'Unknown' label keeps such rows identifiable.
df_voters['Age'] = np.select(conditions, values, default='Unknown')

# Display updated DataFrame
df_voters

In [None]:
# Write the transformed frame to CSV; the row index is not written to the file
file_path = "/Users/joshzaragoza/LD4_transform.csv"
df_voters.to_csv(path_or_buf=file_path, index=False)

In [None]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf

# Read the transformed CSV back in as the modelling dataset and preview it
swing_voter_df = pd.read_csv(filepath_or_buffer="/Users/joshzaragoza/LD4_transform.csv")
swing_voter_df.head(n=5)

In [None]:
# Show the dtype of each column of the reloaded frame
swing_voter_df.dtypes

In [None]:
# Collect, in column order, the names of all object-dtype columns; these are
# treated as the categorical variables.
swing_voter_cat = [
    column_name
    for column_name, column_dtype in swing_voter_df.dtypes.items()
    if column_dtype == "object"
]

In [None]:
# Create a OneHotEncoder instance. The keyword that requests dense output was
# renamed across scikit-learn releases (`sparse` -> `sparse_output`, with
# `sparse` later removed), so the encoder is left at its default sparse output
# and the result is densified with .toarray(), which works on every release.
enc = OneHotEncoder()

# Fit and transform the categorical columns, then wrap the dense array
encode_df = pd.DataFrame(
    enc.fit_transform(swing_voter_df[swing_voter_cat]).toarray()
)

# Add the encoded variable names to the DataFrame. `get_feature_names` was
# removed from scikit-learn in favour of `get_feature_names_out`; the old
# method is used only when the new one does not exist.
if hasattr(enc, "get_feature_names_out"):
    encode_df.columns = enc.get_feature_names_out(swing_voter_cat)
else:
    encode_df.columns = enc.get_feature_names(swing_voter_cat)
encode_df.head()

In [None]:
# Attach the one-hot encoded columns by row index, then drop the original
# categorical columns they replace.
swing_voter_df = swing_voter_df.merge(encode_df, left_index=True, right_index=True)
# `columns=` is used because the positional axis argument (`drop(labels, 1)`)
# is no longer accepted by pandas >= 2.0.
swing_voter_df = swing_voter_df.drop(columns=swing_voter_cat)
swing_voter_df.head()

In [None]:
# Split our preprocessed data into our features and target arrays.
# The target is the 'Swing Voter_Yes' indicator; both swing-voter indicator
# columns are removed from the features so the target does not leak into X.
y = swing_voter_df["Swing Voter_Yes"].values
# `columns=` is used because the positional axis argument (`drop(labels, 1)`)
# is no longer accepted by pandas >= 2.0.
X = swing_voter_df.drop(columns=["Swing Voter_Yes", "Swing Voter_No"]).values

# Split the preprocessed data into a training and testing dataset
# (fixed random_state so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [None]:
# Standardize the features: the scaler is fitted on the training split only
# and that same fitted scaler is then applied to both splits.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(X_train),
    X_scaler.transform(X_test),
)

In [None]:
# Deep neural net: two ReLU hidden layers followed by a single sigmoid output
# unit for the binary target.
number_input_features = X_train.shape[1]  # one input per feature column
hidden_nodes_layer1 = 12
hidden_nodes_layer2 = 8

nn = tf.keras.models.Sequential(
    [
        # First hidden layer (also declares the input width)
        tf.keras.layers.Dense(
            units=hidden_nodes_layer1,
            input_dim=number_input_features,
            activation="relu",
        ),
        # Second hidden layer
        tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"),
        # Output layer
        tf.keras.layers.Dense(units=1, activation="sigmoid"),
    ]
)

# Print the layer-by-layer structure of the model
nn.summary()

In [None]:
# Compile the model: binary cross-entropy loss for the single sigmoid output,
# Adam optimizer, accuracy reported as the metric.
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model on the scaled training data. Without a fit step the network
# keeps its random initial weights, so evaluating it measures nothing.
# epochs=50 is a starting point; adjust after inspecting the training history.
fit_model = nn.fit(X_train_scaled, y_train, epochs=50, verbose=2)

In [None]:
# Evaluate the model using the test data. The standardized test matrix is used
# (not the raw X_test) so the inputs are on the scale produced by the scaler
# that was fitted on the training split.
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")