# In [2]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
import plotly.express as px

# Location of the UCI Congressional Voting Records data file (house-votes-84).
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/voting-records/house-votes-84.data"

# Column labels for the data: "party" first, followed by "vote1" .. "vote16".
columns = ["party"] + [f"vote{number}" for number in range(1, 17)]

# Read the data file; header=None means no line is treated as a header row,
# so the labels come from `columns` instead.
df = pd.read_csv(url, names=columns, header=None)

# Encode every vote numerically: "y" -> 1 (Yes), "n" -> 0 (No), "?" -> NaN (Missing).
# Any value not listed in the mapping also becomes NaN, as Series.map does by default.
vote_mapping = {"y": 1, "n": 0, "?": np.nan}
vote_columns = columns[1:]
df[vote_columns] = df[vote_columns].apply(lambda ballots: ballots.map(vote_mapping))

# Fill the gaps with 0.5, the midpoint of the 0/1 encoding, so a missing vote
# leans toward neither "yes" nor "no".
df_imputed = df.fillna(value=0.5)

# Feature matrix X: one row per record, one column per encoded vote.
# The party label is left out so it cannot influence the projection.
X = df_imputed.loc[:, columns[1:]].to_numpy()

# Project the vote columns onto their first two principal components.
pca = PCA(n_components=2)
pca_result = pca.fit_transform(X)

# Assemble the plotting table: the two component scores as columns, indexed
# like the imputed data, with each row's party label attached alongside.
results_df = pd.DataFrame(
    pca_result,
    columns=["PCA Component 1", "PCA Component 2"],
    index=df_imputed.index,
).assign(Party=df_imputed["party"])

# Party colours used in the plot: Democrats in blue, Republicans in red.
color_mapping = dict(democrat="blue", republican="red")

# Interactive Plotly scatter of the two PCA components, coloured by party.
fig = px.scatter(
    data_frame=results_df,
    x="PCA Component 1",
    y="PCA Component 2",
    color="Party",
    hover_data=["Party"],
    color_discrete_map=color_mapping,
    title="PCA of US Congressional Voting Records",
)

# Larger, semi-transparent markers keep overlapping points visible;
# hovering reports the point closest to the cursor.
fig.update_traces(marker={"size": 10, "opacity": 0.7}).update_layout(hovermode="closest")
fig.show()

# Also write the interactive figure to an HTML file in the working directory.
fig.write_html("congressional_voting_pca.html")
