# Question (a)

In [1]:
# TRUE when `pkg` is absent from the installed package library.
not.installed <- function(pkg) !is.element(pkg, installed.packages()[, 1])

# Install each required package that is missing. The package being tested is
# the one being installed (previously "foreign" and "ggplot2" were tested but
# "Rmisc" was installed in their place).
for (pkg in c("Rmisc", "foreign", "ggplot2")) {
  if (not.installed(pkg)) {
    install.packages(pkg, repos = "http://cran.us.r-project.org")
  }
}

library(Rmisc)
library(foreign)
library(ggplot2)

: package 'Rmisc' was built under R version 3.2.5
Loading required package: lattice
Loading required package: plyr
: package 'ggplot2' was built under R version 3.2.5

In [44]:
data.dir <- "./data/"  # file.path("data", "roll_call")
# `pattern` is a regular expression: escape the dot and anchor at the end so
# only files whose names end in ".dta" are picked up.
data.files <- list.files(data.dir, pattern = "\\.dta$")
# Add all roll call vote data frames to a single list, in the (alphabetical)
# order returned by list.files().
# NOTE(review): later code labels the list elements by position, so this
# presumably relies on file names sorting in congress order -- confirm.
rollcall.data <- lapply(file.path(data.dir, data.files),
                        read.dta,
                        convert.factors = FALSE)
# Convert one roll call data frame into a Senator-by-vote matrix.
# Rows whose `state` is 99 or above are dropped. Every column from the 10th
# onward is then recoded: values above 6 become 0, values strictly between
# 0 and 4 become 1, and any remaining value above 1 becomes -1.
rollcall.simplified <- function(df) {
  senators <- subset(df, state < 99)
  vote.cols <- 10:ncol(senators)

  # The three steps run in this order on purpose: each step works on the
  # codes left behind by the previous one.
  recode.votes <- function(v) {
    v <- ifelse(v > 6, 0, v)
    v <- ifelse(v > 0 & v < 4, 1, v)
    ifelse(v > 1, -1, v)
  }

  senators[vote.cols] <- lapply(senators[vote.cols], recode.votes)
  as.matrix(senators[, vote.cols])
}

# Recode every congress into its Senator-by-vote matrix.
rollcall.simple <- lapply(rollcall.data, rollcall.simplified)

# Multiply each matrix by its transpose to get the Senator-to-Senator
# transformation, and calculate the Euclidean distance between each Senator.
rollcall.dist <- lapply(rollcall.simple,
                        function(votes) dist(tcrossprod(votes)))

# Do the multidimensional scaling in two dimensions; the sign of both
# coordinates is flipped before converting to a data frame.
rollcall.mds <- lapply(rollcall.dist, function(distances) {
  as.data.frame(-cmdscale(distances, k = 2))
})

# Add identification information about Senators back into MDS data frames
# Congress number attached to each element of rollcall.mds, by position.
congresses <- 102:113

# Labels are assigned by position, so extra data sets would silently get an
# NA congress number; make that visible instead.
if (length(rollcall.mds) > length(congresses)) {
  warning("More roll call data sets than congress labels; ",
          "extra data sets get congress = NA", call. = FALSE)
}

# seq_along() keeps the loop from running at all when rollcall.mds is empty
# (1:length(x) would iterate over c(1, 0)).
for (i in seq_along(rollcall.mds)) {
  names(rollcall.mds[[i]]) <- c("x", "y")

  # Same row filter that rollcall.simplified() applies, so the rows line up
  # with the MDS coordinates.
  congress <- subset(rollcall.data[[i]], state < 99)

  # Keep only the part of each name before the first comma or space.
  # vapply() guarantees a character vector even when there are no rows.
  congress.names <- vapply(as.character(congress$name),
                           function(n) strsplit(n, "[, ]")[[1]][1],
                           character(1))

  rollcall.mds[[i]] <- transform(rollcall.mds[[i]],
                                 name = congress.names,
                                 party = as.factor(congress$party),
                                 congress = congresses[i])
}

head(rollcall.mds[[1]])



Unnamed: 0,x,y,name,party,congress
2,13.63947,318.0718,HEFLIN,100,102
3,142.4457,354.3245,SHELBY,100,102
4,1711.213,301.3357,MURKOWSKI,200,102
5,1419.277,357.0446,STEVENS,200,102
6,-889.714,23.77178,DECONCINI,100,102
7,1731.366,158.597,MCCAIN,200,102


In [46]:
# Separation score between the two parties for one congress.
#
# `mds` is a data frame with columns x, y and party. Rows with party 100 and
# party 200 are each rescaled so their largest absolute x and y equal 1; the
# score is the squared distance between the two group means divided by the
# sum of the per-coordinate variances of both groups.
# NOTE(review): each party is rescaled by its own extent before the groups
# are compared -- kept as in the original analysis; confirm this is intended.
party.separation <- function(mds) {
  # Divide x and y by their largest absolute value within this group.
  rescale <- function(members) {
    members$x <- members$x / max(abs(members$x))
    members$y <- members$y / max(abs(members$y))
    members
  }

  dem <- rescale(subset(mds, party == 100))
  gop <- rescale(subset(mds, party == 200))

  mean.gap <- c(mean(dem$x), mean(dem$y)) - c(mean(gop$x), mean(gop$y))
  spread <- var(dem$x) + var(dem$y) + var(gop$x) + var(gop$y)

  sum(mean.gap^2) / spread
}

# One score and one congress number per element of rollcall.mds (no
# hard-coded count); both stay lists, as before.
S_list <- lapply(rollcall.mds, party.separation)
index_list <- lapply(rollcall.mds, function(mds) mds$congress[1])

In [47]:
# Write the polarization-by-congress line plot to polarization.jpg.
jpeg(filename = "polarization.jpg")
# index_list and S_list are lists of single numbers; flatten them explicitly
# instead of relying on plot() coercing lists.
plot(unlist(index_list), unlist(S_list),
     main = "Polarization from 102nd to 113th congress",
     xlab = "Congress", ylab = "Polarization", type = "b")
dev.off()