initial commit

kennyjoseph · Oct 7, 2013 · 888f3cb · 888f3cb
commit 888f3cb
Show file tree

Hide file tree

Showing 32 changed files with 1,281,911 additions and 0 deletions.
diff --git a/.Rhistory b/.Rhistory
@@ -0,0 +1,10 @@
+ggsve
+library(ggplot2)
+source('D:/kjoseph/Dropbox/Kenny/current_papers/ArabSpring/github/filter_tune_(step2).R')
+source('D:/kjoseph/Dropbox/Kenny/current_papers/ArabSpring/github/filter_tune_(step2).R')
+source('D:/kjoseph/Dropbox/Kenny/current_papers/ArabSpring/github/filter_tune_(step2).R')
+source('D:/kjoseph/Dropbox/Kenny/current_papers/ArabSpring/github/filter_tune_(step2).R')
+traceback()
+install.packages("ggplot2")
+source('D:/kjoseph/Dropbox/Kenny/current_papers/ArabSpring/github/filter_tune_(step2).R')
+source('D:/kjoseph/Dropbox/Kenny/current_papers/ArabSpring/github/filter_tune_(step2).R')
diff --git a/analysis/.Rhistory b/analysis/.Rhistory
@@ -0,0 +1,10 @@
+source('D:/kjoseph/Dropbox/Kenny/current_papers/current/ArabSpring/github/analysis/gen_results.R')
+source('D:/kjoseph/Dropbox/Kenny/current_papers/current/ArabSpring/github/analysis/gen_results.R')
+require(data.table)
+files = Sys.glob("D:/kjoseph/ArabSpring/final_nets/rev*/30_*/*_agg_out.csv")
+l <- vector("list",length(files))
+for(i in 1:length(files) ){
+l[[i]] <- fread(files[i])
+}
+dt <- rbindlist(l)
+write.csv(dt, "D:/kjoseph/Dropbox/Kenny/current_papers/current/ArabSpring/github/results2.csv")
diff --git a/analysis/analysis.R b/analysis/analysis.R
@@ -0,0 +1,116 @@
+library(data.table)
+library(lubridate)
+library(reshape)
+library(plyr)
+library(stringr)
+library(ggplot2)
+library(scales)
+library(zoo)
+library(reshape2)
+
+theme_set(theme_bw(20))
+source_dir <- "github/"
+
+# Quantile levels for the lower/upper bands, and the grouping columns used below
+lower_q <- .25
+upper_q <- .75
+params <- c("date","country","tp","group_flip_to_positive","group_flip_to_negative","agent_bias")
+###Get results: sum Violence and Revolution within each params combination
+d <- fread(paste0(source_dir,"results_sim_final.csv"))
+country_level <- d[,list(Violence=sum(Violence),Revolution=sum(Revolution)),by=params]
+country_level$date <- ymd(country_level$date)
+country_level$country[country_level$country == "IRAN ISLAMIC REPUBLIC OF"] <- "IRAN"
+country_level$country[country_level$country == "LIBYAN ARAB JAMAHIRIYA"] <- "LIBYA"
+country_level$country[country_level$country == "UNITED ARAB EMIRATES"] <- "U.A.E."
+
+###Plot of results: collapse tp with d[2]-d[1] (presumably later minus earlier time period - verify row order), then average per date and country
+cl <- melt(country_level, id=params)
+cl <- dcast(cl,formula(paste(paste(params[params!="tp"],collapse="+"),"variable",sep="~")),value.var="value", function(d){d[2]-d[1]})
+cl <- cl[!is.na(cl$Revolution),]
+cl <- ddply(cl, .(date,country), summarise, Violence = mean(Violence), Revolution=mean(Revolution))
+# Long format again (date, country, variable, value); QATAR and WESTERNERS are dropped
+cl <- melt(cl, id=c("date","country"))
+cl <- cl[cl$country != "QATAR" & cl$country !="WESTERNERS",]
+
+# Lower/median/upper quantiles of value per date and variable
+iqr_per_month <- ddply(cl, .(date,variable), function(f){
+  d <- quantile(f$value,probs=(c(lower_q,.5,upper_q)))
+  data.frame(Lower=d[1],Median=d[2],Upper=d[3])
+})
+# Same quantiles per country and variable over a right-aligned rolling window of up to 3 observations
+q <- ddply(cl, .(country,variable), function(d){
+  f <- zoo(d$value, d$date)
+  rev <- data.frame(rollapply(f,3, partial=TRUE,align="right",FUN=function(f){return(quantile(f,probs=c(lower_q,.5,upper_q)))}))
+
+  data.frame(date=ymd(rownames(rev)),low=rev[,1],med=rev[,2],high=rev[,3])
+})
+
+# Flag a prediction where Violence is above both its rolling and cross-country
+# upper quantile while Revolution is below both of its lower quantiles.
+blah <- merge(merge(q,cl), iqr_per_month)
+v <- with(blah,blah[variable=="Violence" & value > high & value > Upper,])
+r <- with(blah,blah[variable=="Revolution" & value < low & value < Lower,])
+res <- merge(r[,c(1:3)],v[,c(1:3)],by=c("date","country"))
+res <- res[!duplicated(res$country),1:2]  # keep the first row per country (date, country)
+cl$Prediction <- FALSE
+cl[as.vector(unlist(adply(res, 1, function(f){which(cl$country == f$country & cl$date == f$date)})[,c("V1","V2")])),"Prediction"] <- TRUE
+
+
+
+# Belief-change series per country with the cross-country quantile range
+# (black), the rolling per-country range (blue) and flagged months (red points).
+v_plot <- ggplot(cl, aes(date,value,color=variable)) +
+  theme(axis.text.x=element_text(angle=45,hjust=1), legend.title=element_blank()) +
+  xlab("Month") + geom_line(size=1.3,alpha=.7) + facet_wrap(~country,nrow=4,scales="free_y") +
+  geom_linerange(data=iqr_per_month,aes(y=Median,ymin=Lower,ymax=Upper),color='black') +
+  geom_linerange(data=q, aes(y=med,ymin=low,ymax=high),color='blue') +
+  geom_point(data=cl[cl$Prediction,],color='red', size=5) +
+  ylab("Change in Belief (summation over agents)") +
+  scale_x_datetime(breaks=date_breaks("3 months"), labels=date_format("%b %Y"))
+
+# Per-country table: flagged date from res (or "None") next to the hard-coded actual date
+res_out <- data.frame(country=unique(cl$country),actual="None",stringsAsFactors=FALSE)
+res_out <- merge(res_out,res,all.x=TRUE)
+res_out$date <- as.character(res_out$date)
+res_out$date[is.na(res_out$date)] <- "None"
+res_out$actual[res_out$country=="TUNISIA"] <- "2011-01-01"
+res_out$actual[res_out$country=="EGYPT"] <- "2011-02-01"
+res_out$actual[res_out$country=="LIBYA"] <- "2011-08-01"
+res_out$actual[res_out$country=="YEMEN"] <- "2011-01-01"
+write.csv(res_out, paste0(source_dir,"results_overthrow.csv"))
+
+##Get protests data: NA counts become 0, dates are parsed, country names normalised
+protests <- read.csv(paste0(source_dir,"analysis/protest_counts.csv"),stringsAsFactors=FALSE)
+protests[is.na(protests)] <- 0
+protests$date <- mdy(protests$date)
+protests$QATAR <- NULL  # QATAR is excluded here, as it is from cl
+names(protests)[names(protests)=="SAUDI.ARABIA"] <- "SAUDI ARABIA"
+names(protests)[names(protests)=="U.A.E"] <- "U.A.E."
+###Plot of protests, one facet per country. QATAR is already removed, so melt
+###directly: -which(...) on a missing column is -integer(0) and selects no columns.
+p1 <- ggplot(melt(protests,id="date"), aes(date,value))
+p1 <- p1 + geom_point() + geom_line() + facet_wrap(~variable, nrow=2)
+p1 <- p1 + theme(axis.text.x=element_text(angle=45,hjust=1))
+p1 <- p1 + ylab("Number of Protests") + xlab("Month")
+
+###Plot of protests w/ results: belief series and protest counts, each scaled by its per-country maximum absolute value
+melt_protests <- melt(protests,id="date")
+names(melt_protests) <- c("date","country","protests")
+country_merge <- cl[cl$variable == "Violence",]
+cl2 <- merge(country_merge, melt_protests, by=c("date","country"))
+cl2$Prediction <- NULL
+cl2 <- cl2[cl2$country %in% unique(res$country[! res$country %in% c("IRAQ","IRAN") ]),]  # countries present in res, minus IRAQ and IRAN
+cl2$variable <- NULL
+cl2 <- melt(cl2, id=c("date","country"))
+cl2 <- ddply(cl2, .(date,country,variable), function(d){abs(d$value)/max(abs(cl2[cl2$country==d$country & cl2$variable==d$variable,]$value))})  # ddply names the result column V1
+# NOTE(review): the belief series is taken from variable == "Violence" but is labelled "Revolution Belief" below - confirm which is intended
+prot_plot <- ggplot(cl2, aes(date,V1,color=variable))
+prot_plot <- prot_plot + geom_point() + geom_line() + facet_wrap(~country,nrow=1)
+prot_plot <- prot_plot + theme(axis.text.x=element_text(angle=45, hjust=1))
+prot_plot <- prot_plot + scale_x_datetime(breaks=date_breaks("3 months"), 
+                                          labels=date_format("%b %Y"),
+                                          limits=c(ymd("2010-12-01"),
+                                                   ymd("2011-12-01"))) 
+prot_plot <- prot_plot + xlab("Month") + ylab("Percent of maximum value (per time series)")
+prot_plot <- prot_plot + scale_color_discrete("",labels=c("Revolution Belief","Number of Protests"))
+prot_plot
diff --git a/analysis/gen_results.R b/analysis/gen_results.R
@@ -0,0 +1,118 @@
+require(data.table)
+require(ggplot2)
+require(plyr)
+require(doBy)
+require(reshape)
+require(lubridate)
+require(stringr)
+require(snowfall)
+require(data.table)
+source_dir <- "github/"
+
+# Start a 10-CPU snowfall cluster, then give it the helper functions, the
+# source_dir value and the packages used by the per-run worker function.
+sfInit(parallel=TRUE,cpus=10)
+sfSource(paste0(source_dir,"analysis/results_functions.R"))
+sfExport("source_dir")
+worker_pkgs <- c("data.table","ggplot2","plyr","doBy","reshape","lubridate","stringr")
+for(pkg in worker_pkgs){
+  sfLibrary(pkg, character.only=TRUE)
+}
+
+####Agent Country Replication Date TP RevB ViolB Rev+K Rev-K Viol+K Viol-K
+
+
+# For each simulation run directory: label the belief and knowledge outputs,
+# sum agent knowledge per belief/sign group and write <run>/..._agg_out.csv.
+parSapply(sfGetCluster(),Sys.glob("final_nets/rev_*/30*"), function(this_dir){
+  print(this_dir)
+  # Skip unfinished runs (no "End time:" in the log). Read by path so no file()
+  # connection is left behind; `return;` was a no-op, hence return(NULL).
+  run_log <- readLines(paste0(this_dir,"/out.txt"))
+  if(!any(grepl("End time:",run_log))){
+    return(NULL)
+  }
+
+  tps <- c(2,30)
+  #get replication num, date, from param
+  params <- read.csv(paste0(this_dir,"/params.csv"),stringsAsFactors=FALSE)
+  num_agents <- as.numeric(params[params$parameter=="Agent Count","value"])
+  date <- ymd(paste0(params[params$parameter=="Date","value"],"-01"))
+
+  ##Agent-Country
+  groups <- read.csv(paste0(this_dir,"/../AG.csv"))
+  countries <- read.csv(paste0(source_dir,"gold_topics/countries.txt"),header=FALSE,stringsAsFactors=FALSE)
+  countries <- rbind(countries,data.frame(V1="WESTERNERS"))
+  agent_countries <- groups[groups$Group %in% countries$V1,]
+
+  ##Agent-Belief (has country as well)
+  agent_names <- get_names_data(paste0(this_dir,"/../agent_map.csv"))
+  belief_names <- get_names_data(paste0(this_dir,"/../beliefs_map.csv"))
+  beliefs <- get_data(paste0(this_dir,"/belief_output.csv"),
+                      belief_names$Term,
+                      num_agents,agent_countries,
+                      agent_names,tps)
+
+  # Number repeated knowledge Terms 1..k so the "Term index" column names are unique
+  knowledge_names <- get_names_data(paste0(this_dir,"/../knowledge_map.csv"))
+  kn <- ddply(knowledge_names,.(Term),function(l){data.frame(v=1:nrow(l))})
+  knowledge_names <- orderBy(~Term,knowledge_names)
+  kn<- orderBy(~Term,kn)
+  knowledge_names$index <- kn$v
+  knowledge_names <- orderBy(~Mapping, knowledge_names)
+
+  ##Agent-KnowledgeBelief
+  ##Get Agent-Knowledge
+  knowledge <- get_data(paste0(this_dir,"/knowledge_output.csv"),
+                        paste(knowledge_names$Term,knowledge_names$index),
+                        num_agents,agent_countries,
+                        agent_names,tps)
+  ##Get Knowledge-Belief
+  tb <- read.csv(paste0(this_dir,"/../TB_indexed.csv"))
+  # NOTE(review): +1 presumably converts 0-based targets to R's 1-based positions - confirm
+  tb$target <- tb$target+1
+  tb <- merge(tb, belief_names,by.x="source",by.y="Mapping")
+  tb$PN <- ifelse(tb$weight >0, "Pos","Neg")
+  tb$Belief <- paste(tb$Term,tb$PN,sep="_")
+  indicies <- tapply(tb$target,tb$Belief, unique)
+
+  ##Sum agent knowledge to beliefs
+  ab_mat <- matrix(data=0,nrow=nrow(knowledge),ncol=length(indicies))
+  for(i in seq_along(indicies)){
+    kb_set <- indicies[[i]]
+    ab_mat[,i] <- apply(knowledge,1,function(l){sum(as.numeric(l[kb_set]))})
+  }
+  ab_by_k <- data.frame(ab_mat)
+  names(ab_by_k) <- names(indicies)
+  ab_by_k$agent <- knowledge$agent
+  ab_by_k$tp <- knowledge$tp
+
+  gfp <- params[params$parameter=="group_flip_to_positive","value"]
+  gfn <- params[params$parameter=="group_flip_to_negative","value"]
+  ab <-params[params$parameter=="agent_bias","value"]
+  # Columns 1:4 of ab_by_k are the belief/sign sums (assumes exactly four groups)
+  out <- cbind(beliefs,ab_by_k[,1:4])
+  out$replication <- 1
+  out$date <- date
+  out$group_flip_to_positive <- gfp
+  out$group_flip_to_negative <- gfn
+  out$agent_bias <- ab
+
+  out[, RevolutionByKnowledge:= Revolution_Pos-Revolution_Neg]
+  out[, ViolenceByKnowledge:= Violence_Pos-Violence_Neg]
+  write.csv(out, paste0(this_dir,"/",date,paste(gfp,gfn,ab,"agg_out.csv",sep="_")))
+  print("DONE")
+})
+
+
+# Stack every per-run *_agg_out.csv into the combined results table that
+# analysis.R reads. lapply() replaces the 1:length(files) loop, which indexes
+# files[1] and files[0] when the glob matches nothing; fail clearly instead.
+files <- Sys.glob("final_nets/rev*/30_*/*_agg_out.csv")
+if(length(files) == 0) stop("no *_agg_out.csv files found under final_nets/")
+dt <- rbindlist(lapply(files, fread))
+write.csv(dt, "github/results_sim_final.csv")
+
+
+
+
diff --git a/analysis/generate_figures_3,4,5.R b/analysis/generate_figures_3,4,5.R
@@ -0,0 +1,111 @@
+library(reshape2)
+library(plyr)
+library(data.table)
+library(ggplot2)
+
+top_dir <- "~/Dropbox/Kenny/current_papers/ArabSpring/github/"  # NOTE: machine-specific path
+file <- paste0(top_dir,"rev_2011-01/")
+countries <- read.csv(paste0(top_dir,"gold_topics/countries.txt"),header=FALSE,stringsAsFactors=FALSE)
+westerners <- read.csv(paste0(top_dir,"gold_topics/westerners.csv"),header=FALSE,stringsAsFactors=FALSE)
+source(paste0(top_dir,"setup/data_functions.R"))
+source(paste0(top_dir,"setup/plotting_functions.R"))
+# Read the network CSVs from the rev_2011-01 directory
+TT_net <- read.csv(paste0(file,"/TT.csv"),stringsAsFactors=FALSE)
+AA_net <- read.csv(paste0(file,"/AA.csv"),stringsAsFactors=FALSE)
+AT_net <- read.csv(paste0(file,"/AT.csv"),stringsAsFactors=FALSE)
+CTA_net <- read.csv(paste0(file,"/ATC.csv"),stringsAsFactors=FALSE)
+F1 <- read.csv(paste(file,"/F1.csv",sep=""),stringsAsFactors=FALSE)
+names(CTA_net) <- c("Country","Topic","Agent","Weight")
+# Long-format topic-belief table: drop zeros, replace each value by sign * ceiling(log(|value|)), with 0 mapped to 1
+TB_net_data <- get_tb_data(F1,file)
+TB_net <- melt(TB_net_data)
+TB_net <- TB_net[TB_net$value != 0,]
+TB_net$v2 <- sign(TB_net$value)* ceiling(log(abs(TB_net$value)))
+TB_net$v2 <- ifelse(TB_net$v2==0,1,TB_net$v2)
+TB_net <- TB_net[,c(2,1,4)]  # NOTE(review): positional columns, presumably variable, Topic, v2 - confirm against get_tb_data output
+names(TB_net) <- c("Belief","Topic","Weight")
+TT_net <- TT_net[TT_net$Source %in% TB_net_data$Topic &
+TT_net$Destination %in% TB_net_data$Topic,]
+
+####First plot: precision, recall, F1 and log(SameCount)-weighted F1 (WF1) computed from the F1 table
+f1_file <- data.table(F1)
+f1_file <- f1_file[f1_file$SameCount > 0,]
+f1_file$Precision <- f1_file$SameCount/f1_file$TopicArticleCount
+f1_file$Recall <- f1_file$SameCount/f1_file$BeliefTopicCount
+f1_file$F1 <- 2*(f1_file$Precision*f1_file$Recall)/(f1_file$Precision+f1_file$Recall)
+f1_file$WF1 <- log(f1_file$SameCount) * f1_file$F1
+f1_file$Metric <- f1_file[,"WF1",with=F]  # Metric duplicates the WF1 column
+f1_file <- f1_file[f1_file$Topic !=f1_file$BeliefTopic,]  # drop rows where Topic equals BeliefTopic
+#######Figure 3 (topic_distro_plot comes from the sourced plotting helpers)
+topic_distro_plot(c("INTERNET SOCIAL NETWORKING",
+                    "FOOD PRICES"),
+                    data.frame(f1_file),"WF1") 
+
+###Figure 4: scatter of Revolution against Violence, one point per TB_net_data row
+ggplot(TB_net_data, aes(Revolution,Violence)) + geom_point() 
+
+######Work for Figure 5: restrict the networks to topics present in TB_net_data
+CTA_net <- data.table(CTA_net[CTA_net$Topic %in% TB_net_data$Topic,])
+TT_net <- TT_net[TT_net$Source %in% TB_net_data$Topic & 
+                   TT_net$Destination %in% TB_net_data$Topic,]
+# AT_trans_net: per-Source weights divided by that Source's total weight
+AT_net <- data.table(AT_net[AT_net$Destination %in% TB_net_data$Topic,])
+AT_trans_net <- AT_net[,list(Topic=Destination[1],Weight=Weight/sum(Weight)),by="Source"]  # NOTE(review): Destination[1] repeats the first topic on every row of a Source - confirm intended
+CTA_net <- data.table(CTA_net[CTA_net$Topic %in% TB_net_data$Topic,])
+
+##ASSOCIATE AGENTS WITH A SINGLE COUNTRY (the one with the largest summed weight)
+atc_sub <- CTA_net[,list(weight_sum=sum(Weight)), by=c("Agent","Country")]
+atc_sub <- atc_sub[, list(Country=Country[which.max(weight_sum)],
+                          Weight=max(weight_sum)),
+                   by=c("Agent")]
+
+###ONLY CARE ABOUT AGENTS IN THE CURRENT COUNTRIES
+AC_net <- atc_sub[atc_sub$Country %in% countries$V1,]
+names(AC_net) <- c("Source","Destination","Weight")
+AT_net <- AT_net[AT_net$Source %in% AC_net$Source,]
+AT_trans_net <- AT_trans_net[AT_trans_net$Source %in% AC_net$Source,]
+AA_net <- AA_net[AA_net$Source %in% AC_net$Source & AA_net$Destination %in% AC_net$Source,]
+
+##Create within-group countries using model-based clustering on the belief space
+##Westerners simply stay in their own group
+AC_net[grep(paste(westerners$V1,collapse="|"),AC_net$Source),]$Destination <-"WESTERNERS"
+# Per agent (Source): sum Revolution and Violence over the topics the agent links to
+z <- merge(data.frame(AT_net), TB_net_data, by.x="Destination",by.y="Topic",all.x=TRUE,all.y=FALSE)
+
+z <- ddply(z, .(Source),summarise,rev = sum(Revolution),viol=sum(Violence))
+d <- merge(z, AC_net, by.x="Source",by.y="Source")
+t <- d[d$Destination=="EGYPT",]  # EGYPT agents only
+library(mclust)
+clust <- Mclust(t[,c("rev","viol")],2:20)  # cluster on (rev, viol), trying 2 to 20 components
+f <- data.frame(agent=t$Source,group=paste(t$Destination[1],class=clust$classification))  # labels like "EGYPT <class>"; paste ignores the class= name
+t <- merge(t, f, by.x="Source",by.y="agent")
+##Have to do this for ORA
+t$viol <- t$viol*-1
+egypt_agents <- AC_net[AC_net$Destination =="EGYPT","Source",with=F]$Source
+egypt_aa <- AA_net[AA_net$Source %in% egypt_agents & AA_net$Destination %in% egypt_agents,]
+
+####Go create figure 5 in ORA
+write.csv(egypt_aa, "~/Desktop/aa.csv")
+write.csv(t, "~/Desktop/countries.csv")
+
+
+
+
+
+
+
+#######Topic by topic network, colored by belief
+# Colour by rank: i-th highest node gets the i-th colour (dark red -> white -> dark blue)
+network <- TT_net
+belief_data <- TB_net_data
+nodes <- data.frame(id=unique(c(network$Source,network$Destination)))
+nodes <- merge(nodes, belief_data, by.x="id",by.y="Topic",all.x=TRUE,all.y=FALSE)
+c_palatte <- colorRampPalette(c("dark red","white","dark blue"))(nrow(nodes))
+# Base order() instead of doBy's orderBy(): this script does not attach doBy itself
+nodes <- nodes[order(-nodes$Revolution),]
+nodes$Rev_Color <- c_palatte
+nodes <- nodes[order(-nodes$Violence),]
+nodes$Viol_Color <- c_palatte
+write.csv(nodes,"~/Desktop/nodes_as.csv")
+write.csv(network,"~/Desktop/nodes_net.csv")
+
diff --git a/analysis/results_functions.R b/analysis/results_functions.R
@@ -0,0 +1,25 @@
+# Read a name-mapping CSV (columns Term and Mapping) and return it with Term as
+# character and rows sorted by ascending Mapping. Uses base order(), so it works without doBy attached.
+get_names_data <- function(file_name){
+  name_map <- read.csv(file_name)
+  name_map$Term <- as.character(name_map$Term)
+  name_map[order(name_map$Mapping), ]
+}
+
+# Load a headerless simulation output CSV as a data.table and label it.
+#   file_name: path to the CSV; col_names: names for its columns, in order
+#   nAgents: number of rows per time period; timeper: time-period labels
+#   aNames$Term / aCountry$Group: per-agent name and group, repeated per period
+# Returns the data.table with tp, agent and country columns appended.
+get_data <- function(file_name,col_names,
+                     nAgents,aCountry,
+                     aNames,timeper){
+  data <- data.table(read.csv(file_name,header=FALSE))
+  # One vectorised rename: avoids 1:length() and name clashes part-way through a loop
+  old_names <- names(data)
+  setnames(data,old_names,col_names[seq_along(old_names)])
+  data$tp <- rep(timeper,each=nAgents)
+  data$agent <- rep(aNames$Term,length(timeper))
+  data$country <- rep(aCountry$Group, length(timeper))
+  data
+}
diff --git a/article_draft.pdf b/article_draft.pdf
diff --git a/data/results_overthrow.csv b/data/results_overthrow.csv
@@ -0,0 +1 @@
+Country,Date of Government Overthrow,Model PredictionALGERIA,None,NoneBAHRAIN,None,2/1/11EGYPT,2/1/11,10/1/10IRAN,None,10/1/10IRAQ,None,4/1/11JORDAN,None,NoneKUWAIT,None,NoneLEBANON,None,NoneLIBYA,8/1/11,2/1/11MOROCCO,None,NoneOMAN,None,NoneSAUDI ARABIA,None,8/1/10SYRIA,None (Civil War began in March),4/1/11TUNISIA,1/1/11,1/1/11U.A.E.,None,10/1/10YEMEN,1/1/11,3/1/11