Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
kennyjoseph committed Oct 7, 2013
0 parents commit 888f3cb
Show file tree
Hide file tree
Showing 32 changed files with 1,281,911 additions and 0 deletions.
10 changes: 10 additions & 0 deletions .Rhistory
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
ggsve
library(ggplot2)
source('D:/kjoseph/Dropbox/Kenny/current_papers/ArabSpring/github/filter_tune_(step2).R')
source('D:/kjoseph/Dropbox/Kenny/current_papers/ArabSpring/github/filter_tune_(step2).R')
source('D:/kjoseph/Dropbox/Kenny/current_papers/ArabSpring/github/filter_tune_(step2).R')
source('D:/kjoseph/Dropbox/Kenny/current_papers/ArabSpring/github/filter_tune_(step2).R')
traceback()
install.packages("ggplot2")
source('D:/kjoseph/Dropbox/Kenny/current_papers/ArabSpring/github/filter_tune_(step2).R')
source('D:/kjoseph/Dropbox/Kenny/current_papers/ArabSpring/github/filter_tune_(step2).R')
10 changes: 10 additions & 0 deletions analysis/.Rhistory
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
source('D:/kjoseph/Dropbox/Kenny/current_papers/current/ArabSpring/github/analysis/gen_results.R')
source('D:/kjoseph/Dropbox/Kenny/current_papers/current/ArabSpring/github/analysis/gen_results.R')
require(data.table)
files = Sys.glob("D:/kjoseph/ArabSpring/final_nets/rev*/30_*/*_agg_out.csv")
l <- vector("list",length(files))
for(i in 1:length(files) ){
l[[i]] <- fread(files[i])
}
dt <- rbindlist(l)
write.csv(dt, "D:/kjoseph/Dropbox/Kenny/current_papers/current/ArabSpring/github/results2.csv")
116 changes: 116 additions & 0 deletions analysis/analysis.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
library(data.table)
library(lubridate)
library(reshape)
library(plyr)
library(stringr)
library(ggplot2)
library(scales)
library(zoo)
library(reshape2)

# Global plot theme and the root folder that holds inputs and outputs.
theme_set(theme_bw(20))
source_dir <- "github/"

# Lower/upper quantile probabilities used for the quantile bands below.
lower_q <- 0.25
upper_q <- 0.75

# Columns that identify one simulation cell.
params <- c(
  "date", "country", "tp",
  "group_flip_to_positive", "group_flip_to_negative", "agent_bias"
)

# Load the simulation results and total both belief columns per cell.
d <- fread(paste0(source_dir, "results_sim_final.csv"))
country_level <- d[, list(Violence = sum(Violence), Revolution = sum(Revolution)), by = params]
country_level$date <- ymd(country_level$date)

# Replace the long official country names with short display names.
short_names <- c(
  "IRAN ISLAMIC REPUBLIC OF" = "IRAN",
  "LIBYAN ARAB JAMAHIRIYA" = "LIBYA",
  "UNITED ARAB EMIRATES" = "U.A.E."
)
for (long_name in names(short_names)) {
  country_level$country[country_level$country == long_name] <-
    short_names[[long_name]]
}

###Plot of results
# Long format: one row per parameter combination and belief variable.
cl <- melt(country_level, id=params)
# Drop the "tp" dimension: within each remaining parameter combination the
# aggregate is (second value - first value).
# NOTE(review): presumably the two values are ordered by tp, making this
# "late time period minus early time period" -- confirm the row order.
cl <- dcast(cl,formula(paste(paste(params[params!="tp"],collapse="+"),"variable",sep="~")),value.var="value", function(d){d[2]-d[1]})
cl <- cl[!is.na(cl$Revolution),]
# Average the change over the remaining simulation parameters.
cl <- ddply(cl, .(date,country), summarise, Violence = mean(Violence), Revolution=mean(Revolution))

cl <- melt(cl, id=c("date","country"))
cl <- cl[cl$country != "QATAR" & cl$country !="WESTERNERS",]

# Quantiles (lower_q, median, upper_q) across countries, per date and variable.
iqr_per_month <- ddply(cl, .(date,variable), function(f){
  qs <- quantile(f$value,probs=(c(lower_q,.5,upper_q)))
  data.frame(Lower=qs[1],Median=qs[2],Upper=qs[3])
})

# Rolling quantiles per country and variable over a right-aligned window of up
# to three observations (partial windows are allowed at the start).
q <- ddply(cl, .(country,variable), function(d){
  series <- zoo(d$value, d$date)
  rolled <- data.frame(rollapply(series,3, partial=TRUE,align="right",FUN=function(w){return(quantile(w,probs=c(lower_q,.5,upper_q)))}))

  data.frame(date=ymd(rownames(rolled)),low=rolled[,1],med=rolled[,2],high=rolled[,3])
})

blah <- merge(q,cl)
blah <- merge(blah, iqr_per_month)
# Violence above both the country's rolling upper bound and the monthly upper
# bound; Revolution below both lower bounds.
v <- with(blah,blah[variable=="Violence" & blah$value > high & blah$value > Upper,])
r <- with(blah,blah[variable=="Revolution" & blah$value < low & blah$value < Lower,])
# Months where both conditions hold; keep the first such row per country.
res <-merge(r[,c(1:3)],v[,c(1:3)],by=c("date","country"))
res <- res[!duplicated(res$country),1:2]

# Flag every row of cl whose (country, date) pair is one of the predictions.
# Matching on a combined key does not depend on how many rows of cl share that
# pair and yields all FALSE when there are no predictions.
cl$Prediction <- paste(cl$country, cl$date) %in% paste(res$country, res$date)



# Belief change over time, one facet per country. Layers: the two belief
# series, the per-month quantile range (black), the per-country rolling
# quantile range (blue) and the flagged prediction rows (red points).
v_plot <- ggplot(cl, aes(date,value,color=variable)) +
  theme(axis.text.x=element_text(angle=45,hjust=1), legend.title=element_blank()) +
  xlab("Month") +
  geom_line(size=1.3,alpha=.7) +
  facet_wrap(~country,nrow=4,scales="free_y") +
  geom_linerange(data=iqr_per_month,aes(y=Median,ymin=Lower,ymax=Upper),color='black') +
  geom_linerange(data=q, aes(y=med,ymin=low,ymax=high),color='blue') +
  geom_point(data=cl[cl$Prediction,],color='red', size=5) +
  ylab("Change in Belief (summation over agents)") +
  scale_x_datetime(breaks=date_breaks("3 months"),
                   labels=date_format("%b %Y"))

# One row per country: the model's predicted date (or "None") next to the
# hard-coded actual date for the four countries listed below.
res_out <- merge(
  data.frame(country=unique(cl$country),actual="None",stringsAsFactors=FALSE),
  res,
  all.x=TRUE
)
res_out$date <- as.character(res_out$date)
res_out$date[is.na(res_out$date)] <- "None"

actual_dates <- c(
  "TUNISIA" = "2011-01-01",
  "EGYPT" = "2011-02-01",
  "LIBYA" = "2011-08-01",
  "YEMEN" = "2011-01-01"
)
for (country_name in names(actual_dates)) {
  res_out$actual[res_out$country==country_name] <- actual_dates[[country_name]]
}
write.csv(res_out, paste0(source_dir,"results_overthrow.csv"))


##Get protests data
# Monthly protest counts, one column per country; missing counts become 0.
protests <- read.csv(paste0(source_dir,"analysis/protest_counts.csv"),stringsAsFactors=FALSE)
protests[is.na(protests)] <- 0
protests$date <- mdy(protests$date)
protests$QATAR <- NULL
# Make the column names match the country labels used for the model results.
names(protests)[names(protests)=="SAUDI.ARABIA"] <- "SAUDI ARABIA"
names(protests)[names(protests)=="U.A.E"] <- "U.A.E."
###Plot of protests
# QATAR was already dropped above. Removing it again with
# protests[, -which(names(protests) == "QATAR")] would index with -integer(0),
# which selects zero columns, so melt the data frame directly.
p1 <- ggplot(melt(protests, id="date"),
             aes(date,value))
p1 <- p1 + geom_point() + geom_line() + facet_wrap(~variable, nrow=2)
p1 <- p1 + theme(axis.text.x=element_text(angle=45,hjust=1))
p1 <- p1 + ylab("Number of Protests") + xlab("Month")

###Plot of protests w/ results
# Protest counts in long form, keyed like the model results.
melt_protests <- melt(protests,id="date")
names(melt_protests) <- c("date","country","protests")

# Join the "Violence" series to the protest counts, restricted to countries
# that received a prediction (IRAQ and IRAN excluded).
country_merge <- cl[cl$variable == "Violence",]
cl2 <- merge(country_merge, melt_protests, by=c("date","country"))
predicted_countries <- unique(res$country[! res$country %in% c("IRAQ","IRAN") ])
cl2 <- cl2[cl2$country %in% predicted_countries,]
cl2$Prediction <- NULL
cl2$variable <- NULL
cl2 <- melt(cl2, id=c("date","country"))

# Express each value as a fraction of the largest absolute value in its own
# (country, variable) series; ddply stores the result in column V1.
scale_to_series_max <- function(d){
  series <- cl2[cl2$country==d$country & cl2$variable==d$variable,]
  abs(d$value)/max(abs(series$value))
}
cl2 <- ddply(cl2, .(date,country,variable), scale_to_series_max)

# NOTE(review): the model series above is filtered on variable == "Violence"
# but the legend labels it "Revolution Belief" -- confirm which is intended.
prot_plot <- ggplot(cl2, aes(date,V1,color=variable)) +
  geom_point() +
  geom_line() +
  facet_wrap(~country,nrow=1) +
  theme(axis.text.x=element_text(angle=45, hjust=1)) +
  scale_x_datetime(breaks=date_breaks("3 months"),
                   labels=date_format("%b %Y"),
                   limits=c(ymd("2010-12-01"),
                            ymd("2011-12-01"))) +
  xlab("Month") +
  ylab("Percent of maximum value (per time series)") +
  scale_color_discrete("",labels=c("Revolution Belief","Number of Protests"))
prot_plot
118 changes: 118 additions & 0 deletions analysis/gen_results.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
require(data.table)
require(ggplot2)
require(plyr)
require(doBy)
require(reshape)
require(lubridate)
require(stringr)
require(snowfall)
require(data.table)
# Root folder of the repository checkout; also exported to the workers.
source_dir <- "github/"

# Start a snowfall cluster with 10 worker processes.
sfInit(parallel=TRUE,cpus=10)

# Attach the packages on every worker.
sfLibrary(data.table)
sfLibrary(ggplot2)
sfLibrary(plyr)
sfLibrary(doBy)
sfLibrary(reshape)
sfLibrary(lubridate)
sfLibrary(stringr)

# Ship the shared path and the helper function definitions to the workers.
sfExport("source_dir")
sfSource(paste0(source_dir,"analysis/results_functions.R"))
####Agent Country Replication Date TP RevB ViolB Rev+K Rev-K Viol+K Viol-K


# Post-process every simulation run directory in parallel. For each directory
# the worker reads the belief and knowledge outputs, sums each agent's
# knowledge over the knowledge items linked to every belief/sign combination,
# and writes one "<date><gfp>_<gfn>_<ab>_agg_out.csv" file into the directory.
parSapply(sfGetCluster(),Sys.glob("final_nets/rev_*/30*"), function(this_dir){
  print(this_dir)

  # Skip runs whose log contains no "End time:" line (presumably unfinished).
  # The original `return;` only evaluated the `return` function object and
  # never left the function; return(...) must be called to exit early.
  # Reading by path also avoids leaving an unclosed connection object behind.
  z <- readLines(paste0(this_dir,"/out.txt"))
  if(!any(grepl("End time:", z, fixed=TRUE))){
    return(invisible(NULL))
  }

  tps <- c(2,30)
  #get replication num, date, from param
  params <- read.csv(paste0(this_dir,"/params.csv"),stringsAsFactors=FALSE)
  num_agents <- as.numeric(params[params$parameter=="Agent Count","value"])

  # The "Date" parameter gets "-01" appended before parsing as year-month-day.
  date <- ymd(paste0(params[params$parameter=="Date","value"],"-01"))

  ##Agent-Country
  groups <- read.csv(paste0(this_dir,"/../AG.csv"))
  countries <- read.csv(paste0(source_dir,"gold_topics/countries.txt"),header=FALSE,stringsAsFactors=FALSE)
  countries <- rbind(countries,data.frame(V1="WESTERNERS"))
  agent_countries <- groups[groups$Group %in% countries$V1,]

  ##Agent-Belief (has country as well)
  agent_names <- get_names_data(paste0(this_dir,"/../agent_map.csv"))
  belief_names <- get_names_data(paste0(this_dir,"/../beliefs_map.csv"))
  beliefs <- get_data(paste0(this_dir,"/belief_output.csv"),
                      belief_names$Term,
                      num_agents,agent_countries,
                      agent_names,tps)

  # Number repeated terms 1..k within each Term so that "Term index" gives
  # distinct column labels, then restore the Mapping order.
  knowledge_names <- get_names_data(paste0(this_dir,"/../knowledge_map.csv"))
  kn <- ddply(knowledge_names,.(Term),function(l){data.frame(v=1:nrow(l))})
  knowledge_names <- orderBy(~Term,knowledge_names)
  kn<- orderBy(~Term,kn)
  knowledge_names$index <- kn$v
  knowledge_names <- orderBy(~Mapping, knowledge_names)

  ##Agent-KnowledgeBelief
  ##Get Agent-Knowledge
  knowledge <- get_data(paste0(this_dir,"/knowledge_output.csv"),
                        paste(knowledge_names$Term,knowledge_names$index),
                        num_agents,agent_countries,
                        agent_names,tps)
  ##Get Knowledge-Belief
  tb <- read.csv(paste0(this_dir,"/../TB_indexed.csv"))
  # NOTE(review): presumably converts 0-based targets into 1-based column
  # positions of `knowledge` -- confirm against the TB_indexed.csv writer.
  tb$target <- tb$target+1
  tb <- merge(tb, belief_names,by.x="source",by.y="Mapping")
  tb$PN <- ifelse(tb$weight >0, "Pos","Neg")
  tb$Belief <- paste(tb$Term,tb$PN,sep="_")
  # Unique knowledge column positions per "<Term>_<Pos|Neg>" label.
  indicies <- tapply(tb$target,tb$Belief, unique)

  ##Sum agent knowledge to beliefs
  ab_mat <- matrix(data=0,nrow=nrow(knowledge),ncol=length(indicies))
  for(i in seq_along(indicies)){
    kb_set <- indicies[[i]]
    ab_mat[,i] <- apply(knowledge,1,function(l){sum(as.numeric(l[kb_set]))})
  }
  ab_by_k <- data.frame(ab_mat)
  names(ab_by_k) <- names(indicies)
  ab_by_k$agent <- knowledge$agent
  ab_by_k$tp <- knowledge$tp

  gfp <- params[params$parameter=="group_flip_to_positive","value"]
  gfn <- params[params$parameter=="group_flip_to_negative","value"]
  ab <-params[params$parameter=="agent_bias","value"]
  # NOTE(review): 1:4 assumes exactly four belief/sign columns, matching the
  # Revolution_/Violence_ Pos/Neg columns used below -- confirm.
  out <- cbind(beliefs,ab_by_k[,1:4])
  out$replication <- 1
  out$date <- date
  out$group_flip_to_positive <- gfp
  out$group_flip_to_negative <- gfn
  out$agent_bias <- ab

  out[, RevolutionByKnowledge:= Revolution_Pos-Revolution_Neg]
  out[, ViolenceByKnowledge:= Violence_Pos-Violence_Neg]
  write.csv(out, paste0(this_dir,"/",date,paste(gfp,gfn,ab,"agg_out.csv",sep="_")))
  print("DONE")
})


# All runs are processed; release the worker processes started by sfInit().
sfStop()

# Combine every per-run aggregate file into one results table.
files <- Sys.glob("final_nets/rev*/30_*/*_agg_out.csv")
if (length(files) == 0) {
  # 1:length(files) would iterate over c(1, 0) here and fail inside fread()
  # with an unrelated message; fail early and clearly instead.
  stop("No *_agg_out.csv files found under final_nets/; nothing to combine.",
       call. = FALSE)
}
l <- lapply(files, fread)
dt <- rbindlist(l)
write.csv(dt, "github/results_sim_final.csv")




111 changes: 111 additions & 0 deletions analysis/generate_figures_3,4,5.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
library(reshape2)
library(plyr)
library(data.table)
library(ggplot2)

# Repository root and the network folder used for the figures.
top_dir <- "~/Dropbox/Kenny/current_papers/ArabSpring/github/"
file <- paste0(top_dir,"rev_2011-01/")

# Header-less lists of country names and westerner identifiers.
countries <- read.csv(paste0(top_dir,"gold_topics/countries.txt"),header=FALSE,stringsAsFactors=FALSE)
westerners <- read.csv(paste0(top_dir,"gold_topics/westerners.csv"),header=FALSE,stringsAsFactors=FALSE)

# Helper functions used by the rest of this script.
source(paste0(top_dir,"setup/data_functions.R"))
source(paste0(top_dir,"setup/plotting_functions.R"))

# Read one CSV from the network folder, keeping strings as character.
load_net <- function(csv_name) {
  read.csv(paste0(file, csv_name), stringsAsFactors=FALSE)
}

TT_net <- load_net("/TT.csv")
AA_net <- load_net("/AA.csv")
AT_net <- load_net("/AT.csv")
CTA_net <- load_net("/ATC.csv")
F1 <- load_net("/F1.csv")
names(CTA_net) <- c("Country","Topic","Agent","Weight")

# Topic-by-belief table produced by get_tb_data() (defined outside this file).
TB_net_data <- get_tb_data(F1,file)

# Long-form edge list of the non-zero topic/belief values.
TB_net <- melt(TB_net_data)
TB_net <- TB_net[TB_net$value != 0,]

# Signed, rounded-up log magnitude of each value; a result of 0 becomes 1.
log_weight <- sign(TB_net$value)* ceiling(log(abs(TB_net$value)))
TB_net$v2 <- ifelse(log_weight==0,1,log_weight)

# Keep columns 2, 1 and 4 and rename them.
TB_net <- TB_net[,c(2,1,4)]
names(TB_net) <- c("Belief","Topic","Weight")

# Keep only topic-topic edges whose both ends appear in the belief table.
belief_topics <- TB_net_data$Topic
TT_net <- TT_net[TT_net$Source %in% belief_topics &
                   TT_net$Destination %in% belief_topics,]

####First plot, f1_file is from get_tb_data
# Precision, recall and F1 of each topic against each belief topic, plus a
# weighted F1 (log of the shared count times F1). Rows with no shared count
# are dropped first.
f1_file <- data.table(F1)
f1_file <- f1_file[f1_file$SameCount > 0,]
f1_file[, Precision := SameCount/TopicArticleCount]
f1_file[, Recall := SameCount/BeliefTopicCount]
f1_file[, F1 := 2*(Precision*Recall)/(Precision+Recall)]
f1_file[, WF1 := log(SameCount) * F1]
f1_file[, Metric := WF1]
# A topic is not compared against itself.
f1_file <- f1_file[f1_file$Topic !=f1_file$BeliefTopic,]

#######Figure 3
topic_distro_plot(
  c("INTERNET SOCIAL NETWORKING",
    "FOOD PRICES"),
  data.frame(f1_file),
  "WF1"
)

###Figure 4
ggplot(TB_net_data, aes(Revolution,Violence)) + geom_point()

######Work for Figure 5
# Prepares two CSV files (agent-agent edges and per-agent attributes for the
# EGYPT agents) that are written to ~/Desktop for use in ORA.

# Restrict every network to topics that appear in the belief table.
CTA_net <- data.table(CTA_net[CTA_net$Topic %in% TB_net_data$Topic,])
TT_net <- TT_net[TT_net$Source %in% TB_net_data$Topic &
TT_net$Destination %in% TB_net_data$Topic,]

AT_net <- data.table(AT_net[AT_net$Destination %in% TB_net_data$Topic,])
# Per-agent weights normalised to sum to 1.
# NOTE(review): Topic=Destination[1] labels every row of an agent with that
# agent's FIRST topic; Topic=Destination looks like the intent -- confirm.
AT_trans_net <- AT_net[,list(Topic=Destination[1],Weight=Weight/sum(Weight)),by="Source"]
# NOTE(review): this repeats the CTA_net filter applied a few lines above.
CTA_net <- data.table(CTA_net[CTA_net$Topic %in% TB_net_data$Topic,])

##ASSOCIATE AGENTS WITH A SINGLE COUNTRY
# Sum weights per (Agent, Country), then keep for each agent the country with
# the largest summed weight.
atc_sub <- CTA_net[,list(weight_sum=sum(Weight)), by=c("Agent","Country")]
atc_sub <- atc_sub[, list(Country=Country[which.max(weight_sum)],
Weight=max(weight_sum)),
by=c("Agent")]

###ONLY CARE ABOUT AGENTS IN THE CURRENT COUNTRIES
AC_net <- atc_sub[atc_sub$Country %in% countries$V1,]
names(AC_net) <- c("Source","Destination","Weight")
AT_net <- AT_net[AT_net$Source %in% AC_net$Source,]
AT_trans_net <- AT_trans_net[AT_trans_net$Source %in% AC_net$Source,]
AA_net <- AA_net[AA_net$Source %in% AC_net$Source & AA_net$Destination %in% AC_net$Source,]

##Create within-group countries using model-based clustering on the belief space
##Westerners simply stay in their own group
# The westerner identifiers are joined with "|" into one regular expression.
# NOTE(review): identifiers containing regex metacharacters, or an empty
# westerners list, would change which agents match -- confirm the input.
AC_net[grep(paste(westerners$V1,collapse="|"),AC_net$Source),]$Destination <-"WESTERNERS"

# Attach the belief values of each topic to the agent-topic edges.
z <- merge(data.frame(AT_net), TB_net_data, by.x="Destination",by.y="Topic",all.x=TRUE,all.y=FALSE)

# Per agent: sum of the Revolution and Violence values over its topics.
z <- ddply(z, .(Source),summarise,rev = sum(Revolution),viol=sum(Violence))
d <- merge(z, AC_net, by.x="Source",by.y="Source")
# Only the EGYPT agents are clustered.
t <- d[d$Destination=="EGYPT",]
library(mclust)
# Model-based clustering on (rev, viol), trying 2 to 20 components.
clust <- Mclust(t[,c("rev","viol")],2:20)
# Group label is "<country> <cluster id>"; the `class=` name is just another
# value pasted into the string.
f <- data.frame(agent=t$Source,group=paste(t$Destination[1],class=clust$classification))
t <- merge(t, f, by.x="Source",by.y="agent")
##Have to do this for ORA
t$viol <- t$viol*-1
egypt_agents <- AC_net[AC_net$Destination =="EGYPT","Source",with=F]$Source
egypt_aa <- AA_net[AA_net$Source %in% egypt_agents & AA_net$Destination %in% egypt_agents,]

####Go create figure 5 in ORA
write.csv(egypt_aa, "~/Desktop/aa.csv")
write.csv(t, "~/Desktop/countries.csv")







#######Topic by topic network, colored by belief
# Build a node table for the topic-topic network and give each node one colour
# by Revolution rank and one by Violence rank, using the same palette.

network <- TT_net
belief_data <- TB_net_data
nodes <- data.frame(id=unique(c(network$Source,network$Destination)))
nodes <- merge(nodes, belief_data, by.x="id",by.y="Topic",all.x=TRUE,all.y=FALSE)

# One palette entry per node, running dark red -> white -> dark blue.
c_palatte <- colorRampPalette(c("dark red","white","dark blue"))(nrow(nodes))

# Sort descending with base order(): this script never attaches doBy, so
# orderBy() is only available if something else happened to load it.
# Nodes with a missing value sort last.
nodes <- nodes[order(-nodes$Revolution), , drop=FALSE]
nodes$Rev_Color <- c_palatte
nodes <- nodes[order(-nodes$Violence), , drop=FALSE]
nodes$Viol_Color <- c_palatte
write.csv(nodes,"~/Desktop/nodes_as.csv")
write.csv(network,"~/Desktop/nodes_net.csv")

25 changes: 25 additions & 0 deletions analysis/results_functions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#' Read a term/mapping lookup table and sort it by its Mapping column.
#'
#' @param file_name Path to a CSV file with a header row containing at least
#'   the columns `Term` and `Mapping`.
#' @return The file's contents as a data frame with `Term` converted to
#'   character and rows in ascending `Mapping` order.
get_names_data <- function(file_name){
  mapping <- read.csv(file_name)
  stopifnot(all(c("Term", "Mapping") %in% names(mapping)))
  mapping$Term <- as.character(mapping$Term)
  # Base order() keeps this helper independent of doBy being attached by the
  # caller; ties keep their original relative order.
  mapping[order(mapping$Mapping), , drop=FALSE]
}


#' Read a header-less simulation output file and label its rows and columns.
#'
#' @param file_name Path to a CSV file without a header row.
#' @param col_names Names for the file's columns, in file order.
#' @param nAgents Number of agents; each time period is repeated this many
#'   times in the `tp` column.
#' @param aCountry Data frame whose `Group` column is repeated once per time
#'   period to fill the `country` column.
#' @param aNames Data frame whose `Term` column is repeated once per time
#'   period to fill the `agent` column.
#' @param timeper Vector of time-period labels.
#' @return A data.table with the renamed columns plus `tp`, `agent` and
#'   `country`.
get_data <- function(file_name,col_names,
                     nAgents,aCountry,
                     aNames,timeper){
  data <- data.table(read.csv(file_name,header=FALSE))
  n_data <- names(data)
  # Rename all columns in one call. If fewer names than columns are supplied,
  # only the leading columns are renamed instead of passing NA names to
  # setnames().
  n_named <- min(length(n_data), length(col_names))
  if (n_named > 0) {
    setnames(data, n_data[seq_len(n_named)], col_names[seq_len(n_named)])
  }
  # NOTE(review): assumes the rows are stacked one block of agents per time
  # period, in the order of `timeper` -- confirm against the file writer.
  data$tp <- rep(timeper,each=nAgents)
  data$agent <- rep(aNames$Term,length(timeper))
  data$country <- rep(aCountry$Group, length(timeper))
  data
}



Binary file added article_draft.pdf
Binary file not shown.
1 change: 1 addition & 0 deletions data/results_overthrow.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Country,Date of Government Overthrow,Model Prediction
ALGERIA,None,None
BAHRAIN,None,2/1/11
EGYPT,2/1/11,10/1/10
IRAN,None,10/1/10
IRAQ,None,4/1/11
JORDAN,None,None
KUWAIT,None,None
LEBANON,None,None
LIBYA,8/1/11,2/1/11
MOROCCO,None,None
OMAN,None,None
SAUDI ARABIA,None,8/1/10
SYRIA,None (Civil War began in March),4/1/11
TUNISIA,1/1/11,1/1/11
U.A.E.,None,10/1/10
YEMEN,1/1/11,3/1/11
Expand Down
Loading

0 comments on commit 888f3cb

Please sign in to comment.