Skip to content

Commit

Permalink
more plots / descriptive findings
Browse files Browse the repository at this point in the history
  • Loading branch information
rochelleterman committed Oct 11, 2016
1 parent 79dbb63 commit 1a308a2
Show file tree
Hide file tree
Showing 47 changed files with 70 additions and 27 deletions.
14 changes: 7 additions & 7 deletions 02_NER.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 1,
"metadata": {
"collapsed": false
},
Expand All @@ -33,18 +33,18 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"9223372036854775807"
"131072"
]
},
"execution_count": 12,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -58,7 +58,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 3,
"metadata": {
"collapsed": false
},
Expand All @@ -69,7 +69,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 4,
"metadata": {
"collapsed": false
},
Expand All @@ -82,7 +82,7 @@
" u'PERSON': [u'Nicolae Ceausescu', u'Ceausescu']}"
]
},
"execution_count": 14,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
5 changes: 3 additions & 2 deletions 03_stm_estimate.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,11 @@ dev.off()
labelTopics(model)

# Example Docs
findThoughts(model,texts=meta$TITLE,n=3,topics=1:15)
thought <- findThoughts(model,texts=meta$TITLE,n=10,topics=6)
plot(thought)

# Add labels
labels = c("Business", "Sports", "Reproductive Health", "Travel", "Fashion", "UN", "Sexual Assault", "Combat", "Women's Rights and Gender Equality", "Politics", "Profiles", "Human Interest", "Marriage & Family", "Religion", "Cancer")
labels = c("Business", "Sports", "Public Health", "Fashion", "Arts", "United Nations", "Sexual Assault", "Combat", "Women's Rights and Gender Equality", "Politics", "Profiles", "Human Interest", "Marriage & Family", "Religion", "Reproductive & Personal Health")

# save data
save(docs, vocab, meta, labels, model, file = "Data/stm.RData")
18 changes: 16 additions & 2 deletions 04_stm_analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@ print(xtable(dat), type = "html", file="Results/stm/topics.html")
##################################

# Corpus Summary of Topic Proportions
jpeg("Results/stm/corpus-summary.jpeg",width=700,height=550,type="quartz")
pdf("Results/stm/corpus-summary.pdf",width=9.5,height=9)
plot.STM(model,type="summary",custom.labels=labels,main="")
dev.off()

labelTopics(model)

# Topic Correlation
mod.out.corr<-topicCorr(model)
plot.topicCorr(mod.out.corr)
Expand All @@ -59,13 +61,25 @@ for (i in 1:15){
dev.off()
}

# Write Topic Proportion Estimates by Region - PDFs
# Writes one point-estimate plot per topic (1.pdf ... 15.pdf), titled with the
# matching entry of `labels`, followed by a larger standalone figure for
# topic 9 saved as rights.pdf.
pdf_dir <- "Results/stm/region-proportion-plots/pdfs"
# pdf() cannot open a file inside a directory that does not exist, so make
# sure the output directory is there before plotting.
dir.create(pdf_dir, recursive = TRUE, showWarnings = FALSE)

for (i in 1:15) {
  # The path is built inline rather than stored in a variable called `file`,
  # which would shadow base::file() for the rest of the session.
  pdf(file.path(pdf_dir, paste0(i, ".pdf")), width = 4, height = 3)
  plot.estimateEffect(prep, "REGION", method = "pointestimate", topics = i,
                      printlegend = TRUE, labeltype = "custom",
                      custom.labels = regions, main = labels[i],
                      ci.level = .95, nsims = 100)
  dev.off()
}

# Larger version of the topic 9 plot with a shortened title
pdf(file.path(pdf_dir, "rights.pdf"), width = 9, height = 5.5)
plot.estimateEffect(prep, "REGION", method = "pointestimate", topics = 9,
                    printlegend = TRUE, labeltype = "custom",
                    custom.labels = regions, main = "Women's Rights",
                    ci.level = .95, nsims = 100)
dev.off()

#######################################################
######### Combine Meta Data + Topic Distributions #####
#######################################################

# Number of Documents by Number of Topics matrix of topic proportions
topic.docs <- as.data.frame(model$theta)
colnames(topic.docs) <- c("business", "sports", "reproductive health", "travel", "fashion", "UN", "rape", "combat", "rights", "politics", "lifestories", "perspectives", "marriage.family", "religion", "cancer")
# NOTE(review): columns 4 and 5 are still named "travel" and "fashion", while
# the topic labels assigned in 03_stm_estimate.R now read "Fashion" and "Arts"
# for topics 4 and 5. Confirm whether these two names should be updated too;
# any code that refers to the columns by name would have to change with them.
colnames(topic.docs) <- c("business", "sports", "public health", "travel", "fashion", "UN", "rape", "combat", "rights", "politics", "lifestories", "perspectives", "marriage.family", "religion", "reproductive health")
# add column for top topic for each article
topic.docs$top.topic <- names(topic.docs)[apply(topic.docs, 1, which.max)]

Expand Down
19 changes: 19 additions & 0 deletions 08_descriptive.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
rm(list=ls())
setwd("~/Dropbox/berkeley/Git-Repos/worlds-women")
library(plyr)
library(dplyr)
library(ggplot2)
library(reshape2)

Expand Down Expand Up @@ -44,6 +45,24 @@ summary(as.factor(rt$n.binary))
# number of obs > 1979
nrow(rt[rt$year > 1979,])# 6292

##################################
######## women's rights #######
##################################

# make composite
# Row-wise mean of the wopol, wosoc and wecon columns. Missing components are
# ignored; a row with all three missing yields NaN.
rt$women_composite <- rowMeans(cbind(rt$wopol, rt$wosoc, rt$wecon), na.rm = TRUE)
# Base-graphics histogram. histogram() belongs to lattice, which is not among
# the library() calls visible at the top of this script.
hist(rt$women_composite)

# split into 2 samples
# which() drops rows where muslim.maj is NA. Plain logical indexing would turn
# each of those rows into an all-NA row in both subsets.
muslim <- rt[which(rt$muslim.maj == 1), ]
non.muslim <- rt[which(rt$muslim.maj == 0), ]

# best n worst muslim countries
# Mean composite score per country, printed from highest to lowest.
arrange(
  ddply(muslim, .(country), summarize,
        record = mean(women_composite, na.rm = TRUE)),
  desc(record)
)

# best n worst non-muslim countries
arrange(
  ddply(non.muslim, .(country), summarize,
        record = mean(women_composite, na.rm = TRUE)),
  desc(record)
)

##################################
######## Words per Topics #######
##################################
Expand Down
1 change: 1 addition & 0 deletions 09_model-tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ stargazer(rt, type="text")
# subset with an observation
rt.1 <- rt[rt$n.binary==1,]
summary(rt.1$muslim)
stargazer(rt.1, type="text")

# israel
rt$mena[rt$ccode == 666] <- 0
Expand Down
18 changes: 13 additions & 5 deletions 10_model.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ nb1 <- glm.nb(n.docs ~ n.docs.lag + count + women_composite*muslim.maj + polity2
summary(nb1)
nb1.se <- coeftest(nb1, vcov=function(x) vcovHC(x, cluster="group", type="HC1"))[,2]
jpeg("Results/regressions/interactive-plots/nb1.jpeg",width=700,height=500,type="quartz")
pdf("Results/regressions/interactive-plots/nb1.pdf",width=7,height=5)
interaction_plot_binary(nb1, effect="women_composite", moderator="muslim.maj", interaction="women_composite:muslim.maj", factor_labels=c("Not Muslim Majority","Muslim Majority"), xlabel="", ylabel="Marginal Effect of Women's Rights on Coverage", title="")
dev.off()
Expand All @@ -382,7 +382,7 @@ nb2 <- glm.nb(n.docs ~ n.docs.lag + count + women_composite*mena + polity2 + dom
summary(nb2)
nb2.se <- coeftest(nb2, vcov=function(x) vcovHC(x, cluster="group", type="HC1"))[,2]
jpeg("Results/regressions/interactive-plots/nb2.jpeg",width=700,height=500,type="quartz")
pdf("Results/regressions/interactive-plots/nb2.pdf",width=7,height=5)
interaction_plot_binary(nb2, effect="women_composite", moderator="mena", interaction="women_composite:mena", factor_labels=c("Not Mena","Mena"), xlabel="", ylabel="Marginal Effect of Women's Rights on Coverage", title="")
dev.off()
Expand All @@ -391,7 +391,7 @@ nb3 <- glm.nb(n.docs ~ n.docs.lag + count + women_composite*muslim + polity2 + d
summary(nb3)
nb3.se <- coeftest(nb3, vcov=function(x) vcovHC(x, cluster="group", type="HC1"))[,2]
jpeg("Results/regressions/interactive-plots/nb3.jpeg",width=700,height=500,type="quartz")
pdf("Results/regressions/interactive-plots/nb3.pdf",width=7,height=5)
interaction_plot_continuous(nb3, effect="women_composite", moderator="muslim", interaction="women_composite:muslim", mean=T, title="",xlabel="Percentage Muslim", ylabel="Marginal Effect of Women's Rights on Coverage")
dev.off()
Expand Down Expand Up @@ -603,10 +603,16 @@ stargazer(nb.r7.1, nb.r7.2, nb.r7.3, type = "html", out = "Results/regressions/r
heckit1 <- heckit(n.binary ~ n.binary.lag + count + women_composite*muslim.maj + polity2 + domestic9 + log(pop.wdi) +log(gdp.pc.un),
rights ~ rights.lag + women_composite + muslim.maj + polity2 + physint,
rt )
summary(heckit1)
summary(heckit1$lm)
heckit1.se = coeftest(heckit1$lm, vcov=function(x) vcovHC(x, cluster="group", type="HC1"))[,2]
heckit1.se
# Coefficient Plot
# Plots the heckit1$lm coefficients for the five XO-prefixed terms and saves
# the figure as a PDF.
# library() stops with an error if coefplot is not installed; require() would
# only return FALSE and let the coefplot() call below fail later.
library(coefplot)
pdf(file = "Results/regressions/main-results/heckit1-coefplot.pdf", width = 8, height = 5)
# Display names follow the covariate labels used in the stargazer table for
# these models: polity2 is "Democracy" and physint is "Physical Integrity
# Rights" (the two were previously swapped).
coefplot(heckit1$lm,
         title = "",
         coefficients = c("XOrights.lag", "XOwomen_composite", "XOmuslim.maj",
                          "XOpolity2", "XOphysint"),
         newNames = c(XOrights.lag = "Lagged DV",
                      XOwomen_composite = "Women's Rights Index",
                      XOmuslim.maj = "Muslim Majority",
                      XOpolity2 = "Democracy",
                      XOphysint = "Physical Integrity Rights"))
dev.off()
# MENA
heckit2 <- heckit(n.binary ~ n.binary.lag + count + women_composite*mena + polity2 + domestic9 + log(pop.wdi) +log(gdp.pc.un),
rights ~ rights.lag + women_composite + mena + polity2 + physint,
Expand All @@ -620,8 +626,10 @@ heckit3 <- heckit(n.binary ~ n.binary.lag + count + women_composite*muslim + pol
summary(heckit3$lm)
heckit3.se = coeftest(heckit3$lm, vcov=function(x) vcovHC(x, cluster="group", type="HC1"))[,2]
# print
# print - HTML
stargazer(heckit1$lm, heckit2$lm, heckit3$lm, type = "html", out = "Results/regressions/main-results/heckit.html", label = "table:heckit", style = "ajps", title = "Two-Step Analysis of Rights Focus in U.S. News Coverage about Women Abroad", se = list(heckit1.se, heckit2.se, heckit3.se), notes="Robust standard errors clustered on country appear in parentheses.", dep.var.labels = "Rights Focus", covariate.labels=c("Intercept", "Lagged DV", "Women's Rights Index","Muslim Majority","MENA", "Muslim Percentage", "Democracy", "Physical Integrity Rights"), star.cutoffs = c(0.05, 0.01, 0.001))
```

## 4.2 Partial Models
Expand Down
Binary file added Results/regressions/interactive-plots/nb1.pdf
Binary file not shown.
Binary file added Results/regressions/interactive-plots/nb2.pdf
Binary file not shown.
Binary file added Results/regressions/interactive-plots/nb3.pdf
Binary file not shown.
Binary file not shown.
Binary file removed Results/stm/corpus-summary.jpeg
Binary file not shown.
Binary file added Results/stm/corpus-summary.pdf
Binary file not shown.
Binary file added Results/stm/labels-1.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Results/stm/labels-2.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 5 additions & 5 deletions Results/stm/region-distributions-per-topic.csv
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
"","Africa","Asia","EECA","LA","MENA","West","Total"
"Business",7,40.79,5.34,6.92,13.26,26.69,100
"Sports",4.03,20.43,5.62,9.57,7.62,52.73,100
"Reproductive Health",30.73,21.94,4.39,6.51,11.23,25.2,100
"Travel",10.5,29.68,5.13,8.01,15.42,31.27,100
"Fashion",6.3,25.59,7.36,7.34,12.09,41.32,100
"UN",16.45,30.22,5.57,6.5,26.22,15.05,100
"Public Health",30.73,21.94,4.39,6.51,11.23,25.2,100
"Fashion",10.5,29.68,5.13,8.01,15.42,31.27,100
"Arts",6.3,25.59,7.36,7.34,12.09,41.32,100
"United Nations",16.45,30.22,5.57,6.5,26.22,15.05,100
"Sexual Assault",8.33,40.77,6.23,10.24,19.21,15.21,100
"Combat",8.16,19.06,9.03,6.67,44.65,12.44,100
"Women's Rights and Gender Equality",7.75,29.24,3.61,5.86,28.44,25.1,100
Expand All @@ -13,4 +13,4 @@
"Human Interest",10.27,29.68,5.65,6.51,20.76,27.12,100
"Marriage & Family",15.19,38.79,4.54,5.68,24.23,11.56,100
"Religion",10.64,13.5,3.11,2.21,52.15,18.4,100
"Cancer",9.26,25.18,4.84,7.79,7.03,45.89,100
"Reproductive & Personal Health",9.26,25.18,4.84,7.79,7.03,45.89,100
Binary file modified Results/stm/region-proportion-plots/1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified Results/stm/region-proportion-plots/10.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified Results/stm/region-proportion-plots/11.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified Results/stm/region-proportion-plots/12.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified Results/stm/region-proportion-plots/13.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified Results/stm/region-proportion-plots/14.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified Results/stm/region-proportion-plots/15.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified Results/stm/region-proportion-plots/2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified Results/stm/region-proportion-plots/3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified Results/stm/region-proportion-plots/4.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified Results/stm/region-proportion-plots/5.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified Results/stm/region-proportion-plots/6.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified Results/stm/region-proportion-plots/7.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified Results/stm/region-proportion-plots/8.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified Results/stm/region-proportion-plots/9.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Results/stm/region-proportion-plots/pdfs/1.pdf
Binary file not shown.
Binary file added Results/stm/region-proportion-plots/pdfs/10.pdf
Binary file not shown.
Binary file added Results/stm/region-proportion-plots/pdfs/11.pdf
Binary file not shown.
Binary file added Results/stm/region-proportion-plots/pdfs/12.pdf
Binary file not shown.
Binary file added Results/stm/region-proportion-plots/pdfs/13.pdf
Binary file not shown.
Binary file added Results/stm/region-proportion-plots/pdfs/14.pdf
Binary file not shown.
Binary file added Results/stm/region-proportion-plots/pdfs/15.pdf
Binary file not shown.
Binary file added Results/stm/region-proportion-plots/pdfs/2.pdf
Binary file not shown.
Binary file added Results/stm/region-proportion-plots/pdfs/3.pdf
Binary file not shown.
Binary file added Results/stm/region-proportion-plots/pdfs/4.pdf
Binary file not shown.
Binary file added Results/stm/region-proportion-plots/pdfs/5.pdf
Binary file not shown.
Binary file added Results/stm/region-proportion-plots/pdfs/6.pdf
Binary file not shown.
Binary file added Results/stm/region-proportion-plots/pdfs/7.pdf
Binary file not shown.
Binary file added Results/stm/region-proportion-plots/pdfs/8.pdf
Binary file not shown.
Binary file added Results/stm/region-proportion-plots/pdfs/9.pdf
Binary file not shown.
Binary file not shown.
12 changes: 6 additions & 6 deletions Results/stm/topics.html
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
<!-- html table generated in R 3.2.3 by xtable 1.7-4 package -->
<!-- Tue Jan 12 17:45:15 2016 -->
<!-- Sat Oct 8 11:49:18 2016 -->
<table border=1>
<tr> <th> </th> <th> Labels </th> <th> Probability </th> <th> FREX </th> </tr>
<tr> <td align="right"> 1 </td> <td> Business </td> <td> said, work, compani, year, percent, job, busi, worker, million, market </td> <td> compani, bank, industri, factori, employ, market, employe, busi, corpor, manag </td> </tr>
<tr> <td align="right"> 2 </td> <td> Sports </td> <td> team, women, game, play, world, said, olymp, sport, player, first </td> <td> game, olymp, sport, player, soccer, athlet, coach, team, medal, championship </td> </tr>
<tr> <td align="right"> 3 </td> <td> Reproductive Health </td> <td> cancer, health, women, doctor, said, hospit, aid, breast, clinic, year </td> <td> cancer, infect, patient, clinic, virus, hospit, doctor, surgeri, breast, health </td> </tr>
<tr> <td align="right"> 4 </td> <td> Travel </td> <td> black, dress, one, cloth, wear, design, street, fashion, citi, white </td> <td> restaur, jacket, shirt, color, skirt, blue, worn, cloth, fashion, pant </td> </tr>
<tr> <td align="right"> 5 </td> <td> Fashion </td> <td> film, book, show, art, work, stori, life, one, play, write </td> <td> film, artist, novel, art, museum, theater, movi, charact, fiction, reader </td> </tr>
<tr> <td align="right"> 6 </td> <td> UN </td> <td> women, said, will, right, confer, organ, group, world, issu, govern </td> <td> confer, deleg, forum, organ, meet, intern, secretari, peac, committe, statement </td> </tr>
<tr> <td align="right"> 3 </td> <td> Public Health </td> <td> cancer, health, women, doctor, said, hospit, aid, breast, clinic, year </td> <td> cancer, infect, patient, clinic, virus, hospit, doctor, surgeri, breast, health </td> </tr>
<tr> <td align="right"> 4 </td> <td> Fashion </td> <td> black, dress, one, cloth, wear, design, street, fashion, citi, white </td> <td> restaur, jacket, shirt, color, skirt, blue, worn, cloth, fashion, pant </td> </tr>
<tr> <td align="right"> 5 </td> <td> Arts </td> <td> film, book, show, art, work, stori, life, one, play, write </td> <td> film, artist, novel, art, museum, theater, movi, charact, fiction, reader </td> </tr>
<tr> <td align="right"> 6 </td> <td> United Nations </td> <td> women, said, will, right, confer, organ, group, world, issu, govern </td> <td> confer, deleg, forum, organ, meet, intern, secretari, peac, committe, statement </td> </tr>
<tr> <td align="right"> 7 </td> <td> Sexual Assault </td> <td> said, polic, rape, case, report, sexual, violenc, victim, court, crime </td> <td> rape, crime, victim, sentenc, crimin, polic, gang, prosecutor, convict, violenc </td> </tr>
<tr> <td align="right"> 8 </td> <td> Combat </td> <td> said, war, militari, kill, attack, soldier, women, forc, two, combat </td> <td> soldier, troop, bomb, armi, militari, combat, command, civilian, gun, camp </td> </tr>
<tr> <td align="right"> 9 </td> <td> Women's Rights and Gender Equality </td> <td> women, men, femal, law, right, chang, male, equal, mani, issu </td> <td> equal, male, gender, femal, discrimin, men, women, law, status, chang </td> </tr>
Expand All @@ -16,5 +16,5 @@
<tr> <td align="right"> 12 </td> <td> Human Interest </td> <td> said, like, say, one, peopl, just, want, get, can, think </td> <td> know, think, feel, thing, someth, realli, see, lot, tell, just </td> </tr>
<tr> <td align="right"> 13 </td> <td> Marriage &amp; Family </td> <td> famili, girl, women, husband, said, children, villag, live, marri, marriag </td> <td> villag, marriag, famili, rural, bride, marri, girl, shelter, husband, wive </td> </tr>
<tr> <td align="right"> 14 </td> <td> Religion </td> <td> said, islam, religi, right, church, ban, law, countri, women, practic </td> <td> islam, religi, religion, secular, veil, circumcis, fundamentalist, church, genit, koran </td> </tr>
<tr> <td align="right"> 15 </td> <td> Cancer </td> <td> abort, studi, women, said, research, use, percent, report, birth, rate </td> <td> abort, pill, contracept, fertil, implant, hormon, research, studi, method, data </td> </tr>
<tr> <td align="right"> 15 </td> <td> Reproductive &amp; Personal Health </td> <td> abort, studi, women, said, research, use, percent, report, birth, rate </td> <td> abort, pill, contracept, fertil, implant, hormon, research, studi, method, data </td> </tr>
</table>

0 comments on commit 1a308a2

Please sign in to comment.