# Wikipedia Thanks Recipient Supplementary Materials Analysis 
# [PROVISIONAL- DO NOT SHARE OR CITE UNLESS YOU WANT TO DISTRIBUTE POTENTIALLY-INACCURATE INFORMATION]
[J. Nathan Matias](https://natematias.com), May 2024

Key documents:
* [pre-analysis plan: Receiving Thanks on Wikipedia](https://osf.io/c67rg/)
* [data format description](https://docs.google.com/document/d/1plhoDbQryYQ32vZMXu8YmlLSp30QTdup43k6uTePOT4/edit#heading=h.fxaguwxn13cj)
* Tresorit data in `Tresors/CivilServant/projects/wikipedia-integration/gratitude-study`
* [Research Design Decision Document](https://docs.google.com/document/d/1p9rjbAoA4fVHimAGcx3GZ-8xUHXkCtF5WkFT_jxdTOg/edit)
* [Thanker Recruitment Plan](https://docs.google.com/document/d/1K5TNwTdru12z9Nj5ZA-dOH-zfMI8tjJHOacTpf5AVqE/edit)
* [Lab Notebook - CivilServant Wikipedia Gratitude Project 2019](https://docs.google.com/document/d/1vmzHJcBs0z_4efo7YDdlC9AhlkEezJnXAMIpVtR-hFM/edit)

Goals:
* Document every part of the study:
  * The approval process for each language
  * Dates for each language
  * Thanker summary table:
      * Number of thankers for each language
      * If possible, summary statistics about the thankers (how long they had contributed to Wikipedia, how many edits they had made over time), by language
      * Number of thanks they reviewed
      * Number of thanks they sent
  * Thank recipient summary table
      * How many were selected by the algorithm per language
      * How many edits they had made at the time
      * How many were selected by human reviewers
      * How many edits they had made at that time?


In [19]:
# library(plyr)
# library(AER)
# library(tidyverse)
# library(magrittr)
# library(gmodels)
# library(MASS)
# library(estimatr)
# library(ri2)
library(ggplot2)
library(sys)

#library(ggpubr)   # contains ggarrange
#library(png)      # to load logo
#library(grid)

### TODO: Add Source Sans as a font for ggplot2
#library(sysfonts) # to load source sans pro
# https://rdrr.io/github/kjhealy/sourcesans/src/R/sourcesans.r

## Set visual style
# Two orderings of the same seven hex colours. cat.theme indexes into
# catpalette by position, so the order of its entries matters.
catpalette <- c(
  "#333333",
  "#ea5324",
  "#005073",
  "#7D868C",
  "#BDBBBB",
  "#F2F2F2",
  "#F6F2EB"
)
chartpalette <- c(
  "#ea5324",
  "#005073",
  "#7D868C",
  "#333333",
  "#F2F2F2",
  "#BDBBBB",
  "#F6F2EB"
)

# Shared ggplot2 theme: theme_bw() plus overrides for the plot title, both axis
# titles and the panel background, all coloured from catpalette.
cat.theme <- theme_bw() +
  theme(
    plot.title = element_text(
      size = 13, face = "bold", color = catpalette[3]
    ),
    axis.title.x = element_text(
      size = 10, hjust = -0.01, color = catpalette[1]
    ),
    axis.title.y = element_text(size = 10, color = catpalette[1]),
    panel.background = element_rect(fill = catpalette[6])
  )

# Load Data

In [9]:
# Root of the synced data folder: taken from the TRESORDIR environment
# variable, falling back to ~/Tresors when that variable is unset.
data.path <- Sys.getenv("TRESORDIR", unset = "~/Tresors")

# Folder (relative to data.path) holding the post-experiment analysis files.
tresor.path <- "CivilServant/projects/wikipedia-integration/gratitude-study/Data Drills/thankee/post_experiment_analysis"

# File names of the two CSV inputs read below.
fname <- "grat-thankee-all-pre-post-treatment-vars.csv"
thank.fname <- "2021-04-30-secondary-thanks.csv"

# Read the participant table; f.path is kept as a variable of its own.
f.path <- file.path(data.path, tresor.path, fname)
all.participants <- read.csv(f.path)

# Read the secondary-thanks table from the same folder.
participant.thanks <- read.csv(
  file.path(data.path, tresor.path, thank.fname)
)

In [12]:
# Date bounds as ISO-formatted strings.
# NOTE(review): presumably the study window -- confirm against the
# pre-analysis plan before relying on them.
start.date <- "2019-08-02"
end.date <- "2020-02-11"

# Multi-line caption text. The two blank lines inside the literal are part of
# the string and must be kept as-is.
end.caption.text <- "Details at: citizensandtech.org/research/how-do-wikipedians-thank-each-other/


Citizens & Technology Lab - citizensandtech.org
© Creative Commons International Attribution 4.0"

# Label strings for participant groups.
lab.anewc <- "all newcomers"
lab.newc <- "newcomers"
lab.exp <- "experienced"

In [13]:
# Logo loading is currently disabled. The three lines below are kept for
# reference; the library(png) and library(grid) calls are likewise commented
# out earlier in this file.
# cat.logo.filename <- "../../assets/CAT-Logo-Horizontal-social-media-preview-color.png"
# logo.img <- readPNG(cat.logo.filename)
# logo.pngob <- rasterGrob(logo.img)

# Show the number of rows in the participant table loaded above.
nrow(all.participants)

In [34]:
# # Sys.glob(file.path(data.path,"CivilServant/projects/wikipedia-integration/gratitude-study/Data Drills/thanker/historical_survey_merged/*.csv"))


# ar.filename <- 'ar-merged-20190427.csv'
# de.filename <- 'de-merged-20190427.csv'
# fa.filename <- 'fa-merged-20190427.csv'
# pl.filename <- 'pl-merged-20190427.csv'
# thanker.folder <- "CivilServant/projects/wikipedia-integration/gratitude-study/Data Drills/thanker/historical_survey_merged/"

# ar.thankers <- read.csv(file.path(data.path, thanker.folder, ar.filename))


In [40]:
# Load the tab-separated second-generation thanks table from the same folder
# as the other post-experiment files (data.path / tresor.path).
all.thanks.sent <- read.csv(
  file.path(
    data.path, tresor.path,
    "gratitude-second-gen-thanks-analysis-with-reciprocal.tsv"
  ),
  sep = "\t"
)

# Show how many rows were loaded. This must name the object assigned just
# above: no `ar.thanks.sent` object is created anywhere in this file, so
# referring to it stops a fresh session with "object not found".
nrow(all.thanks.sent)

In [41]:
# List the distinct values in the first.gen.sender.user.name column of
# all.thanks.sent.
# NOTE(review): presumably one entry per first-generation thanker -- confirm
# against the data format description.
unique(all.thanks.sent$first.gen.sender.user.name)