# Game Log Sanitization

Strip all personally identifiable information from the game log dump so that the data can be shared with the community.

In [1]:
import Pkg; 
Pkg.add("CSV");
Pkg.add("DataFrames");
Pkg.add("DataFramesMeta");

using CSV;
using DataFrames;
using DataFramesMeta;
import Dates;

[32m[1m    Updating[22m[39m registry at `/opt/julia/registries/General`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `/opt/julia/environments/v1.6/Project.toml`
[32m[1m  No Changes[22m[39m to `/opt/julia/environments/v1.6/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `/opt/julia/environments/v1.6/Project.toml`
[32m[1m  No Changes[22m[39m to `/opt/julia/environments/v1.6/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `/opt/julia/environments/v1.6/Project.toml`
[32m[1m  No Changes[22m[39m to `/opt/julia/environments/v1.6/Manifest.toml`


## Load the data

In [2]:
# Read the raw game log and keep only the rows in which both player
# usernames are present; rows missing either one are dropped.
username_columns = ["runner-username", "corp-username"]
df = dropmissing(DataFrame(CSV.File("data.csv")), username_columns)
size(df)

(312644, 11)

## Compute game length

In [3]:
# Parse the :start and :end columns with a shared timestamp format, then
# store the elapsed time of every game, rounded to whole minutes, as an
# integer column named :time in position 3.
fmt = Dates.DateFormat("yyyymmddTHHMMSSZ")
start_dt = Dates.DateTime.(df[!, :start], fmt)
end_dt = Dates.DateTime.(df[!, :end], fmt)
game_time = round.(end_dt .- start_dt, Dates.Minute)
# Unwrap each Minute period into its plain integer count.
mins = Dates.value.(game_time)
insertcols!(df, 3, :time => mins)
size(df)

(312644, 12)

## Remove game time of day

In [4]:
# Keep only the part of each :start value that precedes the "T" separator
# and store it in a new :date column.
transform!(df, :start => ByRow(ts -> first(split(ts, 'T'))) => :date)
size(df)

(312644, 13)

## Hash Usernames

In [5]:
using SHA
using Random

# Replace usernames with salted SHA-256 digests (hex-encoded).
#
# The salt is a 32-character string drawn from the operating system's
# entropy source (`RandomDevice`). A short salt from the default RNG is not
# sufficient here: anyone who can recognise one of their own games in the
# published data could brute-force a 5-character salt and then run a
# dictionary attack against every other username.
#
# The same salt is applied to both columns, so a given player maps to the
# same hash whether they appear as corp or as runner. The salt is not stored
# in `df`; keep it out of any published artefact.
salt = randstring(RandomDevice(), 32)

# Fused broadcast: append the salt, hash, and hex-encode in a single pass.
runners = bytes2hex.(sha256.(df[!, :"runner-username"] .* salt))
corps = bytes2hex.(sha256.(df[!, :"corp-username"] .* salt))

insertcols!(df, 4, :corpusernamehash => corps)
insertcols!(df, 5, :runnerusernamehash => runners)
size(df)

(312644, 15)

## Write new CSV file

In [6]:
# Columns that are written to the shareable file. The raw username columns
# and the original :start / :end timestamps are not in this list.
output_columns = [
    :date, :time, :turn, :room, :format, :winner, :reason,
    :corp, :runner, :corpusernamehash, :runnerusernamehash,
]
for_output = select(df, output_columns)

# Name the file after the current date, e.g. "20210720_games.csv".
fmt = Dates.DateFormat("yyyymmdd")
filename = Dates.format(Dates.now(), fmt) * "_games.csv"
CSV.write(filename, for_output)

"20210720_games.csv"