# Game Log Sanitization

Strip personally identifiable information from the game log dump so that the data can be shared with the community.

In [1]:
import Pkg; 
Pkg.add("CSV");
Pkg.add("DataFrames");
Pkg.add("DataFramesMeta");

using CSV;
using DataFrames;
using DataFramesMeta;
import Dates;

[32m[1m    Updating[22m[39m registry at `/opt/julia/registries/General`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m   Installed[22m[39m WeakRefStrings ────────────── v1.4.1
[32m[1m   Installed[22m[39m PooledArrays ──────────────── v1.4.0
[32m[1m   Installed[22m[39m Tables ────────────────────── v1.6.0
[32m[1m   Installed[22m[39m DataAPI ───────────────────── v1.9.0
[32m[1m   Installed[22m[39m SentinelArrays ────────────── v1.3.8
[32m[1m   Installed[22m[39m FilePathsBase ─────────────── v0.9.17
[32m[1m   Installed[22m[39m TranscodingStreams ────────── v0.9.6
[32m[1m   Installed[22m[39m DataValueInterfaces ───────── v1.0.0
[32m[1m   Installed[22m[39m CodecZlib ─────────────────── v0.7.0
[32m[1m   Installed[22m[39m InlineStrings ─────────────── v1.1.0
[32m[1m   Installed[22m[39m IteratorInterfaceExtensions ─ v1.0.0
[32m[1m   Installed[22m[39m TableTraits ───────────────── v1.0.1
[32m[1m   Installed[22m[39m CSV ───────

## Load the data

In [7]:
# Read the raw game log export from "data.csv" into a DataFrame.
df = CSV.File("data.csv") |> DataFrame
# Row filters on the username columns, currently disabled:
#df = dropmissing(df, :"runner-username")
#df = dropmissing(df, :"corp-username")
# Show (rows, columns) as the cell result.
size(df)

(111112, 9)

## Compute game length

In [8]:
# Derive each game's duration in whole minutes from the :start and :end columns and
# store it in a :time column.
#
# Both columns are parsed with the pattern "yyyymmddTHHMMSSZ".
fmt = Dates.DateFormat("yyyymmddTHHMMSSZ")
start_dt = Dates.DateTime.(df[:, :start], fmt)
end_dt = Dates.DateTime.(df[:, :end], fmt)

# Elementwise difference, rounded to the nearest whole minute.
game_time = round.(end_dt .- start_dt, Dates.Minute)

# `Dates.value` is the public accessor for a period's integer count; it replaces
# reading the internal `.value` field directly.
mins = Dates.value.(game_time)

# `insertcols!` throws if :time already exists, which made this cell fail when re-run.
# Overwrite the existing column in that case; otherwise insert it at position 3 as before.
if "time" in names(df)
    df[!, :time] = mins
else
    insertcols!(df, 3, :time => mins)
end
size(df)

(111112, 10)

## Remove game time of day

In [9]:
# Return the part of a timestamp string that precedes the first "T".
datepart(stamp) = first(split(stamp, "T"))

# Apply it row by row to :start and store the result in the :date column.
transform!(df, :start => ByRow(datepart) => :date)
size(df)

(111112, 11)

## Hash Usernames

In [10]:
# NOTE(review): this whole cell is commented out, so no username hash columns are
# added to `df`. The export cell selects its columns by name and lists neither
# "runner-username" nor "corp-username".
#
# What the disabled code would do if re-enabled: append a 5-character random salt to
# each username, SHA-256 the result, hex-encode it, and insert the hashes as columns
# 4 and 5 (:corpusernamehash, :runnerusernamehash).
# NOTE(review): the salt is drawn anew on every run, so hashes would not be comparable
# between separate exports — presumably intentional; confirm before re-enabling.
#using SHA
#using Random

#salt = randstring(5)
#runners = df[:, :"runner-username"] .* salt
#runners = bytes2hex.(sha256.(runners[:]))
#corps = df[:, :"corp-username"] .* salt
#corps = bytes2hex.(sha256.(corps[:]))
#insertcols!(df, 4, :corpusernamehash => corps)
#insertcols!(df, 5, :runnerusernamehash => runners)
#size(df)

## Write new CSV file

In [11]:
# Name the output file after the current date, e.g. "<yyyymmdd>_games.csv".
fmt = Dates.DateFormat("yyyymmdd")
filename = Dates.format(Dates.now(), fmt) * "_games.csv"

# Copy only the columns that go into the shared file, in output order.
for_output = df[
    :,
    [:date, :time, :turn, :room, :format, :winner, :reason, :corp, :runner],
]

# `CSV.write` is the last expression, so its return value is the cell output.
CSV.write(filename, for_output)

"20211208_games.csv"