In [1]:
using CSV, DataFrames, Plots

# Select the Plotly backend for Plots and apply the solarized-light theme
# to the plots created afterwards.
plotly()
theme(:solarized_light)

┌ Info: For saving to png with the Plotly backend PlotlyBase has to be installed.
└ @ Plots C:\Users\sharm\.julia\packages\Plots\uCh2y\src\backends.jl:372


In [2]:
using Flux
using Flux: Data.DataLoader
using Flux: @epochs
using Random
using IterTools: ncycle

# Seed the global RNG so that the row shuffle performed later is repeatable
# between runs.
Random.seed!(123);

In [3]:
# Shot data CSV hosted on GitHub.
url = "https://raw.githubusercontent.com/sharmaabhishekk/undershot_backup/master/understat_all_shots.csv"

# Download to a temporary file and parse it into a DataFrame.
# `CSV.read` without a sink argument is rejected by current CSV.jl releases;
# constructing the table from `CSV.File` works on both old and new versions.
df = DataFrame(CSV.File(download(url)))

# Drop every row that has a missing value in any column.
# NOTE(review): this also discards rows whose only missing value is in a column
# the model never reads (e.g. `player_assisted`) — consider restricting the check
# to the modelled columns.
dropmissing!(df);

In [4]:
# Preview the first five rows of the loaded table.
first(df, 5)

Unnamed: 0_level_0,X,Y,a_goals,a_team,date,h_a,h_goals,h_team
Unnamed: 0_level_1,Float64,Float64,Int64,String,String,String,Int64,String
1,0.728,0.373,1,Darmstadt,12-09-2015 17:30,a,0,Bayer Leverkusen
2,0.927,0.557,2,Werder Bremen,07-12-2014 16:30,a,5,Eintracht Frankfurt
3,0.984,0.476,2,Darmstadt,17-10-2015 17:30,a,0,Augsburg
4,0.885,0.433,0,Darmstadt,01-11-2015 18:30,a,2,VfB Stuttgart
5,0.942,0.635,0,Darmstadt,01-11-2015 18:30,a,2,VfB Stuttgart


In [5]:
# Print the column names available in `df`.
@show names(df);

names(df) = ["X", "Y", "a_goals", "a_team", "date", "h_a", "h_goals", "h_team", "id", "lastAction", "match_id", "minute", "player", "player_assisted", "player_id", "result", "season", "shotType", "situation", "xG"]


In [6]:
# Collapse a raw shot result into a two-class label: the string "Goal" stays
# "Goal", every other value becomes "Miss".
function get_target(val)
    if val == "Goal"
        return "Goal"
    else
        return "Miss"
    end
end
# Add the two-class label column derived from the raw `result` column.
# `df[!, col]` replaces the single-index form `df[:col]`, which current
# DataFrames.jl no longer accepts for reading or assigning a column.
df[!, :target] = map(get_target, df[!, :result]);

In [7]:
# Convert the model inputs plus the label column into a plain matrix
# (one row per observation, label in the last column).
# `Matrix(df[:, cols])` replaces `convert(Array, df[cols])`, which depends on
# legacy single-index column selection and DataFrame-to-Array conversion.
data = Matrix(df[:, ["X", "Y", "minute", "target"]])

# Shuffle the rows so the split below is not tied to the file order.
data = data[shuffle(1:size(data, 1)), :]

# train/test split: the first `idx` shuffled rows train, the remainder test.
train_test_ratio = 0.8
idx = floor(Int, size(data, 1) * train_test_ratio)
data_train = data[1:idx, :]
data_test = data[(idx + 1):end, :]

# Build the feature matrix from a data matrix: take every column except the
# last one, convert the values to Float32, and transpose so that each column
# of the result holds one row of the input.
function get_feat(d)
    features = d[:, 1:(end - 1)]
    as_float32 = convert(Array{Float32}, features)
    return transpose(as_float32)
end
x_train = get_feat(data_train)
x_test = get_feat(data_test)

# One hot labels
# The class list is fixed instead of being taken from `unique(df.target)`:
# `get_target` only ever produces these two strings, and a fixed list keeps the
# row order of the encoding independent of the data order, always yields two
# rows, and avoids rescanning the whole column on every call.
target_labels = ["Goal", "Miss"]

# Encode the last column of `d` as a one-hot matrix with one column per row of `d`.
onehot(d) = Flux.onehotbatch(d[:, end], target_labels)

y_train = onehot(data_train)
y_test = onehot(data_test);

In [8]:
# Mini-batch iterators over the (features, labels) pairs.
# Only the training loader is created with `shuffle=true`.
batch_size = 128
train_dl = DataLoader((x_train, y_train); batchsize=batch_size, shuffle=true)
test_dl = DataLoader((x_test, y_test); batchsize=batch_size);

In [9]:
### Model ------------------------------
# Build a fresh network: a 3 -> 28 dense layer with relu followed by a
# 28 -> 2 dense layer with no activation.
function get_model()
    hidden = Dense(3, 28, relu)
    output = Dense(28, 2)
    return Chain(hidden, output)
end

model = get_model()

### Loss ------------------------------
# Logit binary cross-entropy between the raw model outputs and the labels.
# NOTE(review): the model emits two logits and the labels are one-hot over two
# mutually exclusive classes; `Flux.Losses.logitcrossentropy` is the usual
# pairing for that setup — confirm that binary cross-entropy is intended.
loss(x, y) = Flux.Losses.logitbinarycrossentropy(model(x), y)

# Metric histories. Typed as `Float64[]` rather than `[]` (a `Vector{Any}`) so the
# stored values stay concretely typed. The accuracy vectors are not filled by any
# of the visible cells.
train_losses = Float64[]
test_losses = Float64[]
train_acces = Float64[]
test_acces = Float64[]

### Optimiser ------------------------------
lr = 0.001
opt = ADAM(lr, (0.9, 0.999))

### Callbacks ------------------------------
# Average `loss` over every observation served by `data_loader`.
#
# Each batch loss is weighted by the number of observations in the batch, so a
# smaller final batch does not skew the result the way a plain mean of batch
# losses does. The accumulation uses a running sum instead of building a
# temporary array of batch losses.
#
# Assumes the last dimension of each `x` indexes observations, which is how
# Flux's `DataLoader` forms batches — confirm if another iterator is passed.
#
# Throws an `ArgumentError` when `data_loader` yields no observations.
function loss_all(data_loader)
    total = 0.0
    nobs = 0
    for (x, y) in data_loader
        n = size(x, ndims(x))
        total += loss(x, y) * n
        nobs += n
    end
    nobs > 0 || throw(ArgumentError("data_loader yielded no observations"))
    return total / nobs
end

# Zero-argument hooks handed to `Flux.train!` through `cb`. Each one evaluates
# `loss_all` on a full loader and appends the value to the matching history.
# NOTE(review): Flux documents `cb` as running after every batch, so each
# training step would trigger a full pass over both loaders — confirm this is
# wanted, or wrap the hooks in `Flux.throttle`.
callbacks = [
    function ()
        return push!(train_losses, loss_all(train_dl))
    end,
    function ()
        return push!(test_losses, loss_all(test_dl))
    end,
];

#### Training

In [None]:
# Number of passes over the training loader (the trailing "#30" looks like the
# value meant for a full run — confirm before relying on the results).
epochs = 1 #30 
# Trainable parameters of `model`, as required by this `Flux.train!` call form.
ps = Flux.params(model)

# Run `Flux.train!` over `train_dl` once per epoch, passing the loss-recording
# hooks through `cb`.
@epochs epochs Flux.train!(loss, ps, train_dl, opt, cb = callbacks)

# Print the final losses computed over the full training and test loaders.
@show train_loss = loss_all(train_dl)
@show test_loss = loss_all(test_dl)