# Velogames solver: Giro d'Italia 2023

## Load libraries

In [54]:
using Statistics

# Include every Julia source file found in the src directory.
# Only entries ending in ".jl" are loaded, so stray files (editor backups, data files)
# or subdirectories inside src do not make `include` fail.
source_files = filter(name -> endswith(name, ".jl"), readdir("src"))
for file in source_files
    include(joinpath("src", file))
end



## Load data

In [3]:
# Blend weight between the two score sources used for calc_score below:
# 0 uses only the rescaled PCS-based score, 1 uses only the Velogames points column.
FORM_WEIGHT = 0

# Velogames rider table for the 2023 Giro d'Italia
rider_df = getvgriders("https://www.velogames.com/italy/2023/riders.php")

# PCS ranking categories to fetch. This has to be a comma-separated Vector: the earlier
# `[:mountain_me :gc_me, ...]` mixed space and comma separators, which does not parse.
mycols = [:mountain_me, :gc_me, :sprint_me, :overall_me]

# For each category, fetch the ranking, rename its points column to the category name,
# and keep only that column together with riderkey.
pcs_dfs = map(mycols) do col
    rename(getpcsranking(col), :points => col)[:, [col, :riderkey]]
end

# Combine the per-category tables into one, keeping riders that appear in any of them
pcs_df = reduce((x, y) -> outerjoin(x, y, on=:riderkey), pcs_dfs)

# Attach the PCS columns to the Velogames riders; riders without a PCS entry get
# missing values, which are then replaced by 0.
rider_df = leftjoin(rider_df, pcs_df, on=:riderkey)
rider_df = coalesce.(rider_df, 0)

# calcpcsscore picks the PCS ranking matching each rider's class via the class
# indicator columns: allrounder -> gc, sprinter -> sprint, climber -> mountain,
# unclassed -> overall.
rider_df.calcpcsscore =
    rider_df.allrounder .* rider_df.gc_me .+
    rider_df.sprinter .* rider_df.sprint_me .+
    rider_df.climber .* rider_df.mountain_me .+
    rider_df.unclassed .* rider_df.overall_me

# calc_score is a weighted average of calcpcsscore and points. ADJ_FACTOR rescales
# calcpcsscore so that its mean matches the mean of points before blending.
# NOTE(review): if every calcpcsscore is 0 this division yields Inf/NaN — confirm
# whether that case can occur.
ADJ_FACTOR = mean(rider_df.points) / mean(rider_df.calcpcsscore)
rider_df.calc_score =
    (1 - FORM_WEIGHT) .* ADJ_FACTOR .* rider_df.calcpcsscore .+
    FORM_WEIGHT .* rider_df.points;

In [57]:
# Re-fetch the Velogames rider table and rebind rider_df to the fresh result.
# NOTE(review): this replaces the rider_df built in the data-loading cell, so columns
# added there (e.g. calcpcsscore, calc_score) are presumably absent afterwards — confirm
# cell execution order before running the model.
rider_df = getvgriders("https://www.velogames.com/italy/2023/riders.php")


Row,Unnamed: 1_level_0,rider,team,class_raw,cost,selected,points,riderkey,class,allrounder,climber,sprinter,unclassed,value
Unnamed: 0_level_1,String,String,String,String,Int64,String,Float64,String,String,Bool,Bool,Bool,Bool,Float64
1,,Remco Evenepoel,Soudal - Quick Step,All Rounder,26,68.7%,428.0,ceeeeelmnooprv,allrounder,true,false,false,false,16.4615
2,,Primož Roglič,Jumbo-Visma,All Rounder,24,53.7%,309.0,giilmooprr,allrounder,true,false,false,false,12.875
3,,Geraint Thomas,INEOS Grenadiers,All Rounder,18,11.3%,171.0,aaeghimnorstt,allrounder,true,false,false,false,9.5
4,,Tao Geoghegan Hart,INEOS Grenadiers,Climber,16,25.2%,292.0,aaaeeggghhnoortt,climber,false,true,false,false,18.25
5,,João Almeida,UAE Team Emirates,All Rounder,16,38.6%,303.0,aadeijlmoo,allrounder,true,false,false,false,18.9375
6,,Aleksandr Vlasov,BORA - hansgrohe,All Rounder,14,12.9%,200.0,aaadekllnorssvv,allrounder,true,false,false,false,14.2857
7,,Mads Pedersen,Trek - Segafredo,Sprinter,14,39.0%,422.0,addeeemnprss,sprinter,false,false,true,false,30.1429
8,,Jack Haig,Bahrain - Victorious,Climber,12,10.6%,41.0,aacghijk,climber,false,true,false,false,3.41667
9,,Thymen Arensman,INEOS Grenadiers,All Rounder,12,4.7%,31.0,aaeehmmnnnrsty,allrounder,true,false,false,false,2.58333
10,,Pavel Sivakov,INEOS Grenadiers,All Rounder,12,3.5%,50.0,aaeiklopsvvv,allrounder,true,false,false,false,4.16667


In [58]:
# Look up PCS rider points once per entry of the rider column
pcsriderpts = map(getpcsriderpts, rider_df.rider)


LoadError: UndefVarError: riderdf not defined

In [82]:
# Convert each entry of rider_df.pcsriderpts to a DataFrame and stack them vertically.
# `reduce(vcat, ...)` is used instead of `vcat(xs...)` so that a long collection is not
# splatted into a varargs call.
# NOTE(review): this reads a `pcsriderpts` column on rider_df, whereas the preceding cell
# only binds a standalone `pcsriderpts` variable — confirm the column is attached first.
df_pcsriderpts = reduce(vcat, DataFrame.(rider_df.pcsriderpts))

# Prefix every column name with "pts_"
rename!(df_pcsriderpts, names(df_pcsriderpts) .=> "pts_" .* names(df_pcsriderpts))
# hcat(rider_df, df_pcsriderpts)

Row,pts_climber,pts_gc,pts_oneday,pts_sprint,pts_tt
Unnamed: 0_level_1,Int64,Int64,Int64,Int64,Int64
1,2506,2894,1889,104,1993
2,7444,6294,1537,163,4526
3,5096,5778,1236,909,4817
4,2320,2397,261,100,709
5,2754,2634,396,107,1452
6,3282,2593,918,81,624
7,1410,863,2552,2043,1237
8,2606,2483,605,93,268
9,986,870,32,18,681
10,1619,1834,459,39,430


In [84]:
# Print rider_df with `show` to inspect its current rows and columns
show(rider_df)

[1m176×15 DataFrame[0m
[1m Row [0m│[1m        [0m[1m rider                   [0m[1m team                  [0m[1m class_raw   [0m[1m co[0m ⋯
     │[90m String [0m[90m String                  [0m[90m String                [0m[90m String      [0m[90m In[0m ⋯
─────┼──────────────────────────────────────────────────────────────────────────
   1 │         Remco Evenepoel          Soudal - Quick Step    All Rounder     ⋯
   2 │         Primož Roglič            Jumbo-Visma            All Rounder
   3 │         Geraint Thomas           INEOS Grenadiers       All Rounder
   4 │         Tao Geoghegan Hart       INEOS Grenadiers       Climber
   5 │         João Almeida             UAE Team Emirates      All Rounder     ⋯
   6 │         Aleksandr Vlasov         BORA - hansgrohe       All Rounder
   7 │         Mads Pedersen            Trek - Segafredo       Sprinter
   8 │         Jack Haig                Bahrain - Victorious   Climber
   9 │         Thymen Arensman        

Row,climber,gc,oneday,sprint,tt
Unnamed: 0_level_1,Int64,Int64,Int64,Int64,Int64
1,2506,2894,1889,104,1993
2,7444,6294,1537,163,4526
3,5096,5778,1236,909,4817
4,2320,2397,261,100,709
5,2754,2634,396,107,1452
6,3282,2593,918,81,624
7,1410,863,2552,2043,1237
8,2606,2483,605,93,268
9,986,870,32,18,681
10,1619,1834,459,39,430


## Build model

In [4]:
# Build and solve the selection model from rider_df. build_model_stage is not defined in
# this notebook (presumably it comes from the files included from src — verify).
# The trailing `;` suppresses display of the returned value.
model_results = build_model_stage(rider_df);

Running HiGHS 1.5.1 [date: 1970-01-01, git hash: 93f1876e4]
Copyright (c) 2023 HiGHS under MIT licence terms
Presolving model
6 rows, 176 cols, 528 nonzeros
6 rows, 56 cols, 153 nonzeros
6 rows, 52 cols, 142 nonzeros

Solving MIP model with:
   6 rows
   52 cols (49 binary, 3 integer, 0 implied int., 0 continuous)
   142 nonzeros

        Nodes      |    B&B Tree     |            Objective Bounds              |  Dynamic Constraints |       Work      
     Proc. InQueue |  Leaves   Expl. | BestBound       BestSol              Gap |   Cuts   InLp Confl. | LpIters     Time

         0       0         0   0.00%   10636           -inf                 inf        0      0      0         0     0.0s
 T       0       0         0   0.00%   10636           3439.860572      209.20%        0      0      0         7     0.0s

Solving report
  Status            Optimal
  Primal bound      3439.86057201
  Dual bound        3439.86057201
  Gap               0% (tolerance: 0.01%)
  Solution status   feas

In [5]:
# Sum of the solution values (with 0/1 selections this is the number of riders picked)
sum(model_results.data)

9.0

In [6]:
# Total cost of the selection: each rider's cost weighted by its solution value
sum(rider_df.cost .* model_results.data)

100.0

## Results

In [7]:
# Flag riders whose solution value exceeds 0.5 as chosen
rider_df.chosen = model_results.data .> 0.5

# Keep only the flagged rows
chosen_team = rider_df[rider_df.chosen, :]

# Display the columns of interest for the chosen team
chosen_team[
    :,
    [:rider, :team, :class_raw, :selected, :points, :calcpcsscore, :cost],
]

Row,rider,team,class_raw,selected,points,calcpcsscore,cost
Unnamed: 0_level_1,String,String,String,String,Float64,Float64,Int64
1,Mads Pedersen,Trek - Segafredo,Sprinter,39.0%,422.0,937.0,14
2,Remco Evenepoel,Soudal - Quick Step,All Rounder,68.7%,428.0,1105.0,26
3,João Almeida,UAE Team Emirates,All Rounder,38.6%,303.0,720.0,16
4,Santiago Buitrago,Bahrain - Victorious,Climber,19.8%,53.0,552.0,8
5,Lorenzo Rota,Intermarché - Circus - Wanty,Unclassed,3.2%,0.0,983.0,6
6,Stefan Küng,Groupama - FDJ,Unclassed,19.2%,174.0,1100.7,8
7,Patrick Konrad,BORA - hansgrohe,Climber,5.2%,50.0,460.0,6
8,Ben Healy,EF Education-EasyPost,Unclassed,35.3%,0.0,705.0,6
9,Filippo Ganna,INEOS Grenadiers,Unclassed,39.3%,272.0,1036.3,10


In [8]:
# Points the chosen team has accumulated so far
sum(chosen_team.points)

1702.0

### Mucking about

In [16]:
using TableScraper

# Scrape all HTML tables from Kaden Groves' ProCyclingStats rider page.
# The result is a vector of TableScraper.Table values (two tables in the output below).
page_tables = scrape_tables("https://www.procyclingstats.com/rider/kaden-groves")

2-element Vector{TableScraper.Table}:
 TableScraper.Table(Any[["06.05 » 28.05", "", "", "", "Giro d'Italia (2.UWT)", "", "", "", "more"], ["10.05", "1", "80", "", "Stage 5 - Atripalda  › Salerno", "171", "80", "180", "more"], ["09.05", "118", "84", "", "Stage 4 - Venosa › Lago Laceno", "175", "", "", "more"], ["08.05", "3", "51", "", "Stage 3 - Vasto › Melfi", "213", "35", "95", "more"], ["07.05", "3", "105", "", "Stage 2 - Teramo › San Salvo", "202", "35", "95", "more"], ["06.05", "142", "142", "", "Stage 1 (ITT) - Fossacesia Marina › Ortona", "19.6", "", "", "more"], ["09.04", "31", "", "", "Paris-Roubaix (1.UWT)", "256.6", "9", "15", "more"], ["01.04", "1", "", "", "Volta Limburg Classic (1.1)94k", "193.4", "75", "125", "more"], ["20.03 » 26.03", "", "", "", "Volta Ciclista a Catalunya (2.UWT)", "", "", "", "more"], ["", "3", "", "", "Points classification", "", "", "", "more"]  …  ["17.01 » 22.01", "", "", "", "Santos Tour Down Under (2.UWT)", "", "", "", "more"], ["", "23", "", ""

Row,Unnamed: 1_level_0,Points,position
Unnamed: 0_level_1,String,String,String
1,2023,380,43
2,2022,484,108
3,2021,82,510
4,2020,130,259
5,2019,99,604
6,2018,168,372
7,2017,29,1212


In [49]:

# Rider whose ProCyclingStats page is scraped
rider_name = "Tadej Pogačar"

# Turn the display name into a URL slug: strip diacritics and control characters,
# case-fold, then replace each space with a hyphen.
regularised_name = replace(
    Unicode.normalize(rider_name; stripmark=true, stripcc=true, casefold=true),
    ' ' => '-',
)
pageurl = string("https://www.procyclingstats.com/rider/", regularised_name)

# Take the second table on the page and name its three columns
raw_pts = rename!(DataFrame(scrape_tables(pageurl)[2]), [:year, :points, :rank])

Row,year,points,rank
Unnamed: 0_level_1,String,String,String
1,2023,1674,1
2,2022,3413,1
3,2021,3305,1
4,2020,2063,2
5,2019,1515,13
6,2018,241,268
7,2017,79,718


In [47]:
# Display raw_pts as a DataFrame.
# NOTE(review): the output below still shows the unrenamed column headers, so this cell
# was presumably run before the rename in the cell above — confirm execution order.
DataFrame(raw_pts)

Row,Unnamed: 1_level_0,Points,position
Unnamed: 0_level_1,String,String,String
1,2023,1674,1
2,2022,3413,1
3,2021,3305,1
4,2020,2063,2
5,2019,1515,13
6,2018,241,268
7,2017,79,718
