In [None]:
using DataFrames, XLSX, Statistics

In [None]:
# Read the dataset of cricket players and their ICC batting points from "batsmen_new.xlsx"
xlsx_file = "batsmen_new.xlsx"
df = DataFrame(XLSX.readtable(xlsx_file, "Sheet1"))

Row,Player_Name,Player_Nation,ICC_Batting_Points
Unnamed: 0_level_1,Any,Any,Any
1,Babar Azam,PAK,857
2,Shubman Gill,IND,839
3,Rassie van der Dussen,SA,743
4,Harry Tector,IRE,735
5,David Warner,AUS,729
6,Imam-ul-Haq,PAK,728
7,Quinton de Kock,SA,714
8,Heinrich Klaasen,SA,698
9,Virat Kohli,IND,696
10,Rohit Sharma,IND,695


In [None]:
# Drop all the rows with NaN scores
df = dropmissing(df)

Row,Player_Name,Player_Nation,ICC_Batting_Points
Unnamed: 0_level_1,Any,Any,Any
1,Babar Azam,PAK,857
2,Shubman Gill,IND,839
3,Rassie van der Dussen,SA,743
4,Harry Tector,IRE,735
5,David Warner,AUS,729
6,Imam-ul-Haq,PAK,728
7,Quinton de Kock,SA,714
8,Heinrich Klaasen,SA,698
9,Virat Kohli,IND,696
10,Rohit Sharma,IND,695


In [None]:
# Sort the dataset by batting points in descending order
sort!(df, :ICC_Batting_Points, rev=true)

Row,Player_Name,Player_Nation,ICC_Batting_Points
Unnamed: 0_level_1,Any,Any,Any
1,Babar Azam,PAK,857
2,Shubman Gill,IND,839
3,Rassie van der Dussen,SA,743
4,Harry Tector,IRE,735
5,David Warner,AUS,729
6,Imam-ul-Haq,PAK,728
7,Quinton de Kock,SA,714
8,Heinrich Klaasen,SA,698
9,Virat Kohli,IND,696
10,Rohit Sharma,IND,695


In [None]:
# Calculate the percentile rank for each player
df[!, :Percentile_Rank] = [searchsortedlast(sort(df[!, :ICC_Batting_Points]), x) / nrow(df) * 100 for x in df[!, :ICC_Batting_Points]]
df

Row,Player_Name,Player_Nation,ICC_Batting_Points,Percentile_Rank
Unnamed: 0_level_1,Any,Any,Any,Float64
1,Babar Azam,PAK,857,100.0
2,Shubman Gill,IND,839,97.9167
3,Rassie van der Dussen,SA,743,95.8333
4,Harry Tector,IRE,735,93.75
5,David Warner,AUS,729,91.6667
6,Imam-ul-Haq,PAK,728,89.5833
7,Quinton de Kock,SA,714,87.5
8,Heinrich Klaasen,SA,698,85.4167
9,Virat Kohli,IND,696,83.3333
10,Rohit Sharma,IND,695,81.25


In [None]:
# Set the percentile score of the lowest-ranked player to 1
min_rank = minimum(df[!, :Percentile_Rank])
df[!, :Percentile_Score] .= @. 100 - ((df[!, :Percentile_Rank] - min_rank) / (100 - min_rank))
df

Row,Player_Name,Player_Nation,ICC_Batting_Points,Percentile_Rank,Percentile_Score
Unnamed: 0_level_1,Any,Any,Any,Float64,Float64
1,Babar Azam,PAK,857,100.0,99.0
2,Shubman Gill,IND,839,97.9167,99.0213
3,Rassie van der Dussen,SA,743,95.8333,99.0426
4,Harry Tector,IRE,735,93.75,99.0638
5,David Warner,AUS,729,91.6667,99.0851
6,Imam-ul-Haq,PAK,728,89.5833,99.1064
7,Quinton de Kock,SA,714,87.5,99.1277
8,Heinrich Klaasen,SA,698,85.4167,99.1489
9,Virat Kohli,IND,696,83.3333,99.1702
10,Rohit Sharma,IND,695,81.25,99.1915


In [None]:
# Define a dictionary mapping short country names to full country names
country_names = Dict(
    "IND" => "India",
    "PAK" => "Pakistan",
    "AFG" => "Afghanistan",
    "SL" => "Sri Lanka",
    "BAN" => "Bangladesh",
    "NZ" => "New Zealand",
    "AUS" => "Australia",
    "SA" => "South Africa",
    "ENG" => "England",
    "NED" => "Netherlands",
    "WI" => "West Indies",
    "ZIM" => "Zimbabwe",
    "IRE" => "Ireland",
    "SCO" => "Scotland",
    "NAM" => "Namibia"
)

Dict{String, String} with 15 entries:
  "ENG" => "England"
  "WI"  => "West Indies"
  "IRE" => "Ireland"
  "PAK" => "Pakistan"
  "NED" => "Netherlands"
  "AUS" => "Australia"
  "SA"  => "South Africa"
  "AFG" => "Afghanistan"
  "NZ"  => "New Zealand"
  "ZIM" => "Zimbabwe"
  "IND" => "India"
  "SCO" => "Scotland"
  "NAM" => "Namibia"
  "SL"  => "Sri Lanka"
  "BAN" => "Bangladesh"

In [None]:
# Replace the short country names with full country names in the "Player_Nation" column
df[!, :Player_Nation] .= [country_names[name] for name in df[!, :Player_Nation]]
df

Row,Player_Name,Player_Nation,ICC_Batting_Points,Percentile_Rank,Percentile_Score
Unnamed: 0_level_1,Any,String,Any,Float64,Float64
1,Babar Azam,Pakistan,857,100.0,99.0
2,Shubman Gill,India,839,97.9167,99.0213
3,Rassie van der Dussen,South Africa,743,95.8333,99.0426
4,Harry Tector,Ireland,735,93.75,99.0638
5,David Warner,Australia,729,91.6667,99.0851
6,Imam-ul-Haq,Pakistan,728,89.5833,99.1064
7,Quinton de Kock,South Africa,714,87.5,99.1277
8,Heinrich Klaasen,South Africa,698,85.4167,99.1489
9,Virat Kohli,India,696,83.3333,99.1702
10,Rohit Sharma,India,695,81.25,99.1915


In [None]:
# Display the first 5 rows of the DataFrame
first(df, 5)

Row,Player_Name,Player_Nation,ICC_Batting_Points,Percentile_Rank,Percentile_Score
Unnamed: 0_level_1,Any,String,Any,Float64,Float64
1,Babar Azam,Pakistan,857,100.0,99.0
2,Shubman Gill,India,839,97.9167,99.0213
3,Rassie van der Dussen,South Africa,743,95.8333,99.0426
4,Harry Tector,Ireland,735,93.75,99.0638
5,David Warner,Australia,729,91.6667,99.0851


In [None]:
batsman = df[!, :Player_Name]
batsman_country = df[!, :Player_Nation]
batsman_rank = df[!, :Percentile_Rank]


48-element Vector{Float64}:
 100.0
  97.91666666666666
  95.83333333333334
  93.75
  91.66666666666666
  89.58333333333334
  87.5
  85.41666666666666
  83.33333333333334
  81.25
  79.16666666666666
  77.08333333333334
  75.0
   ⋮
  25.0
  22.916666666666664
  20.833333333333336
  18.75
  16.666666666666664
  14.583333333333334
  12.5
  10.416666666666668
   8.333333333333332
   8.333333333333332
   4.166666666666666
   2.083333333333333

In [None]:
# Read the dataset of cricket players and their ICC bowling points from "bowlers_new.xlsx"
xlsx_file = "bowlers_new.xlsx"
df_bowlers = DataFrame(XLSX.readtable(xlsx_file, "Sheet1"))

Row,Bowler_Name,Player_Nation,ICC_Bowling_Points
Unnamed: 0_level_1,Any,Any,Any
1,Mohammed Siraj,India,669
2,Josh Hazlewood,Australia,669
3,Mujeeb Ur Rahman,Afghanistan,657
4,Rashid Khan,Afghanistan,655
5,Trent Boult,New Zealand,653
6,Shaheen Afridi,Pakistan,632
7,Adam Zampa,Australia,631
8,Mitchell Starc,Australia,628
9,Matt Henry,New Zealand,626
10,Mohammad Nabi,Afghanistan,621


In [None]:
# Drop all the rows with NaN scores
df_bowlers = dropmissing(df_bowlers)

Row,Bowler_Name,Player_Nation,ICC_Bowling_Points
Unnamed: 0_level_1,Any,Any,Any
1,Mohammed Siraj,India,669
2,Josh Hazlewood,Australia,669
3,Mujeeb Ur Rahman,Afghanistan,657
4,Rashid Khan,Afghanistan,655
5,Trent Boult,New Zealand,653
6,Shaheen Afridi,Pakistan,632
7,Adam Zampa,Australia,631
8,Mitchell Starc,Australia,628
9,Matt Henry,New Zealand,626
10,Mohammad Nabi,Afghanistan,621


In [None]:
# Sort the dataset by bowling points in descending order
sort!(df_bowlers, :ICC_Bowling_Points, rev=true)

Row,Bowler_Name,Player_Nation,ICC_Bowling_Points
Unnamed: 0_level_1,Any,Any,Any
1,Mohammed Siraj,India,669
2,Josh Hazlewood,Australia,669
3,Mujeeb Ur Rahman,Afghanistan,657
4,Rashid Khan,Afghanistan,655
5,Trent Boult,New Zealand,653
6,Shaheen Afridi,Pakistan,632
7,Adam Zampa,Australia,631
8,Mitchell Starc,Australia,628
9,Matt Henry,New Zealand,626
10,Mohammad Nabi,Afghanistan,621


In [None]:
# Calculate the percentile rank for each player
df_bowlers[!, :Percentile_Rank] .= [searchsortedlast(sort(df_bowlers[!, :ICC_Bowling_Points]), x) / nrow(df_bowlers) * 100 for x in df_bowlers[!, :ICC_Bowling_Points]]
df_bowlers

Row,Bowler_Name,Player_Nation,ICC_Bowling_Points,Percentile_Rank
Unnamed: 0_level_1,Any,Any,Any,Float64
1,Mohammed Siraj,India,669,100.0
2,Josh Hazlewood,Australia,669,100.0
3,Mujeeb Ur Rahman,Afghanistan,657,94.2857
4,Rashid Khan,Afghanistan,655,91.4286
5,Trent Boult,New Zealand,653,88.5714
6,Shaheen Afridi,Pakistan,632,85.7143
7,Adam Zampa,Australia,631,82.8571
8,Mitchell Starc,Australia,628,80.0
9,Matt Henry,New Zealand,626,77.1429
10,Mohammad Nabi,Afghanistan,621,74.2857


In [None]:
# Set the percentile score of the lowest-ranked player to 1
min_rank = minimum(df_bowlers[!, :Percentile_Rank])
df_bowlers[!, :Percentile_Score] .= @. 100 - ((df_bowlers[!, :Percentile_Rank] - min_rank) / (100 - min_rank))
df_bowlers

Row,Bowler_Name,Player_Nation,ICC_Bowling_Points,Percentile_Rank,Percentile_Score
Unnamed: 0_level_1,Any,Any,Any,Float64,Float64
1,Mohammed Siraj,India,669,100.0,99.0
2,Josh Hazlewood,Australia,669,100.0,99.0
3,Mujeeb Ur Rahman,Afghanistan,657,94.2857,99.0588
4,Rashid Khan,Afghanistan,655,91.4286,99.0882
5,Trent Boult,New Zealand,653,88.5714,99.1176
6,Shaheen Afridi,Pakistan,632,85.7143,99.1471
7,Adam Zampa,Australia,631,82.8571,99.1765
8,Mitchell Starc,Australia,628,80.0,99.2059
9,Matt Henry,New Zealand,626,77.1429,99.2353
10,Mohammad Nabi,Afghanistan,621,74.2857,99.2647


In [None]:
#converting to list
bowler = df_bowlers[!, :Bowler_Name]
bowler_country = df_bowlers[!, :Player_Nation]
bowler_rank = df_bowlers[!, :Percentile_Rank]

35-element Vector{Float64}:
 100.0
 100.0
  94.28571428571428
  91.42857142857143
  88.57142857142857
  85.71428571428571
  82.85714285714286
  80.0
  77.14285714285715
  74.28571428571429
  71.42857142857143
  68.57142857142857
  65.71428571428571
   ⋮
  34.285714285714285
  31.428571428571427
  28.57142857142857
  28.57142857142857
  22.857142857142858
  20.0
  17.142857142857142
  14.285714285714285
  11.428571428571429
   8.571428571428571
   5.714285714285714
   2.857142857142857

In [None]:
df = DataFrame(Player_Name = batsman, Player_Nation = batsman_country, PercentileRank = batsman_rank)

Row,Player_Name,Player_Nation,PercentileRank
Unnamed: 0_level_1,Any,String,Float64
1,Babar Azam,Pakistan,100.0
2,Shubman Gill,India,97.9167
3,Rassie van der Dussen,South Africa,95.8333
4,Harry Tector,Ireland,93.75
5,David Warner,Australia,91.6667
6,Imam-ul-Haq,Pakistan,89.5833
7,Quinton de Kock,South Africa,87.5
8,Heinrich Klaasen,South Africa,85.4167
9,Virat Kohli,India,83.3333
10,Rohit Sharma,India,81.25


In [None]:
# Sort the DataFrame by the "Player_Nation" column
sort!(df, :Player_Nation)

Row,Player_Name,Player_Nation,PercentileRank
Unnamed: 0_level_1,Any,String,Float64
1,Ibrahim Zadran,Afghanistan,66.6667
2,Rahmanullah Gurbaz,Afghanistan,29.1667
3,Rahmat Shah,Afghanistan,22.9167
4,David Warner,Australia,91.6667
5,Steve Smith,Australia,77.0833
6,Travis Head,Australia,56.25
7,Marnus Labuschagne,Australia,45.8333
8,Alex Carey,Australia,31.25
9,Glenn Maxwell,Australia,16.6667
10,Mitchell Marsh,Australia,8.33333


In [None]:
df2 = DataFrame(Player_Name = bowler, Player_Nation = bowler_country, Percentile_Rank = bowler_rank)

Row,Player_Name,Player_Nation,Percentile_Rank
Unnamed: 0_level_1,Any,Any,Float64
1,Mohammed Siraj,India,100.0
2,Josh Hazlewood,Australia,100.0
3,Mujeeb Ur Rahman,Afghanistan,94.2857
4,Rashid Khan,Afghanistan,91.4286
5,Trent Boult,New Zealand,88.5714
6,Shaheen Afridi,Pakistan,85.7143
7,Adam Zampa,Australia,82.8571
8,Mitchell Starc,Australia,80.0
9,Matt Henry,New Zealand,77.1429
10,Mohammad Nabi,Afghanistan,74.2857


In [None]:
# Sort the DataFrame by the "Player_Nation" column
sort!(df2, :Player_Nation)

Row,Player_Name,Player_Nation,Percentile_Rank
Unnamed: 0_level_1,Any,Any,Float64
1,Mujeeb Ur Rahman,Afghanistan,94.2857
2,Rashid Khan,Afghanistan,91.4286
3,Mohammad Nabi,Afghanistan,74.2857
4,Josh Hazlewood,Australia,100.0
5,Adam Zampa,Australia,82.8571
6,Mitchell Starc,Australia,80.0
7,Pat Cummins,Australia,28.5714
8,Shakib Al Hasan,Bangladesh,54.2857
9,Mustafizur Rahman,Bangladesh,40.0
10,Chris Woakes,England,68.5714


In [None]:
# Define the list of countries to keep
countries_to_keep = ["India", "Pakistan", "Afghanistan", "Sri Lanka", "Bangladesh", "New Zealand", "Australia", "South Africa", "England", "Netherlands"]

10-element Vector{String}:
 "India"
 "Pakistan"
 "Afghanistan"
 "Sri Lanka"
 "Bangladesh"
 "New Zealand"
 "Australia"
 "South Africa"
 "England"
 "Netherlands"

In [None]:
# Filter the DataFrame to only include rows from the countries in the list
df_filtered_bowl = filter(row -> in(row.Player_Nation, countries_to_keep), df2)

Row,Player_Name,Player_Nation,Percentile_Rank
Unnamed: 0_level_1,Any,Any,Float64
1,Mujeeb Ur Rahman,Afghanistan,94.2857
2,Rashid Khan,Afghanistan,91.4286
3,Mohammad Nabi,Afghanistan,74.2857
4,Josh Hazlewood,Australia,100.0
5,Adam Zampa,Australia,82.8571
6,Mitchell Starc,Australia,80.0
7,Pat Cummins,Australia,28.5714
8,Shakib Al Hasan,Bangladesh,54.2857
9,Mustafizur Rahman,Bangladesh,40.0
10,Chris Woakes,England,68.5714


In [None]:
# Filter the DataFrame to only include rows from the countries in the list
df_filtered_bat = filter(row -> in(row.Player_Nation, countries_to_keep), df)

Row,Player_Name,Player_Nation,PercentileRank
Unnamed: 0_level_1,Any,String,Float64
1,Ibrahim Zadran,Afghanistan,66.6667
2,Rahmanullah Gurbaz,Afghanistan,29.1667
3,Rahmat Shah,Afghanistan,22.9167
4,David Warner,Australia,91.6667
5,Steve Smith,Australia,77.0833
6,Travis Head,Australia,56.25
7,Marnus Labuschagne,Australia,45.8333
8,Alex Carey,Australia,31.25
9,Glenn Maxwell,Australia,16.6667
10,Mitchell Marsh,Australia,8.33333


In [None]:
# Define the dictionary to store the results
team_dict = Dict{String, Vector{Float64}}()

# Iterate over the DataFrame and add each team to the dictionary
for row in eachrow(df_filtered_bat)
    team = row.Player_Nation
    percentile_rank = row.PercentileRank

    # If the team is not already in the dictionary, add it
    if haskey(team_dict, team)
        team_dict[team][1] += 1
        team_dict[team][2] += percentile_rank
    else
        team_dict[team] = [1, percentile_rank]
    end
end

In [None]:
# Print the dictionary
team_dict

Dict{String, Vector{Float64}} with 10 entries:
  "Bangladesh"   => [4.0, 127.083]
  "South Africa" => [6.0, 456.25]
  "England"      => [6.0, 327.083]
  "India"        => [7.0, 414.583]
  "Australia"    => [7.0, 327.083]
  "Netherlands"  => [1.0, 27.0833]
  "Afghanistan"  => [3.0, 118.75]
  "New Zealand"  => [3.0, 102.083]
  "Pakistan"     => [4.0, 281.25]
  "Sri Lanka"    => [2.0, 60.4167]

In [None]:
# Define the dictionary to store the team scores
team_scores = Dict{String, Float64}()

Dict{String, Float64}()

In [None]:
# Iterate over the DataFrame and calculate the score for each team
for row in eachrow(df_filtered_bat)
    team = row.Player_Nation
    percentile_rank = row.PercentileRank

    # If the team is not already in the dictionary, add it.
    if !haskey(team_scores, team)
        team_scores[team] = 0.0
    end

    # Add the percentile rank of the player to the team score.
    team_scores[team] += percentile_rank

    # Get the number of players from the team in the DataFrame.
    num_players = sum(df_filtered_bat.Player_Nation .== team)

    # If there are fewer than 7 players from the team in the DataFrame, calculate the score for missing players.
    if num_players < 7
        # Get the lowest score of players from that team.
        lowest_score = minimum(df_filtered_bat[df_filtered_bat.Player_Nation .== team, :PercentileRank])

        # Calculate the score for the missing players.
        missing_player_score = (lowest_score / 3.0) * (7 - num_players)

        # Add the score for the missing players to the team score.
        team_scores[team] += missing_player_score
    end
end

In [None]:
# Create arrays to store the results
teams = String[]
batting_score = Float64[]

Float64[]

In [None]:
# Print the results and store them in arrays
for (team, score) in team_scores
    println("$team : $score")
    push!(teams, team)
    push!(batting_score, score)
end

Bangladesh : 160.41666666666669
South Africa : 539.5833333333334
England : 377.0833333333333
India : 414.5833333333333
Australia : 327.0833333333333
Netherlands : 81.24999999999999
Afghanistan : 210.41666666666663
New Zealand : 160.41666666666669
Pakistan : 331.25
Sri Lanka : 95.1388888888889


In [None]:
sorted_team_scores = sort(collect(team_scores), by=x->x[2], rev=true)

10-element Vector{Pair{String, Float64}}:
 "South Africa" => 539.5833333333334
        "India" => 414.5833333333333
      "England" => 377.0833333333333
     "Pakistan" => 331.25
    "Australia" => 327.0833333333333
  "Afghanistan" => 210.41666666666663
   "Bangladesh" => 160.41666666666669
  "New Zealand" => 160.41666666666669
    "Sri Lanka" => 95.1388888888889
  "Netherlands" => 81.24999999999999

In [None]:
# Print the results
for (team, score) in sorted_team_scores
    println("$team : $score")
end

South Africa : 539.5833333333334
India : 414.5833333333333
England : 377.0833333333333
Pakistan : 331.25
Australia : 327.0833333333333
Afghanistan : 210.41666666666663
Bangladesh : 160.41666666666669
New Zealand : 160.41666666666669
Sri Lanka : 95.1388888888889
Netherlands : 81.24999999999999


In [None]:
# Define the dictionary to store the results
team_dict_bowl = Dict{String, Vector{Int}}()

# Iterate over the DataFrame and add each team to the dictionary
for row in eachrow(df_filtered_bowl)
    team = row.Player_Nation
    percentile_rank = round(Int, row.Percentile_Rank)  # Convert to Int

    # If the team is not already in the dictionary, add it
    if !haskey(team_dict_bowl, team)
        team_dict_bowl[team] = [0, 0]
    end

    # Increment the number of players from the team
    team_dict_bowl[team][1] += 1

    # Add the percentile rank of the player to the cumulative sum
    team_dict_bowl[team][2] += percentile_rank
end

In [None]:
# Add Netherlands if not already in the dictionary
if !haskey(team_dict_bowl, "Netherlands")
    team_dict_bowl["Netherlands"] = [1, 125]
else
    team_dict_bowl["Netherlands"][1] += 1
    team_dict_bowl["Netherlands"][2] += 125
end

2-element Vector{Int64}:
   1
 125

In [None]:
# Define the dictionary to store the team scores
team_scores = Dict{String, Float64}()

Dict{String, Float64}()

In [None]:
# Iterate over the DataFrame and calculate the score for each team
for row in eachrow(df_filtered_bowl)
    team = row.Player_Nation
    percentile_rank = row.Percentile_Rank

    # If the team is not already in the dictionary, add it.
    if !haskey(team_scores, team)
        team_scores[team] = 0.0
    end

    # Add the percentile rank of the player to the team score.
    team_scores[team] += percentile_rank

    # Get the number of players from the team in the DataFrame.
    num_players = sum(df_filtered_bowl.Player_Nation .== team)

    # If there are less than 7 players from the team in the DataFrame, calculate the score for missing players.
    if num_players < 7
        # Get the lowest score of players from that team.
        lowest_score = minimum(df_filtered_bowl[df_filtered_bowl.Player_Nation .== team, :Percentile_Rank])

        # Calculate the score for the missing players.
        missing_player_score = (lowest_score / 3.0) * (7 - num_players)

        # Add the score for the missing players to the team score.
        team_scores[team] += missing_player_score
    end
end

In [None]:
bowling_score = Float64[]

Float64[]

In [None]:
# Print the results and store them in the 'bowling_score' array
for (team, score) in team_scores
    println("$team : $score")
    push!(bowling_score, score)
end

Bangladesh : 227.61904761904765
South Africa : 239.99999999999997
England : 145.7142857142857
India : 314.2857142857142
Australia : 405.71428571428567
Afghanistan : 557.1428571428571
New Zealand : 422.8571428571429
Pakistan : 271.4285714285714
Sri Lanka : 171.42857142857142


In [None]:
bowling_score = Float64[]

Float64[]

In [None]:
println(bowling_score)
# Print the results and store them in the 'bowling_score' array
for (team, score) in team_scores
    println("$team : $score")
    push!(bowling_score, score)
end
insert!(bowling_score, 6, 125.0)

Float64[]
Bangladesh : 227.61904761904765
South Africa : 239.99999999999997
England : 145.7142857142857
India : 314.2857142857142
Australia : 405.71428571428567
Afghanistan : 557.1428571428571
New Zealand : 422.8571428571429
Pakistan : 271.4285714285714
Sri Lanka : 171.42857142857142


10-element Vector{Float64}:
 227.61904761904765
 239.99999999999997
 145.7142857142857
 314.2857142857142
 405.71428571428567
 125.0
 557.1428571428571
 422.8571428571429
 271.4285714285714
 171.42857142857142

In [None]:
# Sort the team_scores dictionary by values in descending order
sorted_team_scores = sort(collect(team_scores), by=x->x[2], rev=true)

9-element Vector{Pair{String, Float64}}:
  "Afghanistan" => 557.1428571428571
  "New Zealand" => 422.8571428571429
    "Australia" => 405.71428571428567
        "India" => 314.2857142857142
     "Pakistan" => 271.4285714285714
 "South Africa" => 239.99999999999997
   "Bangladesh" => 227.61904761904765
    "Sri Lanka" => 171.42857142857142
      "England" => 145.7142857142857

In [None]:
# Print the results
for (team, score) in sorted_team_scores
    println("$team : $score")
end

Afghanistan : 557.1428571428571
New Zealand : 422.8571428571429
Australia : 405.71428571428567
India : 314.2857142857142
Pakistan : 271.4285714285714
South Africa : 239.99999999999997
Bangladesh : 227.61904761904765
Sri Lanka : 171.42857142857142
England : 145.7142857142857


In [None]:
print(teams)

["Bangladesh", "South Africa", "England", "India", "Australia", "Netherlands", "Afghanistan", "New Zealand", "Pakistan", "Sri Lanka"]

In [None]:
# Define a vector to store the total scores
total_score = Float64[]

Float64[]

In [None]:
# Calculate the total scores and print them
for i in 1:length(teams)
    push!(total_score, batting_score[i] + bowling_score[i])
    println(teams[i], " : ", total_score[i])
end

Bangladesh : 388.03571428571433
South Africa : 779.5833333333334
England : 522.797619047619
India : 728.8690476190475
Australia : 732.797619047619
Netherlands : 206.25
Afghanistan : 767.5595238095237
New Zealand : 583.2738095238096
Pakistan : 602.6785714285713
Sri Lanka : 266.5674603174603


In [None]:
# Assign the total scores to 'full_score'
full_score = total_score

10-element Vector{Float64}:
 388.03571428571433
 779.5833333333334
 522.797619047619
 728.8690476190475
 732.797619047619
 206.25
 767.5595238095237
 583.2738095238096
 602.6785714285713
 266.5674603174603

In [None]:
total_score = copy(full_score)

10-element Vector{Float64}:
 388.03571428571433
 779.5833333333334
 522.797619047619
 728.8690476190475
 732.797619047619
 206.25
 767.5595238095237
 583.2738095238096
 602.6785714285713
 266.5674603174603

In [None]:
past_winners = Set(["India", "Pakistan", "Australia", "England", "Sri Lanka"])
new_countries = Set(["Afghanistan", "Netherlands"])

Set{String} with 2 elements:
  "Netherlands"
  "Afghanistan"

In [None]:
for (i, team) in enumerate(teams)
    if team in past_winners
        total_score[i] *= 1.2
    end

    if team in new_countries
        total_score[i] *= 0.8
    end
end

In [None]:
# Print the updated total_score
println(total_score)

[388.03571428571433, 779.5833333333334, 627.3571428571428, 874.642857142857, 879.3571428571428, 165.0, 614.047619047619, 583.2738095238096, 723.2142857142856, 319.88095238095235]


In [None]:
for i in 1:length(teams)
    println(teams[i], " ", total_score[i])
end

Bangladesh 388.03571428571433
South Africa 779.5833333333334
England 627.3571428571428
India 874.642857142857
Australia 879.3571428571428
Netherlands 165.0
Afghanistan 614.047619047619
New Zealand 583.2738095238096
Pakistan 723.2142857142856
Sri Lanka 319.88095238095235


In [None]:
team_scores = copy(total_score)

10-element Vector{Float64}:
 388.03571428571433
 779.5833333333334
 627.3571428571428
 874.642857142857
 879.3571428571428
 165.0
 614.047619047619
 583.2738095238096
 723.2142857142856
 319.88095238095235

In [None]:
points = Float64[]

Float64[]

In [None]:
for i in 1:length(teams)
    s = 0.0
    for j in 1:length(teams)
        if teams[i] != teams[j]
            s += 2 * total_score[i] / (total_score[i] + total_score[j])
        end
    end
    push!(points, s)
end

In [None]:
#Usecase-1 : Comparing winning probabilities for all the head to head matches in a round robin format
function matchups()
    for i in 1:length(teams)
        for j in i+1:length(teams)
            prob_factor = total_score[i] + total_score[j]
            println("Match up $(teams[i]) vs $(teams[j]) :")
            println("Chances to win for Team $(teams[i]): $(total_score[i] / prob_factor)")
            println("Chances to win for Team $(teams[j]): $(total_score[j] / prob_factor)")
        end
    end
end

m1 = matchups()


Match up Bangladesh vs South Africa :
Chances to win for Team Bangladesh: 0.33233075040783033
Chances to win for Team South Africa: 0.6676692495921697
Match up Bangladesh vs England :
Chances to win for Team Bangladesh: 0.3821532833878514
Chances to win for Team England: 0.6178467166121486
Match up Bangladesh vs India :
Chances to win for Team Bangladesh: 0.3073115542356103
Chances to win for Team India: 0.6926884457643897
Match up Bangladesh vs Australia :
Chances to win for Team Bangladesh: 0.3061684560543298
Chances to win for Team Australia: 0.6938315439456701
Match up Bangladesh vs Netherlands :
Chances to win for Team Bangladesh: 0.7016467549241201
Chances to win for Team Netherlands: 0.29835324507587985
Match up Bangladesh vs Afghanistan :
Chances to win for Team Bangladesh: 0.38722898722898724
Chances to win for Team Afghanistan: 0.6127710127710128
Match up Bangladesh vs New Zealand :
Chances to win for Team Bangladesh: 0.39949748743718594
Chances to win for Team New Zealand: 0

In [None]:
# Create a DataFrame
points_table = DataFrame(Team = teams, Probable_Points = points)

Row,Team,Probable_Points
Unnamed: 0_level_1,String,Float64
1,Bangladesh,7.42733
2,South Africa,10.695
3,England,9.67989
4,India,11.2215
5,Australia,11.2458
6,Netherlands,3.91267
7,Afghanistan,9.57879
8,New Zealand,9.33607
9,Pakistan,10.3468
10,Sri Lanka,6.5562


In [None]:
# Display the DataFrame
points_table

Row,Team,Probable_Points
Unnamed: 0_level_1,String,Float64
1,Bangladesh,7.42733
2,South Africa,10.695
3,England,9.67989
4,India,11.2215
5,Australia,11.2458
6,Netherlands,3.91267
7,Afghanistan,9.57879
8,New Zealand,9.33607
9,Pakistan,10.3468
10,Sri Lanka,6.5562


In [None]:
# Sort the DataFrame by 'Probable_Points' in descending order
#Use case-2: Predicting the final points table after each team plays 9matches(every other team in the competition)
sorted_result = sort(points_table, :Probable_Points, rev=true)
result=sorted_result

Row,Team,Probable_Points
Unnamed: 0_level_1,String,Float64
1,Australia,11.2458
2,India,11.2215
3,South Africa,10.695
4,Pakistan,10.3468
5,England,9.67989
6,Afghanistan,9.57879
7,New Zealand,9.33607
8,Bangladesh,7.42733
9,Sri Lanka,6.5562
10,Netherlands,3.91267


In [None]:
# Extract the top 4 teams and their points
top4_teams = sorted_result[1:4, :Team]
top4_points = sorted_result[1:4, :Probable_Points]

4-element Vector{Float64}:
 11.24580661634686
 11.221468483827978
 10.695014064861363
 10.346770529587456

In [None]:

top4_teams = collect(top4_teams)
top4_points = collect(top4_points)

4-element Vector{Float64}:
 11.24580661634686
 11.221468483827978
 10.695014064861363
 10.346770529587456

In [None]:
#Usecase-3: Top 4 teams/Semifinalists for the competition
top4_points = top4_points[1:4]
top4_teams = top4_teams[1:4]


4-element Vector{String}:
 "Australia"
 "India"
 "South Africa"
 "Pakistan"

In [None]:
#Usecase-4: Predicting the final winner of ICC Cricket World Cup 2023
for i in 1:length(top4_teams)
    chance = top4_points[i] / sum(top4_points)
    println("Chance for $(top4_teams[i]) to win World Cup 2023: $chance")
end

Chance for Australia to win World Cup 2023: 0.25847045868786
Chance for India to win World Cup 2023: 0.2579110778901663
Chance for South Africa to win World Cup 2023: 0.24581119748223215
Chance for Pakistan to win World Cup 2023: 0.2378072659397415


In [None]:
# Assuming top4_points is an array defined earlier
scores = []

push!(scores, (top4_points[1] / (top4_points[1] + top4_points[2])) + (top4_points[1] / (top4_points[1] + top4_points[2])))
push!(scores, (top4_points[2] / (top4_points[1] + top4_points[2])) + (top4_points[2] / (top4_points[2] + top4_points[3])))
push!(scores, top4_points[3] / (top4_points[2] + top4_points[3]))
push!(scores, top4_points[1] / (top4_points[1] + top4_points[2]))

# Print the calculated scores
println(scores)


Any[1.0010832703303076, 1.0114688327052503, 0.4879895321295958, 0.5005416351651538]


In [None]:
function win_pred()
    for i in 1:length(top4_teams)
        println("Chance for ", top4_teams[i], " to win World Cup 2023: ", scores[i] / sum(scores))
    end
end


win_pred (generic function with 1 method)

In [None]:
strong_bat = ["India", "Australia", "South Africa", "England"]
strong_bowl = ["India","Afghanistan","Australia","New Zealand"]

4-element Vector{String}:
 "India"
 "Afghanistan"
 "Australia"
 "New Zealand"

In [None]:
bat_name = df_filtered_bat.Player_Name
bat_nation = df_filtered_bat.Player_Nation
bat_rank = df_filtered_bat.PercentileRank

43-element Vector{Float64}:
  66.66666666666666
  29.166666666666668
  22.916666666666664
  91.66666666666666
  77.08333333333334
  56.25
  45.83333333333333
  31.25
  16.666666666666664
   8.333333333333332
  60.416666666666664
  37.5
  20.833333333333336
   ⋮
 100.0
  89.58333333333334
  79.16666666666666
  12.5
  95.83333333333334
  87.5
  85.41666666666666
  75.0
  70.83333333333334
  41.66666666666667
  50.0
  10.416666666666668

In [None]:
bowl_name = df_filtered_bowl.Player_Name
bowl_nation = df_filtered_bowl.Player_Nation
bowl_rank = df_filtered_bowl.Percentile_Rank

# Convert Julia arrays to lists
bowl_name = convert(Vector{String}, bowl_name)
bowl_nation = convert(Vector{String}, bowl_nation)
bowl_rank = convert(Vector{Float64}, bowl_rank)

25-element Vector{Float64}:
  94.28571428571428
  91.42857142857143
  74.28571428571429
 100.0
  82.85714285714286
  80.0
  28.57142857142857
  54.285714285714285
  40.0
  68.57142857142857
  34.285714285714285
   8.571428571428571
 100.0
  71.42857142857143
  28.57142857142857
  22.857142857142858
  88.57142857142857
  77.14285714285715
  85.71428571428571
  42.857142857142854
  62.857142857142854
  60.0
  45.714285714285715
  14.285714285714285
  57.14285714285714

In [None]:
rank_bat = String[]
rank_nation = String[]
rank_percentile = Float64[]

for i in 1:length(bat_name)
  if bat_nation[i] in strong_bat
      push!(rank_bat, bat_name[i])
      push!(rank_nation, bat_nation[i])
      push!(rank_percentile, bat_rank[i] * 1.1)
  else
      push!(rank_bat, bat_name[i])
      push!(rank_nation, bat_nation[i])
      push!(rank_percentile, bat_rank[i])
  end
end

In [None]:
function top_bat()
    println("Probable top batsmen of the world cup")

    # Create a vector of indices sorted by rank_percentile in descending order
    sorted_indices = sort(1:length(rank_percentile), rev=true, by=x -> rank_percentile[x])

    for i in sorted_indices
        println(rank_bat[i], " -------- ", rank_nation[i], " --- ", rank_percentile[i])
    end
end


top_bat()


Probable top batsmen of the world cup
Shubman Gill -------- India --- 107.70833333333333
Rassie van der Dussen -------- South Africa --- 105.41666666666669
David Warner -------- Australia --- 100.83333333333333
Babar Azam -------- Pakistan --- 100.0
Quinton de Kock -------- South Africa --- 96.25000000000001
Heinrich Klaasen -------- South Africa --- 93.95833333333333
Virat Kohli -------- India --- 91.66666666666669
Imam-ul-Haq -------- Pakistan --- 89.58333333333334
Rohit Sharma -------- India --- 89.37500000000001
Steve Smith -------- Australia --- 84.79166666666669
Temba Bavuma -------- South Africa --- 82.5
Dawid Malan -------- England --- 80.20833333333333
Fakhar Zaman -------- Pakistan --- 79.16666666666666
David Miller -------- South Africa --- 77.91666666666669
Jos Buttler -------- England --- 75.625
Jason Roy -------- England --- 71.04166666666669
Ibrahim Zadran -------- Afghanistan --- 66.66666666666666
Jonny Bairstow -------- England --- 64.16666666666667
Travis Head -------

In [None]:
# Create a DataFrame with the specified columns
bat_chart = DataFrame(
    Name = rank_bat,
    Bowler_Nation = rank_nation,
    Score = rank_percentile
)

Row,Name,Bowler_Nation,Score
Unnamed: 0_level_1,String,String,Float64
1,Ibrahim Zadran,Afghanistan,66.6667
2,Rahmanullah Gurbaz,Afghanistan,29.1667
3,Rahmat Shah,Afghanistan,22.9167
4,David Warner,Australia,100.833
5,Steve Smith,Australia,84.7917
6,Travis Head,Australia,61.875
7,Marnus Labuschagne,Australia,50.4167
8,Alex Carey,Australia,34.375
9,Glenn Maxwell,Australia,18.3333
10,Mitchell Marsh,Australia,9.16667


In [None]:
sort!(bat_chart, :Score, rev=true)

Row,Name,Bowler_Nation,Score
Unnamed: 0_level_1,String,String,Float64
1,Shubman Gill,India,107.708
2,Rassie van der Dussen,South Africa,105.417
3,David Warner,Australia,100.833
4,Babar Azam,Pakistan,100.0
5,Quinton de Kock,South Africa,96.25
6,Heinrich Klaasen,South Africa,93.9583
7,Virat Kohli,India,91.6667
8,Imam-ul-Haq,Pakistan,89.5833
9,Rohit Sharma,India,89.375
10,Steve Smith,Australia,84.7917


In [None]:
r1, r2, r3 = String[], String[], Float64[]
for i in 1:length(bowl_name)
  push!(r1, bowl_name[i])
  push!(r2, bowl_nation[i])

  # If the bowler's country is in the strong bowl list, multiply his rank by 1.1.
  if bowl_nation[i] in strong_bowl
      push!(r3, bowl_rank[i] * 1.1)
  else
      push!(r3, bowl_rank[i])
  end
end

In [None]:
# Create a DataFrame with the specified columns
bowl_chart = DataFrame(
    Name = r1,
    Bowler_Nation = r2,
    Score = r3
)

Row,Name,Bowler_Nation,Score
Unnamed: 0_level_1,String,String,Float64
1,Mujeeb Ur Rahman,Afghanistan,103.714
2,Rashid Khan,Afghanistan,100.571
3,Mohammad Nabi,Afghanistan,81.7143
4,Josh Hazlewood,Australia,110.0
5,Adam Zampa,Australia,91.1429
6,Mitchell Starc,Australia,88.0
7,Pat Cummins,Australia,31.4286
8,Shakib Al Hasan,Bangladesh,54.2857
9,Mustafizur Rahman,Bangladesh,40.0
10,Chris Woakes,England,68.5714


In [None]:
function top_bowlers()
    println("Probable top bowlers of the World Cup")

    for i in 1:size(bowl_chart, 1)
        println(bowl_chart[i, "Name"], " -------- ", bowl_chart[i, "Bowler_Nation"], " --- ", bowl_chart[i, "Score"])
    end
end


top_bowlers (generic function with 1 method)

In [None]:
bowl_chart = sort(bowl_chart, :Score, rev=true)

Row,Name,Bowler_Nation,Score
Unnamed: 0_level_1,String,String,Float64
1,Josh Hazlewood,Australia,110.0
2,Mohammed Siraj,India,110.0
3,Mujeeb Ur Rahman,Afghanistan,103.714
4,Rashid Khan,Afghanistan,100.571
5,Trent Boult,New Zealand,97.4286
6,Adam Zampa,Australia,91.1429
7,Mitchell Starc,Australia,88.0
8,Shaheen Afridi,Pakistan,85.7143
9,Matt Henry,New Zealand,84.8571
10,Mohammad Nabi,Afghanistan,81.7143


In [None]:
top_bowl = bowl_chart[!, "Name"][1:15]

15-element Vector{String}:
 "Josh Hazlewood"
 "Mohammed Siraj"
 "Mujeeb Ur Rahman"
 "Rashid Khan"
 "Trent Boult"
 "Adam Zampa"
 "Mitchell Starc"
 "Shaheen Afridi"
 "Matt Henry"
 "Mohammad Nabi"
 "Kuldeep Yadav"
 "Chris Woakes"
 "Keshav Maharaj"
 "Kagiso Rabada"
 "Maheesh Theekshana"

In [None]:
using Readline_jll

println("Welcome to ICC World Cup Predictor")
println("MENU")
println("1. Head to head predictions")
println("2. Points table")
println("3. Top 4 Teams")
println("4. Winner Probability Prediction")
println("5. Best Batsmen")
println("6. Best Bowlers")


while(true)
    # Get user input
    print("Enter your choice: ")
    a_str = readline()
    a = parse(Int, a_str)

    option = string(a)
    if option == "1"
        println(matchups())
        continue
    elseif option == "2"
        println(result)
        continue
    elseif option == "3"
        println(top4_teams)
        continue
    elseif option == "4"
        println(win_pred())
        continue
    elseif option == "5"
        println(top_bat())
        continue
    elseif option == "6"
        println(top_bowlers())
        continue
    else
        println("Invalid Option")
        break
    end
end

Welcome to ICC World Cup Predictor
MENU
1. Head to head predictions
2. Points table
3. Top 4 Teams
4. Winner Probability Prediction
5. Best Batsmen
6. Best Bowlers
Enter your choice: 

stdin>  1


Match up Bangladesh vs South Africa :
Chances to win for Team Bangladesh: 0.33233075040783033
Chances to win for Team South Africa: 0.6676692495921697
Match up Bangladesh vs England :
Chances to win for Team Bangladesh: 0.3821532833878514
Chances to win for Team England: 0.6178467166121486
Match up Bangladesh vs India :
Chances to win for Team Bangladesh: 0.3073115542356103
Chances to win for Team India: 0.6926884457643897
Match up Bangladesh vs Australia :
Chances to win for Team Bangladesh: 0.3061684560543298
Chances to win for Team Australia: 0.6938315439456701
Match up Bangladesh vs Netherlands :
Chances to win for Team Bangladesh: 0.7016467549241201
Chances to win for Team Netherlands: 0.29835324507587985
Match up Bangladesh vs Afghanistan :
Chances to win for Team Bangladesh: 0.38722898722898724
Chances to win for Team Afghanistan: 0.6127710127710128
Match up Bangladesh vs New Zealand :
Chances to win for Team Bangladesh: 0.39949748743718594
Chances to win for Team New Zealand: 0

stdin>  2


[1m10×2 DataFrame[0m
[1m Row [0m│[1m Team         [0m[1m Probable_Points [0m
     │[90m String       [0m[90m Float64         [0m
─────┼───────────────────────────────
   1 │ Australia            11.2458
   2 │ India                11.2215
   3 │ South Africa         10.695
   4 │ Pakistan             10.3468
   5 │ England               9.67989
   6 │ Afghanistan           9.57879
   7 │ New Zealand           9.33607
   8 │ Bangladesh            7.42733
   9 │ Sri Lanka             6.5562
  10 │ Netherlands           3.91267
Enter your choice: 

stdin>  3


["Australia", "India", "South Africa", "Pakistan"]
Enter your choice: 

stdin>  4


Chance for Australia to win World Cup 2023: 0.3335739731807327
Chance for India to win World Cup 2023: 0.33703457771564105
Chance for South Africa to win World Cup 2023: 0.16260446251325983
Chance for Pakistan to win World Cup 2023: 0.16678698659036634
nothing
Enter your choice: 

stdin>  5


Probable top batsmen of the world cup
Shubman Gill -------- India --- 107.70833333333333
Rassie van der Dussen -------- South Africa --- 105.41666666666669
David Warner -------- Australia --- 100.83333333333333
Babar Azam -------- Pakistan --- 100.0
Quinton de Kock -------- South Africa --- 96.25000000000001
Heinrich Klaasen -------- South Africa --- 93.95833333333333
Virat Kohli -------- India --- 91.66666666666669
Imam-ul-Haq -------- Pakistan --- 89.58333333333334
Rohit Sharma -------- India --- 89.37500000000001
Steve Smith -------- Australia --- 84.79166666666669
Temba Bavuma -------- South Africa --- 82.5
Dawid Malan -------- England --- 80.20833333333333
Fakhar Zaman -------- Pakistan --- 79.16666666666666
David Miller -------- South Africa --- 77.91666666666669
Jos Buttler -------- England --- 75.625
Jason Roy -------- England --- 71.04166666666669
Ibrahim Zadran -------- Afghanistan --- 66.66666666666666
Jonny Bairstow -------- England --- 64.16666666666667
Travis Head -------

stdin>  6


Probable top bowlers of the World Cup
Josh Hazlewood -------- Australia --- 110.00000000000001
Mohammed Siraj -------- India --- 110.00000000000001
Mujeeb Ur Rahman -------- Afghanistan --- 103.71428571428571
Rashid Khan -------- Afghanistan --- 100.57142857142858
Trent Boult -------- New Zealand --- 97.42857142857143
Adam Zampa -------- Australia --- 91.14285714285715
Mitchell Starc -------- Australia --- 88.0
Shaheen Afridi -------- Pakistan --- 85.71428571428571
Matt Henry -------- New Zealand --- 84.85714285714288
Mohammad Nabi -------- Afghanistan --- 81.71428571428572
Kuldeep Yadav -------- India --- 78.57142857142858
Chris Woakes -------- England --- 68.57142857142857
Keshav Maharaj -------- South Africa --- 62.857142857142854
Kagiso Rabada -------- South Africa --- 60.0
Maheesh Theekshana -------- Sri Lanka --- 57.14285714285714
Shakib Al Hasan -------- Bangladesh --- 54.285714285714285
Lungi Ngidi -------- South Africa --- 45.714285714285715
Haris Rauf -------- Pakistan --- 42

stdin>  7


Invalid Option
