In [69]:
library(gtools)
library(dplyr)
library(ggplot2)

# load data
# Download the club SPI global rankings CSV published on the FiveThirtyEight
# projects site; this needs network access every time the cell is run.
inTeams <- read.csv(
    file = "https://projects.fivethirtyeight.com/soccer-api/club/spi_global_rankings.csv"
)

In [86]:
# Prize dataframe: one row per final league position (1..20) with the prize
# value paid for finishing in that position.
prizes <- data.frame(
    team_rank = 1:20,
    prize = c(225, 210, 200, 190, 160, 150, 140, 125, 123, 120,
              118, 115, 112, 110, 108, 105, 100, 52, 50, 49)
)

# subset to premier league
premLeague <- subset(inTeams, league == "Barclays Premier League")
if (nrow(premLeague) == 0) {
    # Fail early with a readable message instead of a cryptic error further
    # down when the league label is absent from the downloaded file.
    stop("No rows with league == \"Barclays Premier League\" in inTeams",
         call. = FALSE)
}
premTeams <- premLeague$name
rownames(premLeague) <- premTeams

# Keep only the offensive / defensive ratings, indexed by team name.
df.premorig <- premLeague[, c("off", "def")]
df.premorig$team_names <- row.names(df.premorig)
df.premorig

# All possible matches in a season: every ordered (home, away) pair of
# distinct teams. The team count is taken from the data rather than being
# hard-coded, so the fixture list always matches the filtered table.
allMatches <- permutations(nrow(df.premorig), 2, v = rownames(df.premorig),
                           repeats.allowed = FALSE)
colnames(allMatches) <- c("home", "away")

teams <- list(df.premorig$team_names)

# Make sure both ratings are numeric, then take the working copy (df.prem)
# that the simulation code reads.
df.premorig$off <- as.numeric(df.premorig$off)
df.premorig$def <- as.numeric(df.premorig$def)
df.prem <- as.data.frame(df.premorig)

# build draw score function
#
# Draw one simulated scoreline for a single match.
#
# Each side's goal count is an independent Poisson draw with rate
#   exp(alpha[scoring team] - delta[conceding team]).
#
# Args:
#   team1, team2: names (or positions) used to look the two teams up in
#                 `alpha` and `delta`.
#   alpha, delta: per-team parameter vectors. They default to the global
#                 `alphaList` / `deltaList`, so existing two-argument calls
#                 behave as before; passing them explicitly makes the
#                 function usable without touching global state.
#
# Returns:
#   Length-2 vector c(goals for team1, goals for team2).
#
# An unknown team name is an error (`[[` lookup) rather than a silent NA.
draw.score <- function(team1, team2, alpha = alphaList, delta = deltaList) {
    rate1 <- exp(alpha[[team1]] - delta[[team2]])
    rate2 <- exp(alpha[[team2]] - delta[[team1]])
    # team1 is drawn first, then team2, so the random stream is consumed in
    # a fixed order.
    c(rpois(1, rate1), rpois(1, rate2))
}

# season simulation function
#
# Simulate one full season and return the final league table with prizes.
#
# Args:
#   allMatches: character matrix with columns "home" and "away", one row per
#               fixture.
#
# Reads from the enclosing environment:
#   df.prem    - only its row names, which define the teams in the table
#   draw.score - draws the scoreline of each fixture
#   prizes     - lookup table (team_rank -> prize) joined onto the result
#
# Returns:
#   Data frame with one row per team and the columns
#     points, goals_for, goals_against, team_names, net_goals, tie_breaker,
#     temp_team_rank, temp_team_rank2, goals_rank, goalsfor_rank, team_rank,
#     prize
#   where
#     temp_team_rank  = rank on points alone (ties get the average rank),
#     temp_team_rank2 = rank after tie-breaking,
#     team_rank       = final position, used to look up the prize.
#
# Points are 3 for a win, 1 for a draw and 0 for a loss. Teams level on
# points are separated by net goals (higher is better), then goals scored
# (higher is better), then a uniform random tie_breaker (lower is better).
SeasonSim <- function(allMatches) {
    team_names <- rownames(df.prem)
    n_teams <- length(team_names)
    n_matches <- nrow(allMatches)

    home <- allMatches[, "home"]
    away <- allMatches[, "away"]

    # Draw every fixture in row order so that, for a given seed, the random
    # stream is consumed in the same order on every run.
    home_goals <- numeric(n_matches)
    away_goals <- numeric(n_matches)
    for (m in seq_len(n_matches)) {
        score <- as.numeric(draw.score(home[m], away[m]))
        home_goals[m] <- score[1]
        away_goals[m] <- score[2]
    }

    # ASSIGNING POINTS PER MATCH (3 win / 1 draw / 0 loss)
    drawn <- home_goals == away_goals
    home_points <- 3 * (home_goals > away_goals) + drawn
    away_points <- 3 * (away_goals > home_goals) + drawn

    # Season total of a per-match quantity for every team: `as_home` is what
    # the home side gets from each match, `as_away` what the away side gets.
    total_by_team <- function(as_home, as_away) {
        vapply(
            team_names,
            function(team) sum(as_home[home == team]) + sum(as_away[away == team]),
            numeric(1),
            USE.NAMES = FALSE
        )
    }

    # AGGREGATING POINTS and GOALS, MERGED TO ONE DF
    df.teams <- data.frame(
        points = total_by_team(home_points, away_points),
        goals_for = total_by_team(home_goals, away_goals),
        # goals against: the opponent's goals in each match
        goals_against = total_by_team(away_goals, home_goals),
        team_names = team_names,
        stringsAsFactors = FALSE
    )

    # ADD TIE BREAKER VARIABLES
    df.teams$net_goals <- df.teams$goals_for - df.teams$goals_against
    df.teams$tie_breaker <- runif(n_teams, 0, 1)

    # RANK
    # Sorting on (points, net goals, goals for) descending and tie_breaker
    # ascending gives the same positions as pairwise tie resolution within
    # each group of teams level on points.
    standing <- order(-df.teams$points, -df.teams$net_goals,
                      -df.teams$goals_for, df.teams$tie_breaker)
    resolved_rank <- numeric(n_teams)
    resolved_rank[standing] <- seq_len(n_teams)

    df.teams$temp_team_rank <- rank(-df.teams$points)
    df.teams$temp_team_rank2 <- resolved_rank
    df.teams$goals_rank <- rank(-df.teams$net_goals)
    df.teams$goalsfor_rank <- rank(-df.teams$goals_for)
    df.teams$team_rank <- rank(df.teams$temp_team_rank2)

    # ASSIGN PRIZE
    # Positions that have no row in `prizes` end up with prize = NA.
    left_join(df.teams, prizes, by = "team_rank")
}


# simulations
# Number of simulated seasons per scenario.
# NOTE(review): in the code visible here, nsims is only referenced by the
# commented-out `for (rep in 1:nsims)` loops, so changing it currently has no
# effect on the scenario cells — confirm before relying on it.
    nsims <- 1

Unnamed: 0,off,def,team_names
Manchester City,2.9,0.2,Manchester City
Liverpool,2.92,0.25,Liverpool
Chelsea,2.39,0.29,Chelsea
Arsenal,2.16,0.46,Arsenal
Tottenham Hotspur,2.36,0.65,Tottenham Hotspur
Manchester United,2.19,0.67,Manchester United
Aston Villa,1.99,0.63,Aston Villa
West Ham United,2.02,0.73,West Ham United
Brighton and Hove Albion,1.85,0.62,Brighton and Hove Albion
Crystal Palace,1.89,0.69,Crystal Palace


In [84]:
 # results df (offense and defense)
    # Column layout of the empty result templates; finalcol2 is the same
    # layout with an extra "team_change" column appended.
    finalcol <- c("points", "goals_for", "goals_against", "team_names", "net_goals", "tie_breaker", "team_rank", "goals_rank", "goalsfor_rank", "prize", "sim")
    finalcol2 <- c(finalcol, "team_change")

    # All four accumulators start out as the same zero-row data frame with
    # the finalcol columns; each name gets its own independent copy.
    df.final.output.offense <- df.offense <-
        df.final.output.defense <- df.defense <-
        as.data.frame(matrix(nrow = 0, ncol = length(finalcol), dimnames = list(NULL, finalcol)))

In [83]:
# Offense scenarios: for each team in turn, raise that one team's offensive
# rating by 10% while every other team stays at its baseline rating, rebuild
# the scoring parameters and simulate one season.
#
# Each scenario starts from the baseline table df.premorig. Multiplying a
# shared table in place would carry the boost of teams 1..i-1 into scenario
# i, so that by the last scenario every team would be boosted.
# df.prem itself is left untouched.
offense_runs <- vector("list", nrow(df.premorig))
for (i in seq_len(nrow(df.premorig))) {
    # increasing goals expected by 10% for team i only
    scenario <- df.premorig
    scenario[i, "off"] <- scenario[i, "off"] * 1.10

    # build parameters (draw.score reads the global alphaList / deltaList)
    lmean.def <- log(mean(scenario$def))
    lmean.off <- log(mean(scenario$off))
    alphaList <- setNames(log(scenario$off) - lmean.def, rownames(scenario))
    deltaList <- setNames(lmean.off - log(scenario$def), rownames(scenario))

    # simulation (with one team changed); `sim` records which team it was
    df.sim.earnings <- SeasonSim(allMatches)
    df.sim.earnings$sim <- i
    offense_runs[[i]] <- df.sim.earnings
}

# Bind once after the loop instead of growing the data frame on every pass;
# rows are appended to whatever df.offense already holds.
df.offense <- rbind(df.offense, do.call(rbind, offense_runs))

df.offense



points,goals_for,goals_against,team_names,net_goals,tie_breaker,temp_team_rank,temp_team_rank2,goals_rank,goalsfor_rank,team_rank,prize,sim
92,98,44,Manchester City,54,0.20745713,1.0,1,1.0,1.0,1,225,1
80,93,53,Liverpool,40,0.49665003,2.0,2,2.0,2.0,2,210,1
58,83,56,Chelsea,27,0.87796945,9.0,9,3.0,4.5,9,123,1
64,76,63,Arsenal,13,0.79944684,4.0,4,6.5,9.0,4,190,1
67,84,69,Tottenham Hotspur,15,0.16644487,3.0,3,5.0,3.0,3,200,1
63,83,65,Manchester United,18,0.04749161,5.5,5,4.0,4.5,5,160,1
62,82,71,Aston Villa,11,0.33436681,7.0,7,8.0,6.0,7,140,1
50,74,79,West Ham United,-5,0.77651478,11.0,11,11.5,10.0,11,118,1
63,80,71,Brighton and Hove Albion,9,0.14011304,5.5,6,9.0,7.0,6,150,1
41,64,73,Crystal Palace,-9,0.95618838,13.0,13,13.0,18.5,13,112,1


In [None]:

# taking averages
mean.off <- mean(df.prem$off)
mean.def <- mean(df.prem$def)

# build parameters
# The log of the league mean is what ends up in lmean.off / lmean.def (not
# the mean of the logs).
lmean.off <- log(mean.off)
lmean.def <- log(mean.def)

# alpha: log offensive rating relative to the log mean defensive rating.
# delta: log mean offensive rating relative to the log defensive rating.
df.prem$alpha <- log(df.prem$off) - lmean.def
df.prem$delta <- lmean.off - log(df.prem$def)

# Named per-team lookup vectors, keyed by the row names of df.prem.
alphaList <- setNames(df.prem$alpha, rownames(df.prem))
deltaList <- setNames(df.prem$delta, rownames(df.prem))


In [85]:
# Defense scenarios: for each team in turn, lower that one team's defensive
# rating (goals conceded) by 10% while every other team stays at its
# baseline rating, rebuild the scoring parameters and simulate one season.
#
# Each scenario starts from the baseline table df.premorig. Multiplying a
# shared table in place would carry the change of teams 1..i-1 into scenario
# i, so that by the last scenario every team would be changed.
# df.prem itself is left untouched.
defense_runs <- vector("list", nrow(df.premorig))
for (i in seq_len(nrow(df.premorig))) {
    # decreasing goals conceded by 10% for team i only
    scenario <- df.premorig
    scenario[i, "def"] <- scenario[i, "def"] * 0.90

    # build parameters (draw.score reads the global alphaList / deltaList)
    lmean.def <- log(mean(scenario$def))
    lmean.off <- log(mean(scenario$off))
    alphaList <- setNames(log(scenario$off) - lmean.def, rownames(scenario))
    deltaList <- setNames(lmean.off - log(scenario$def), rownames(scenario))

    # simulation (with one team changed); `sim` records which team it was
    df.sim.earnings <- SeasonSim(allMatches)
    df.sim.earnings$sim <- i
    defense_runs[[i]] <- df.sim.earnings
}

# Bind once after the loop instead of growing the data frame on every pass;
# rows are appended to whatever df.defense already holds.
df.defense <- rbind(df.defense, do.call(rbind, defense_runs))

df.defense

points,goals_for,goals_against,team_names,net_goals,tie_breaker,temp_team_rank,temp_team_rank2,goals_rank,goalsfor_rank,team_rank,prize,sim
87,107,51,Manchester City,56,0.9932754,2.0,2,1.0,1.0,2,210,1
91,93,47,Liverpool,46,0.3566526,1.0,1,2.0,3.0,1,225,1
75,82,56,Chelsea,26,0.5905858,3.0,3,4.0,5.0,3,200,1
68,82,55,Arsenal,27,0.1954725,4.0,4,3.0,5.0,4,190,1
67,95,75,Tottenham Hotspur,20,0.7864311,5.0,5,5.0,2.0,5,160,1
57,77,64,Manchester United,13,0.5790034,7.0,7,6.0,10.0,7,140,1
56,80,73,Aston Villa,7,0.7601488,8.0,8,8.0,7.0,8,125,1
37,66,84,West Ham United,-18,0.2757731,17.0,17,16.0,16.5,17,100,1
60,82,72,Brighton and Hove Albion,10,0.6171151,6.0,6,7.0,5.0,6,150,1
42,76,83,Crystal Palace,-7,0.5354543,15.0,15,12.5,11.0,15,108,1
