# NRL match analysis

Going to determine which stats have the biggest impact on the outcome of NRL matches. We will see if we can use this information to make predictions about future matches.

In [2]:
#Import Pandas
import pandas as pd

In [3]:
#Connect to database
import mysql.connector as sql

mydb = sql.connect(
  host="localhost",
  user="root",
  passwd="NYg1@nts",
  database="NRL_data"
)


In [4]:
#Function to easily create dataframes from SQL query results
def create_df(query):
    return pd.read_sql_query(query, con=mydb)

In [31]:
#Only want to analyze matches from the 2018 season
matches2018 = "SELECT * FROM Matches WHERE year(date) = '2018';"
matches2018 = create_df(matches2018)
matches2018.head()

Unnamed: 0,id,date,round,home_team_id,home_score,away_team_id,away_score,winner,is_draw,stadium_id,weather,url
0,1,2018-03-08,1,14,34,1,12,14,0,1,,http://www.nrl.com/draw/nrl-premiership/2018/r...
1,2,2018-03-09,1,8,19,6,18,8,0,2,,http://www.nrl.com/draw/nrl-premiership/2018/r...
2,3,2018-03-09,1,10,20,4,14,10,0,3,,http://www.nrl.com/draw/nrl-premiership/2018/r...
3,4,2018-03-10,1,16,10,15,8,16,0,4,,http://www.nrl.com/draw/nrl-premiership/2018/r...
4,5,2018-03-10,1,13,20,9,32,9,0,5,,http://www.nrl.com/draw/nrl-premiership/2018/r...


In [80]:
#Better way to view the data in a table...easier to work with team names than ids
matches2018 = '''SELECT m.id, m.round, m.date,
CASE
	WHEN m.home_team_id = m.winner THEN home.nickname
    ELSE away.nickname
END winner,
CASE
	WHEN m.home_team_id = m.winner THEN m.home_score
    ELSE m.away_score
END winning_score,
CASE
	WHEN m.home_team_id = m.winner THEN away.nickname
    ELSE home.nickname
END loser,
CASE
	WHEN m.home_team_id = m.winner THEN m.away_score
    ELSE m.home_score
END losing_score, home.nickname home, away.nickname away
FROM Matches m
JOIN Teams home
ON m.home_team_id = home.id
JOIN Teams away
ON m.away_team_id = away.id
WHERE year(m.date) = 2018
ORDER BY date;'''
matches2018 = create_df(matches2018)
matches2018.head()

Unnamed: 0,id,round,date,winner,winning_score,loser,losing_score,home,away
0,1,1,2018-03-08,Dragons,34,Broncos,12,Dragons,Broncos
1,2,1,2018-03-09,Knights,19,Sea Eagles,18,Knights,Sea Eagles
2,3,1,2018-03-09,Cowboys,20,Sharks,14,Cowboys,Sharks
3,6,1,2018-03-10,Storm,36,Bulldogs,18,Bulldogs,Storm
4,5,1,2018-03-10,Warriors,32,Rabbitohs,20,Rabbitohs,Warriors


In [82]:
winners = matches2018.groupby('winner')
w_scored = winners['winning_score'].sum()
w_allowed = winners['losing_score'].sum()

losers = matches2018.groupby('loser')
l_scored = losers['losing_score'].sum()
l_allowed = losers['winning_score'].sum()

total_scores = pd.concat([w_scored, l_scored, w_allowed, l_allowed], axis=1, join='inner')
total_scores.columns = ['scored_in_win', 'scored_in_loss', 'allowed_in_win', 'allowed_in_loss']
total_scores

Unnamed: 0,scored_in_win,scored_in_loss,allowed_in_win,allowed_in_loss
Broncos,402,154,225,275
Bulldogs,220,208,108,366
Cowboys,226,223,140,381
Dragons,415,104,196,276
Eels,170,204,70,480
Knights,202,212,144,463
Panthers,383,134,209,252
Rabbitohs,466,116,245,192
Raiders,271,292,134,406
Roosters,456,86,198,163


In [35]:
#Make final standings
standings2018 = '''SELECT t.nickname team,
    count(m.winner) as wins,
    24 - count(m.winner) as loses
FROM Matches m
JOIN Teams t
ON m.winner = t.id
WHERE year(m.date) = 2018
GROUP BY m.winner
ORDER BY wins DESC;'''
standings2018 = create_df(standings2018)
print(standings2018)

          team  wins  loses
0     Roosters    16      8
1    Rabbitohs    16      8
2        Storm    16      8
3       Sharks    16      8
4      Dragons    15      9
5      Broncos    15      9
6     Warriors    15      9
7     Panthers    15      9
8       Tigers    12     12
9      Raiders    10     14
10     Knights     9     15
11      Titans     8     16
12     Cowboys     8     16
13    Bulldogs     8     16
14  Sea Eagles     7     17
15        Eels     6     18


In [None]:
#PlayerMatchStats
stats2018 = '''SELECT t.nickname team, 
	count(m.winner) * 2 as points,
    count(m.winner) as wins
FROM PlayerMatchStats m
JOIN Teams t
ON m.winner = t.id
WHERE year(m.date) = 2018
GROUP BY m.winner
ORDER BY points DESC;'''

In [117]:
query = '''SELECT CONCAT(p.first_name, ' ' ,p.last_name) name, t.nickname team, p_stats.*
        FROM PlayerMatchStats p_stats
        JOIN Players p
        ON p_stats.player_id = p.id
        JOIN Teams t
        ON p_stats.team_id = t.id;'''

player_df = create_df(query)

In [123]:
print(player_df.groupby(['match_id', 'team_id']))

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x11af15940>
