In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found anywhere under the Kaggle
# input directory, then print the paths one per line in walk order.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# Any results you write to the current directory are saved as output.

In [None]:
# Read the EPL results CSV from the Kaggle input directory into `df`
# and preview the first rows.
EPL_CSV_PATH = '/kaggle/input/epl-results-19932018/EPL_Set.csv'
df = pd.read_csv(EPL_CSV_PATH)
df.head()

### MOST OF OUR ANALYSIS WILL BE ON THE BASIS OF HALF-TIME SCORES, SO WE'LL REMOVE THE 1993-94 AND 1994-95 SEASONS, AND WE'LL ALSO DROP THE 'DIV' COLUMN, AS THERE'S NO USE OF IT.

In [None]:
# Clean the frame in one chain:
#   1. drop every row that contains at least one missing value,
#   2. drop the 'Div' column,
#   3. renumber the rows from 0, discarding the old index.
# NOTE: this cell is not re-runnable on its own output, because 'Div' no longer
# exists after the first run and drop() would raise KeyError.
df = (
    df.dropna()
      .drop(columns='Div')
      .reset_index(drop=True)
)
df.head()

## We'll make Date column as datetime, and HTHG and HTAG as integers

In [None]:
# Cast both half-time goal columns to 64-bit integers in a single call.
df = df.astype({'HTAG': 'int64', 'HTHG': 'int64'})
# Parse Date as datetime; dayfirst=True reads ambiguous strings as day/month.
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)

## We'll add a 'dayofweek' column, which we'll use to check performaces of teams on each day of the week. 0 is Monday, 1 is Tuesday ... 6 is Sunday.

In [None]:
# Day of the week of every match, read through the vectorised datetime accessor
# instead of calling a Python lambda once per row.
# 0 is Monday, 1 is Tuesday ... 6 is Sunday.
# Requires df['Date'] to already be a datetime column.
df['dayofweek'] = df['Date'].dt.dayofweek
df.head()

In [None]:
# Number of matches per day of the week (0 = Monday ... 6 = Sunday).
# Most of the matches have been played on a Saturday.
df['dayofweek'].value_counts()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Full-time results (FTR) counted per day of the week, one bar per result.
# `x` must be passed by keyword: from seaborn 0.12 on, the only positional
# parameter of countplot is `data`, so countplot('dayofweek', data=df) raises
# "TypeError: got multiple values for argument 'data'".
fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(x='dayofweek', hue='FTR', data=df, ax=ax)
ax.set(
    title='Full-time results by day of week',
    xlabel='Day of week (0 = Monday ... 6 = Sunday)',
    ylabel='Matches',
)
plt.show()

# For some reason, teams tend to love the prospects of playing at Home on a Saturday
# Maybe because they're relatively fresh, and so are the majority Home supporters.

## Now comes the Half-Time analysis part! Get Set GOOO!

### When home teams are leading my 3 or more goals at HT, it wouldn't be wise to bet on the away team to win

In [None]:
# Full-time results of matches in which the home side led by three or more
# goals at half time: raw counts first, then each result's share.
home_margin_ht = df['HTHG'] - df['HTAG']
x = df.loc[home_margin_ht >= 3, 'FTR'].value_counts()
print(x)
print(x.div(x.sum()))

### When away teams are leading my 3 or more goals at HT, it wouldn't be wise either to bet on the home team to stage a comeback and win.

In [None]:
# Full-time results of matches in which the away side led by three or more
# goals at half time: raw counts first, then each result's share.
away_margin_ht = df['HTAG'] - df['HTHG']
x = df.loc[away_margin_ht >= 3, 'FTR'].value_counts()
print(x)
print(x.div(x.sum()))

### When home teams are leading by a 2 goal margin at HT, there's a mammoth 93% chance of them winning!

In [None]:
# Full-time results of matches in which the home side led by exactly two
# goals at half time: raw counts first, then each result's share.
home_margin_ht = df['HTHG'] - df['HTAG']
x = df.loc[home_margin_ht == 2, 'FTR'].value_counts()
print(x)
print(x.div(x.sum()))

### When home teams are losing by a 2 goal margin at HT, there's only a 2% chance of them  completing a comeback

In [None]:
# Full-time results of matches in which the away side led by exactly two
# goals at half time: raw counts first, then each result's share.
away_margin_ht = df['HTAG'] - df['HTHG']
x = df.loc[away_margin_ht == 2, 'FTR'].value_counts()
print(x)
print(x.div(x.sum()))

### When home teams are leading by a 1 goal margin at HT, there's a 75% chance of them winning

In [None]:
# Full-time results of matches in which the home side led by exactly one
# goal at half time: raw counts first, then each result's share.
home_margin_ht = df['HTHG'] - df['HTAG']
x = df.loc[home_margin_ht == 1, 'FTR'].value_counts()
print(x)
print(x.div(x.sum()))

### When home teams are losing by a 1 goal margin at HT, there's a decent (38%) chance of the them fighting back and AT LEAST salvaging a draw.

In [None]:
# Full-time results of matches in which the away side led by exactly one
# goal at half time: raw counts first, then each result's share.
away_margin_ht = df['HTAG'] - df['HTHG']
x = df.loc[away_margin_ht == 1, 'FTR'].value_counts()
print(x)
print(x.div(x.sum()))

### When it's a DRAW at HT, stay away from placing the bet! The match could go either way!

In [None]:
# Full-time results of matches that were level at half time (equal home and
# away half-time goals): raw counts first, then each result's share.
level_at_ht = df['HTHG'].eq(df['HTAG'])
x = df.loc[level_at_ht, 'FTR'].value_counts()
print(x)
print(x.div(x.sum()))

## Now comes the part where we discuss individual teams' track record over the seven days of the week (I'd be doing Arsenal, Liverpool and Man Utd here)


### Your team should avoid Arsenal at their home on a Saturday at any cost!
### While your team can face the Gunners on a Sunday at your own home.

In [None]:
# Split Arsenal's matches by venue. Both frames are reused by the half-time
# cells further down the notebook, so their names must not change.
ars_home = df[df['HomeTeam'] == 'Arsenal']
ars_away = df[df['AwayTeam'] == 'Arsenal']

# One figure with two stacked panels (home on top, away below). Calling
# plt.figure() again before the second subplot would put each panel in its own
# half-empty figure.
# `x` is passed by keyword because seaborn >= 0.12 accepts only `data`
# positionally; a positional 'dayofweek' raises TypeError there.
fig, (ax_home, ax_away) = plt.subplots(2, 1, figsize=(12, 12))
sns.countplot(x='dayofweek', hue='FTR', hue_order=['H', 'D', 'A'],
              data=ars_home, ax=ax_home)
ax_home.set_title('Arsenal Home Results')
sns.countplot(x='dayofweek', hue='FTR', hue_order=['H', 'D', 'A'],
              data=ars_away, ax=ax_away)
ax_away.set_title('Arsenal Away Results')
fig.tight_layout()  # keep the lower title clear of the upper x-axis label
plt.show()

### Liverpool next up -- I'll leave the conclusions to you!

In [None]:
# Split Liverpool's matches by venue. Both frames are reused by the half-time
# cells further down the notebook, so their names must not change.
liv_home = df[df['HomeTeam'] == 'Liverpool']
liv_away = df[df['AwayTeam'] == 'Liverpool']

# One figure with two stacked panels (home on top, away below), instead of two
# separate figures that each fill only half of their canvas.
# `x` is passed by keyword because seaborn >= 0.12 accepts only `data`
# positionally; a positional 'dayofweek' raises TypeError there.
fig, (ax_home, ax_away) = plt.subplots(2, 1, figsize=(12, 12))
sns.countplot(x='dayofweek', hue='FTR', hue_order=['H', 'D', 'A'],
              data=liv_home, ax=ax_home)
ax_home.set_title('Liverpool Home Results')
sns.countplot(x='dayofweek', hue='FTR', hue_order=['H', 'D', 'A'],
              data=liv_away, ax=ax_away)
ax_away.set_title('Liverpool Away Results')
fig.tight_layout()  # keep the lower title clear of the upper x-axis label
plt.show()

### Finally, Man Utd -- Dare not bet against them winning at Old Trafford on a Saturday!

In [None]:
# Split Man United's matches by venue. Both frames are reused by the half-time
# cells further down the notebook, so their names must not change.
mutd_home = df[df['HomeTeam'] == 'Man United']
mutd_away = df[df['AwayTeam'] == 'Man United']

# One figure with two stacked panels (home on top, away below), instead of two
# separate figures that each fill only half of their canvas.
# `x` is passed by keyword because seaborn >= 0.12 accepts only `data`
# positionally; a positional 'dayofweek' raises TypeError there.
fig, (ax_home, ax_away) = plt.subplots(2, 1, figsize=(12, 12))
sns.countplot(x='dayofweek', hue='FTR', hue_order=['H', 'D', 'A'],
              data=mutd_home, ax=ax_home)
ax_home.set_title('Man United Home Results')
sns.countplot(x='dayofweek', hue='FTR', hue_order=['H', 'D', 'A'],
              data=mutd_away, ax=ax_away)
ax_away.set_title('Man United Away Results')
fig.tight_layout()  # keep the lower title clear of the upper x-axis label
plt.show()

## NOW TIME FOR THE MOST INTERESTING PART OF THE ANALYSIS:

# We'll see the Full Time results of Arsenal, Liverpool & Man Utd based on Half Time scores

### 1. HOME TEAM LEADING AT HALF TIME:

In [None]:
# ARSENAL: There's nearly a 90% chance of Arsenal winning a game in which they've led at HT.
# Below: full-time result counts, then percentages, for Arsenal home matches with HTR == 'H'.

home_ahead_at_ht = ars_home['HTR'] == 'H'
x = ars_home.loc[home_ahead_at_ht, 'FTR'].value_counts()
print(x)
print(x.mul(100) / x.sum())

In [None]:
# LIVERPOOL: There's a tiny 0.5% (zero-point-five!) chance of Liverpool losing a game in which they've led at HT! WOW!
# Below: full-time result counts, then percentages, for Liverpool home matches with HTR == 'H'.

home_ahead_at_ht = liv_home['HTR'] == 'H'
x = liv_home.loc[home_ahead_at_ht, 'FTR'].value_counts()
print(x)
print(x.mul(100) / x.sum())

In [None]:
# MAN UNITED: They've NEVER lost a game in which they've led at HT! DAMNNNNN!
# Below: full-time result counts, then percentages, for Man United home matches with HTR == 'H'.

home_ahead_at_ht = mutd_home['HTR'] == 'H'
x = mutd_home.loc[home_ahead_at_ht, 'FTR'].value_counts()
print(x)
print(x.mul(100) / x.sum())

### 2. HOME TEAM LOSING AT HALF TIME:

In [None]:
# ARSENAL: All results are possible when Arsenal are losing at home at HT
# Below: full-time result counts, then percentages, for Arsenal home matches with HTR == 'A'.

home_behind_at_ht = ars_home['HTR'] == 'A'
x = ars_home.loc[home_behind_at_ht, 'FTR'].value_counts()
print(x)
print(x.mul(100) / x.sum())

In [None]:
# LIVERPOOL: There's more than a 50% chance of the away team winning when they've led at HT at Anfield
# Below: full-time result counts, then percentages, for Liverpool home matches with HTR == 'A'.

home_behind_at_ht = liv_home['HTR'] == 'A'
x = liv_home.loc[home_behind_at_ht, 'FTR'].value_counts()
print(x)
print(x.mul(100) / x.sum())

In [None]:
# MAN UTD: Surprisingly, United's record at Old Trafford ain't as good when they've been losing at HT
# Below: full-time result counts, then percentages, for Man United home matches with HTR == 'A'.

home_behind_at_ht = mutd_home['HTR'] == 'A'
x = mutd_home.loc[home_behind_at_ht, 'FTR'].value_counts()
print(x)
print(x.mul(100) / x.sum())

### 3. AWAY TEAM LOSING AT HALF TIME:

In [None]:
# ARSENAL: When Arsenal are losing at HT away from home, there's only a 12% chance they'll win the game.
# Below: full-time result counts, then percentages, for Arsenal away matches with HTR == 'H'.

away_behind_at_ht = ars_away['HTR'] == 'H'
x = ars_away.loc[away_behind_at_ht, 'FTR'].value_counts()
print(x)
print(x.mul(100) / x.sum())

In [None]:
# LIVERPOOL: When Liverpool are losing at HT away from home, there's only an 8% chance they'll win the game
# Below: full-time result counts, then percentages, for Liverpool away matches with HTR == 'H'.

away_behind_at_ht = liv_away['HTR'] == 'H'
x = liv_away.loc[away_behind_at_ht, 'FTR'].value_counts()
print(x)
print(x.mul(100) / x.sum())

In [None]:
# MAN UTD: When Man Utd are losing at HT away from home, there's a 20% chance they'll come back and win the game! WHOAAA!
# Below: full-time result counts, then percentages, for Man United away matches with HTR == 'H'.
away_behind_at_ht = mutd_away['HTR'] == 'H'
x = mutd_away.loc[away_behind_at_ht, 'FTR'].value_counts()
print(x)
print(x.mul(100) / x.sum())

### 4. AWAY TEAM WINNING AT HALF TIME:

In [None]:
# ARSENAL: Whenever Arsenal are leading at HT away from home, there's an 80% chance they'll eventually win the game
# Below: full-time result counts, then percentages, for Arsenal away matches with HTR == 'A'.

away_ahead_at_ht = ars_away['HTR'] == 'A'
x = ars_away.loc[away_ahead_at_ht, 'FTR'].value_counts()
print(x)
print(x.mul(100) / x.sum())

In [None]:
# LIVERPOOL: Whenever Liverpool are leading away from home at HT, there's a 77% chance they'll end up winning
# Below: full-time result counts, then percentages, for Liverpool away matches with HTR == 'A'.

away_ahead_at_ht = liv_away['HTR'] == 'A'
x = liv_away.loc[away_ahead_at_ht, 'FTR'].value_counts()
print(x)
print(x.mul(100) / x.sum())

In [None]:
# MAN UTD: Whenever the Red Devils are leading at HT away from home, there's only a 2.5% chance the opponent will stage a comeback and win it!
# Below: full-time result counts, then percentages, for Man United away matches with HTR == 'A'.

away_ahead_at_ht = mutd_away['HTR'] == 'A'
x = mutd_away.loc[away_ahead_at_ht, 'FTR'].value_counts()
print(x)
print(x.mul(100) / x.sum())

# A VERY BIG THANK YOU FOR STAYING UNTIL THE END (OR IF YOU'VE DIRECTLY HIT 'END' ON YOUR KEYBOARDS TO SKIP TO THE BOTTOM, I'D ADVISE YOU TO GIVE IT A READ!) Aufwiedersehen! :)