In [2]:
import itertools
import os
import sqlite3
import sys
import webbrowser
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objs as go
import seaborn as sns
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [3]:
# Enable plotly's offline rendering inside the notebook.
# NOTE(review): connected=True is documented as loading plotly.js from the CDN
# instead of embedding it in the notebook — confirm for the installed version.
init_notebook_mode(connected=True)

In [4]:
# Build the connection to the SQLite database that holds the soccer data.
path = "."
database = os.path.join(path, 'database.sqlite')
# sqlite3.connect() silently creates an empty database when the file is
# missing, which only surfaces later as a confusing "no such table" error;
# fail early with a clear message instead.
if not os.path.isfile(database):
    raise FileNotFoundError(
        "SQLite database not found: %s" % os.path.abspath(database))
conn = sqlite3.connect(database)
# Show up to 100 columns when a DataFrame is displayed.
pd.set_option("display.max_columns",100)

General information about this database.  
## Understand the tables  
The `Match` table has a `stage` column. What does it represent?

In [5]:
# List every table stored in the database by reading SQLite's schema catalogue.
table_listing_sql = """SELECT *
                        FROM sqlite_master
                        WHERE type='table';"""
tables = pd.read_sql(table_listing_sql, conn)
tables

Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,sqlite_sequence,sqlite_sequence,4,"CREATE TABLE sqlite_sequence(name,seq)"
1,table,Player_Attributes,Player_Attributes,11,"CREATE TABLE ""Player_Attributes"" (\n\t`id`\tIN..."
2,table,Player,Player,14,CREATE TABLE `Player` (\n\t`id`\tINTEGER PRIMA...
3,table,Match,Match,18,CREATE TABLE `Match` (\n\t`id`\tINTEGER PRIMAR...
4,table,League,League,24,CREATE TABLE `League` (\n\t`id`\tINTEGER PRIMA...
5,table,Country,Country,26,CREATE TABLE `Country` (\n\t`id`\tINTEGER PRIM...
6,table,Team,Team,29,"CREATE TABLE ""Team"" (\n\t`id`\tINTEGER PRIMARY..."
7,table,Team_Attributes,Team_Attributes,2,CREATE TABLE `Team_Attributes` (\n\t`id`\tINTE...


In [6]:
# Load the complete Match table into a DataFrame.
query = """
select * from Match ;
"""
match_df = pd.read_sql(sql=query, con=conn)

In [39]:
# Season labels used throughout the notebook, in chronological order:
# "2008/2009" through "2015/2016".
season_ls = ["%d/%d" % (start_year, start_year + 1)
             for start_year in range(2008, 2016)]


In [110]:
# function to make goal difference on each season.
def _goal_dif_season(season, match_df, period):
    # get data for that season.
    goal_dif = match_df[match_df.season==season][["home_team_goal","away_team_goal","date"]]
    # make new columns on date.
    goal_dif["home_goal_dif"] = goal_dif.home_team_goal - goal_dif.away_team_goal
    goal_dif["my_date"] = pd.to_datetime(goal_dif.date)
    if period == "dow":
        goal_dif["match_dow"] = goal_dif.my_date.dt.day_name()
        # get the goal dif on dow.
        goal_dif_period = goal_dif.groupby(by="match_dow").mean()[["home_goal_dif","home_team_goal","away_team_goal"]]
    elif period == 'month':
        goal_dif["match_month"] = goal_dif.my_date.dt.month
        goal_dif_period = goal_dif.groupby(by="match_month").mean()[["home_goal_dif","home_team_goal","away_team_goal"]]
    goal_dif_period.reset_index(inplace=True)
    return goal_dif_period

In [176]:
# the drawing function to make goal difference on each season.
def _draw_goal_dif(goal_dif, season, period):
    """Build a plotly figure of mean goals / goal difference for one season.

    Parameters
    ----------
    goal_dif : pandas.DataFrame
        Aggregated frame with a ``match_dow`` (for ``period="dow"``) or
        ``match_month`` (for ``period="month"``) column plus the columns
        ``home_goal_dif``, ``home_team_goal`` and ``away_team_goal``.
    season : str
        Season label, used only in the figure title.
    period : {"dow", "month"}
        Which grouping column to plot along the x axis.

    Returns
    -------
    plotly.graph_objs.Figure
        Figure with one scatter trace per value column.

    Raises
    ------
    ValueError
        If ``period`` is neither ``"dow"`` nor ``"month"``.
    """
    if period == "dow":
        key_col = "match_dow"
        order = ["Monday", "Tuesday", "Wednesday",
                 "Thursday", "Friday", "Saturday", "Sunday"]
    elif period == "month":
        key_col = "match_month"
        # June (6) is absent from the x-axis ordering.
        # NOTE(review): presumably because no matches are played in June —
        # confirm against the data.
        order = list(range(1, 6)) + list(range(7, 13))
    else:
        raise ValueError(
            "period must be 'dow' or 'month', got %r" % (period,))

    # Keep only categories that occur in this season; a weekday or month
    # without any match would otherwise make the position lookup fail.
    keys = list(goal_dif[key_col])
    order = [key for key in order if key in keys]
    positions = [keys.index(key) for key in order]

    line_colours = ["#DC143C", "#1C86EE", "#006400"]
    cols = ["home_goal_dif", "home_team_goal", "away_team_goal"]
    names = ["Home Goals - Away Goals", "Home Goals", "Away Goals"]

    data = []
    for ind, col_name in enumerate(cols):
        # Reorder by position once and derive both y and the hover text from
        # the same reordered values so each label matches its point.
        values = goal_dif[col_name].iloc[positions].tolist()
        trace = go.Scatter(
            x=order,
            y=values,
            name=names[ind],
            text=["Mean goal difference:" + str(round(value, 2))
                  for value in values],
            marker=dict(
                color=line_colours[ind],
                line=dict(
                    color=line_colours[ind],
                    width=1.5,
                )
            ),
            opacity=1
        )
        data.append(trace)

    layout = go.Layout(
        title="The average goal difference in season %s per %s." % (
            season, period),
        xaxis=dict(
            title=period
        ),
        yaxis=dict(
            title='Goal Differences'
        ),
        bargap=0.2,
        bargroupgap=0.1,
        paper_bgcolor='rgb(243, 243, 243)',
        plot_bgcolor='rgb(243, 243, 243)'
    )
    fig1 = go.Figure(data=data, layout=layout)
    return fig1

In [184]:
# Accumulator for (plot div HTML, season label) tuples, one per season.
res = []

In [185]:
# Build one embeddable plot <div> per season (grouped by day of week).
# `res` is reset here so re-running this cell does not append every season's
# plot a second time.
res = []
for season in season_ls:
    goal_dif_dow = _goal_dif_season(season, match_df, "dow")
    fig1 = _draw_goal_dif(goal_dif_dow, season, "dow")
    # Render the figure as an HTML <div>; plotly.js itself is not embedded,
    # so the page that hosts the div must load it.
    _div = plot(fig1, include_plotlyjs=False, output_type='div')
    res.append((_div, season))

In [186]:
# Assemble the standalone HTML report: a fixed header, one titled section per
# (plot div, season) tuple in `res`, then the closing tags.
# NOTE(review): the "plotly-latest" CDN bundle is reportedly frozen at an old
# plotly.js release — confirm it matches the installed plotly version, or pin
# an explicit plotly.js version in the script URL.
html_header = '''<!DOCTYPE html>
<html>
<head>
  <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
</head>
<body>
<div>
     <div style="display:flex">
     <h2><font color="black">European Soccer Data Visualization</font></h2>
     </div>
</div>
'''

# Each section gets its own anchor id derived from the season label
# (e.g. "season-2008-2009"); element ids must be unique within a page.
html_sections = [
    '''<h5><a id="season-%(anchor)s"></a>%(_div_title)s</h5>
%(div)s
''' % {'anchor': season_label.replace("/", "-"),
       '_div_title': season_label,
       'div': plot_div}
    for plot_div, season_label in res
]

# The header opens <body>, so closing it here yields a well-formed document.
html_footer = '''<hr>
</body>
</html>'''

html_doc = html_header + "".join(html_sections) + html_footer


In [187]:
# Write the report with an explicit encoding so the output does not depend on
# the platform's default locale encoding.
with open('EU_soccer_data_viz_dow.html', 'w', encoding='utf-8') as f:
    f.write(html_doc)

In [188]:
# Open the generated report in the default browser. This replaces the
# macOS-only `open` shell command with a call that works on any platform.
webbrowser.open(Path('EU_soccer_data_viz_dow.html').resolve().as_uri());