In [1]:
import os
import pymongo
import pandas as pd
import requests
from pprint import pprint
import numpy as np
import matplotlib.pyplot as plt  # To visualize
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from pymongo import MongoClient
import statsmodels.api as sm


In [2]:
class DBConn():

    def __init__(self, DB_NAME='PremierLeague'):
        self.db_user = os.environ.get('DB_user')
        self.db_pass = os.environ.get('DB_pass')
        self.MONGODB_URL = f'mongodb+srv://{self.db_user}:{self.db_pass}@cluster0-mbqxj.mongodb.net/<dbname>?retryWrites=true&w=majority'
        self.client = MongoClient(self.MONGODB_URL)
        self.DATABASE = self.client[DB_NAME]


In [3]:
"""Get pass stats"""
db = DBConn()
coll = db.DATABASE['fixture_players_stats']
pipeline = [
    {
        '$match':{
            'seasonId': 363
        }
    },
    {
        '$project':{
            '_id': 0,
            'total_pass': 1,
            'id': 1,
            'f_id':1,
            'position':1,
            'mins_played': 1,
            
        }
    }
]
passes = list(coll.aggregate(pipeline))

In [4]:
"""Get pass stats"""
db = DBConn()
coll = db.DATABASE['fixture_stats']
pipeline = [
    {
        '$match':{
            'seasonId': 363,
            '$and': [
                {'formation': {'$exists': True, '$not': {'$size': 0}}}
            ],
        }
    },
    {
        '$project':{
            '_id': 0,
            'id': 1,
            'away_team_id':1,
            'home_team_id':1,
            'home_team_score': 1,
            'away_team_score': 1,
            'formation': 1
            
        }
    }
]
stats_query = list(coll.aggregate(pipeline))

In [5]:
def create_stats():
    stats = []
    for i in stats_query:
        stats_temp = {}
        stats_temp['f_id'] = i['id']
        stats_temp['home_team_id'] = i['home_team_id']
        stats_temp['away_team_id'] = i['away_team_id']
        stats_temp['home_team_score'] = i['home_team_score']
        stats_temp['away_team_score'] = i['away_team_score']
        stats_temp['home_formation'] = i['formation'][0]['label']
        stats_temp['away_formation'] = i['formation'][1]['label']
        stats.append(stats_temp)
    return stats

In [6]:
stats_df = pd.DataFrame(create_stats())
df = pd.DataFrame(passes)
fixtures = df.merge(stats_df, on='f_id', how='left')


In [30]:
def select_position(df, pos):
    df = df[df.position == pos]
    df = df.fillna(0)
    df = pd.get_dummies(df)
    cols = df.columns.tolist()
    cols = cols[3:4] + cols[:3] + cols[4:]
    df = df[cols]
    return df

defenders = select_position(fixtures, 'D')
forwards = select_position(fixtures, 'F')
midfield = select_position(fixtures, 'M')

In [49]:
mid_clean = midfield[midfield.mins_played > 45]
mid_players = mid_clean['id'].value_counts()


In [63]:
def get_key_players(players_df):
    df = pd.DataFrame()
    for v, i in players_df.items():
        """Get pass stats"""
        coll = db.DATABASE['fixture_players_stats']
        pipeline = [
            {
                '$match':{
                    'id': v
                }
            },
            {
                '$project':{
                    '_id': 0,
                    'total_pass': 1,
                    'id': 1,
                    'f_id':1,
                    'position':1,
                    'mins_played': 1,
                    'seasonId': 1


                }
            }
        ]
        player = list(coll.aggregate(pipeline))
        df = df.append(player)
    return df

In [65]:
key_players = get_key_players(mid_players)

In [70]:
key_players = key_players.fillna(0)

In [77]:
best = key_players.loc[df['total_pass'] >= 70]
ranking = best['id'].value_counts().sort_values(ascending=False)

In [110]:
def best_pass_player(p_id):
    print(p_id)
    players = db.DATABASE['player_stats']
    pipeline = [
        {
            '$match':{
                'id': p_id
            }
        },
        {
        '$group': {
            '_id': 'null',
            'name': { '$addToSet': "$name" }
            }
        },
        {
            '$unwind': '$name'
        },
        {
            '$project':{
                '_id': 0,
            }
        }
    ]
    result = list(players.aggregate(pipeline))
    print(result)
    

In [111]:
for v, i in ranking.items():
    best_pass_player(v)

22542.0
[{'name': 'Pascal Groß'}]
10428.0
[{'name': 'Ainsley Maitland-Niles'}]
3564.0
[{'name': 'Marc Albrighton'}]
4421.0
[{'name': 'Gylfi Sigurdsson'}]
7488.0
[{'name': 'Harry Winks'}]
3712.0
[{'name': 'Jordan Henderson'}]
4288.0
[{'name': 'Kevin De Bruyne'}]
4804.0
[{'name': 'Fernandinho'}]
5272.0
[{'name': 'Pierre-Emile Højbjerg'}]
8168.0
[{'name': 'Dale Stephens'}]
5859.0
[{'name': 'Luka Milivojevic'}]
4549.0
[{'name': 'Moussa Sissoko'}]
4286.0
[{'name': 'Oriol Romeu'}]
13823.0
[{'name': 'Abdoulaye Doucouré'}]
8052.0
[{'name': 'Ryan Fraser'}]
13389.0
[{'name': 'Tom Davies'}]
4713.0
[{'name': 'Isaac Hayden'}]
3811.0
[{'name': 'Nathan Redmond'}]
4287.0
[{'name': 'Juan Mata'}]
20479.0
[{'name': 'Wilfred Ndidi'}, {'name': 'Onyinye Wilfred Ndidi'}]
4224.0
[{'name': 'James McArthur'}]
3861.0
[{'name': 'Nemanja Matic'}]
7114.0
[{'name': 'Georginio Wijnaldum'}]
6210.0
[{'name': 'Jóhann Gudmundsson'}]
13492.0
[{'name': "N'Golo Kanté"}]
5861.0
[{'name': 'Cheikhou Kouyaté'}]
5067.0
[{'name':

In [None]:
"""Create reverse pipeline for extracting odds for a specific type of odds-game
1. Look-up odds-game om betting site. e.g. Pogba o/u 30.5 passes
2. Input to pipeline is than player name
3. output is odds of player covering odds
4. Deside if fair odds
"""

In [None]:
"""Pipeline to display key players for passes"""
"""
.project({
    '_id': 0,
    'total_pass': 1,
    'id': 1,
    'f_id':1,
    'position':1,
    'mins_played': 1,
    'seasonId': 1
    'passesPerMin': $divide: ['total_pass', 'mins_played']
})
.sort({
    'passesPerMin': -1
})


"""