In [7]:
import os
import pymongo
import pandas as pd
import requests
from pprint import pprint
import numpy as np
import matplotlib.pyplot as plt  # To visualize
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from pymongo import MongoClient
import statsmodels.api as sm


In [8]:
class DBConn():
    """Thin wrapper around a MongoDB Atlas connection.

    Reads the credentials from the ``DB_user`` and ``DB_pass`` environment
    variables, opens a ``MongoClient`` against the cluster and exposes the
    selected database as ``self.DATABASE``.

    Args:
        DB_NAME: Name of the database to select on the client.

    Raises:
        RuntimeError: If either credential environment variable is missing
            or empty.
    """

    def __init__(self, DB_NAME='PremierLeague'):
        # Local import so the class stays self-contained within this cell.
        from urllib.parse import quote_plus

        self.db_user = os.environ.get('DB_user')
        self.db_pass = os.environ.get('DB_pass')
        if not self.db_user or not self.db_pass:
            # Fail here with a clear message instead of connecting as
            # the literal user "None" and failing later on the first query.
            raise RuntimeError(
                "Environment variables 'DB_user' and 'DB_pass' must both be set"
            )

        # Credentials must be percent-escaped: characters such as '@', ':' or
        # '/' in a password would otherwise corrupt the URI. The database
        # segment previously held the unfilled '<dbname>' template placeholder.
        self.MONGODB_URL = (
            f'mongodb+srv://{quote_plus(self.db_user)}:{quote_plus(self.db_pass)}'
            f'@cluster0-mbqxj.mongodb.net/{quote_plus(DB_NAME)}'
            '?retryWrites=true&w=majority'
        )
        self.client = MongoClient(self.MONGODB_URL)
        self.DATABASE = self.client[DB_NAME]


In [11]:
"""Get pass stats"""
db = DBConn()
coll = db.DATABASE['fixture_info']

# Two-stage aggregation:
#   1. keep season 363 fixtures whose status equals 'C'
#   2. stop after the first match, so a single document can be inspected
pipeline = [
    {'$match': {'seasonId': 363, '$and': [{'status': {'$eq': 'C'}}]}},
    {'$limit': 1},
]

# Materialise the cursor so the result can be printed (and reused) as a list.
stats_query = [*coll.aggregate(pipeline)]
pprint(stats_query)

[{'_id': ObjectId('601ff45c3f053199f2740081'),
  'awayHalfTimeScore': 1,
  'city': 'London',
  'clockLabel': "90 +3'00",
  'clockSecs': 5580,
  'competition': 'Premier League',
  'competitionAbbr': 'EN_PR',
  'competitionId': 1,
  'events': [{'awayScore': 0,
              'clockLabel': "00'00",
              'clockSecs': 0,
              'homeScore': 0,
              'id': None,
              'phase': '1',
              'timeLabel': '12 September 2020, 12:32',
              'timeMillis': 1599910343000,
              'type': 'PS'},
             {'awayScore': 1,
              'clockLabel': "08'00",
              'clockSecs': 480,
              'homeScore': 0,
              'id': 89055,
              'phase': '1',
              'timeLabel': '12 September 2020, 12:40',
              'timeMillis': 1599910823000,
              'type': 'G'},
             {'awayScore': 1,
              'clockLabel': "26'00",
              'clockSecs': 1560,
              'homeScore': 0,
              'id': 116

               'name': 'Granit Xhaka',
               'playerId': 0,
               'position': 'M',
               'positionInfo': 'Centre Central Midfielder',
               'shirtNum': 34,
               'teamId': 1},
              {'captain': True,
               'first': 'Pierre-Emerick',
               'id': 5110,
               'last': 'Aubameyang',
               'matchPosition': 'F',
               'name': 'Pierre-Emerick Aubameyang',
               'playerId': 0,
               'position': 'F',
               'positionInfo': 'Centre Striker',
               'shirtNum': 14,
               'teamId': 1},
              {'captain': False,
               'first': 'Mohamed Naser',
               'id': 5239,
               'last': 'El Sayed Elneny',
               'matchPosition': 'M',
               'name': 'Mohamed Elneny',
               'playerId': 0,
               'position': 'M',
               'positionInfo': 'Centre Defensive Midfielder',
               'shirtNum': 25,
     

In [5]:
    # Join stage: for each input document, collect the 'fixture_stats'
    # documents with the same 'fId' into an array field named 'f_stats'.
    {
        '$lookup': {
           'from': 'fixture_stats',
           'localField': 'fId',
           'foreignField': 'fId',
           'as': 'f_stats'
        }
    },

In [6]:
# Build one frame from create_stats() and one from `passes`, then attach the
# stats columns to every pass row that shares the same 'f_id'. The left join
# keeps every row of the passes frame.
stats_df = pd.DataFrame(create_stats())
df = pd.DataFrame(passes)
fixtures = pd.merge(df, stats_df, how='left', on='f_id')


In [30]:
def select_position(df, pos):
    """Return the rows of ``df`` for one position, cleaned and one-hot encoded.

    Rows whose ``position`` equals ``pos`` are kept, missing values are
    replaced with 0 and the frame is passed through ``pd.get_dummies``.
    The fourth column of the encoded frame is then moved to the front; all
    other columns keep their relative order.

    Args:
        df: Frame with at least a ``position`` column.
        pos: Position value to keep (the notebook calls this with 'D', 'F', 'M').

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    encoded = pd.get_dummies(df.loc[df.position == pos].fillna(0))
    names = list(encoded.columns)
    # Column at index 3 first, then columns 0-2, then the remainder.
    reordered = names[3:4] + names[:3] + names[4:]
    return encoded[reordered]

# One encoded frame per position value, evaluated in the order D, F, M.
defenders, forwards, midfield = (
    select_position(fixtures, code) for code in ('D', 'F', 'M')
)

In [49]:
# Keep only midfield rows with more than 45 minutes played ...
mid_clean = midfield.loc[midfield['mins_played'] > 45]
# ... and count how many such rows each player id has (most frequent first).
mid_players = mid_clean['id'].value_counts()


In [63]:
def get_key_players(players_df):
    """Fetch the per-fixture pass stats of every player in ``players_df``.

    For each key of ``players_df`` (the notebook passes a ``value_counts``
    Series, whose keys are player ids) the ``fixture_players_stats``
    collection is queried for documents with that ``id``. Only the fields
    ``total_pass``, ``id``, ``f_id``, ``position``, ``mins_played`` and
    ``seasonId`` are returned.

    Uses the module-level ``db`` connection.

    Args:
        players_df: Mapping-like object exposing ``.items()``; only the keys
            (player ids) are used, the values are ignored.

    Returns:
        A DataFrame with one row per matching document. Rows of each player
        keep their own 0-based index, as in the original implementation.
        An empty DataFrame is returned when nothing matches.
    """
    coll = db.DATABASE['fixture_players_stats']
    # Loop-invariant projection stage, built once.
    projection = {
        '$project': {
            '_id': 0,
            'total_pass': 1,
            'id': 1,
            'f_id': 1,
            'position': 1,
            'mins_played': 1,
            'seasonId': 1,
        }
    }

    # Collect one frame per player and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # accumulated frame on every iteration.
    frames = []
    for player_id, _count in players_df.items():
        pipeline = [{'$match': {'id': player_id}}, projection]
        rows = list(coll.aggregate(pipeline))
        if rows:
            frames.append(pd.DataFrame(rows))

    if not frames:
        # pd.concat raises on an empty list; mirror the old empty result.
        return pd.DataFrame()
    return pd.concat(frames)

In [65]:
# Pull the stored per-fixture rows for every player id counted in mid_players
# (one database query per id, so this cell can be slow).
key_players = get_key_players(mid_players)

In [70]:
# Replace missing values with 0. This rebinds the same name; re-running the
# cell is harmless because a second fillna finds nothing left to fill.
key_players = key_players.fillna(0)

In [77]:
# Keep the rows with at least 70 passes. The mask must come from key_players
# itself: the previous version built it from the unrelated global `df`
# (the passes frame), so rows were selected by index label alignment
# against a different table.
best = key_players.loc[key_players['total_pass'] >= 70]
# Number of such rows per player id, highest count first.
ranking = best['id'].value_counts().sort_values(ascending=False)

In [110]:
def best_pass_player(p_id):
    """Look up the distinct names stored for a player id.

    Queries the ``player_stats`` collection through the module-level ``db``
    connection, collects the unique ``name`` values of all documents whose
    ``id`` equals ``p_id`` and prints them (after printing the id itself).

    Args:
        p_id: Player id. numpy scalars (for example a value taken from a
            pandas index) are converted to plain Python numbers first,
            because BSON cannot encode numpy scalar types.

    Returns:
        A list of ``{'name': ...}`` dicts, one per distinct name; empty when
        no document matches. Previously the function only printed and
        returned ``None``.
    """
    # numpy scalars expose .item(), which yields the native Python value.
    if hasattr(p_id, 'item'):
        p_id = p_id.item()
    print(p_id)

    players = db.DATABASE['player_stats']
    pipeline = [
        {'$match': {'id': p_id}},
        # A constant (null) group key folds all matches into one group;
        # $addToSet then de-duplicates the names.
        {'$group': {'_id': None, 'name': {'$addToSet': "$name"}}},
        # One output document per distinct name.
        {'$unwind': '$name'},
        {'$project': {'_id': 0}},
    ]
    result = list(players.aggregate(pipeline))
    print(result)
    return result
    