In [1]:
import os
import pymongo
import pandas as pd
from functools import reduce
import requests
from pprint import pprint
import numpy as np
import matplotlib.pyplot as plt  # To visualize
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from pymongo import MongoClient
import statsmodels.api as sm


In [2]:
class DBConn():

    def __init__(self, DB_NAME='PremierLeague'):
        self.db_user = os.environ.get('DB_user')
        self.db_pass = os.environ.get('DB_pass')
        self.MONGODB_URL = f'mongodb+srv://{self.db_user}:{self.db_pass}@cluster0-mbqxj.mongodb.net/<dbname>?retryWrites=true&w=majority'
        self.client = MongoClient(self.MONGODB_URL)
        self.DATABASE = self.client[DB_NAME]


In [3]:
"""Get pass stats"""
db = DBConn()
coll = db.DATABASE['fixture_info']
pipeline = [
    {
        '$match':{
            'seasonId': 363,
            '$and': [
                { 'status': {'$eq': 'C'} }
            ],
        },
    },
    {
        '$project':{
            '_id': 0,
            'fId': 1,
            'formation': 1,
            'lineUps': 1,
            'substitutes': 1 
        },  
    },
    {
        '$limit': 10
    },
]
stats_query = list(coll.aggregate(pipeline))
pprint(stats_query)

[{'fId': 58898,
  'formation': [{'label': '4-2-3-1',
                 'players': [[10727],
                             [12536, 4582, 4276, 8185],
                             [4818, 3894],
                             [20178, 7490, 5079],
                             [11267]],
                 'teamId': 34},
                {'label': '3-4-3',
                 'players': [[4985],
                             [11575, 50234, 12185],
                             [4474, 5239, 12136, 10428],
                             [4748, 6899, 5110]],
                 'teamId': 1}],
  'lineUps': [{'captain': False,
               'first': 'Joe',
               'id': 8185,
               'last': 'Bryan',
               'matchPosition': 'D',
               'name': 'Joe Bryan',
               'playerId': 0,
               'position': 'D',
               'positionInfo': 'Left Full Back',
               'shirtNum': 23,
               'teamId': 34},
              {'captain': False,
               'first': '

 {'fId': 58897,
  'formation': [{'label': '4-4-2',
                 'players': [[7548],
                             [3929, 5861, 3807, 24334],
                             [3724, 4224, 3979, 8980],
                             [4539, 5595]],
                 'teamId': 6},
                {'label': '4-4-2',
                 'players': [[4573],
                             [13815, 4612, 19589, 2886],
                             [14997, 4617, 4286, 3811],
                             [10905, 8245]],
                 'teamId': 20}],
  'lineUps': [{'captain': True,
               'first': 'Scott',
               'id': 3807,
               'last': 'Dann',
               'matchPosition': 'D',
               'name': 'Scott Dann',
               'playerId': 0,
               'position': 'D',
               'positionInfo': 'Centre Central Defender',
               'shirtNum': 6,
               'teamId': 6},
              {'captain': False,
               'first': 'Cheikhou',
               'id

               'first': 'Alisson',
               'id': 20559,
               'last': 'Ramses Becker',
               'matchPosition': 'G',
               'name': 'Alisson',
               'playerId': 0,
               'position': 'G',
               'positionInfo': 'Goalkeeper',
               'shirtNum': 1,
               'teamId': 10},
              {'captain': False,
               'first': 'Virgil',
               'id': 5140,
               'last': 'van Dijk',
               'matchPosition': 'D',
               'name': 'Virgil van Dijk',
               'playerId': 0,
               'position': 'D',
               'positionInfo': 'Centre Central Defender',
               'shirtNum': 4,
               'teamId': 10},
              {'captain': False,
               'first': 'Sadio',
               'id': 6519,
               'last': 'Mané',
               'matchPosition': 'F',
               'name': 'Sadio Mané',
               'playerId': 0,
               'position': 'F',
           

                   'teamId': 9},
                  {'captain': False,
                   'first': 'Francisco',
                   'id': 16793,
                   'last': 'Casilla Cortés',
                   'matchPosition': 'G',
                   'name': 'Kiko Casilla',
                   'playerId': 0,
                   'position': 'G',
                   'positionInfo': 'Goalkeeper',
                   'shirtNum': 13,
                   'teamId': 9},
                  {'captain': False,
                   'first': 'Ian',
                   'id': 19929,
                   'last': 'Poveda-Ocampo',
                   'matchPosition': 'F',
                   'name': 'Ian Poveda-Ocampo',
                   'playerId': 0,
                   'position': 'M',
                   'positionInfo': 'Winger',
                   'shirtNum': 7,
                   'teamId': 9}]},
 {'fId': 58903,
  'formation': [{'label': '4-2-3-1',
                 'players': [[3309],
                             [

                  {'captain': False,
                   'first': 'Miguel',
                   'id': 54312,
                   'last': 'Almirón',
                   'matchPosition': 'M',
                   'name': 'Miguel Almirón',
                   'playerId': 0,
                   'position': 'M',
                   'positionInfo': 'Left/Centre/Right Second Striker',
                   'shirtNum': 24,
                   'teamId': 23},
                  {'captain': False,
                   'first': 'Mark',
                   'id': 8321,
                   'last': 'Gillespie',
                   'matchPosition': 'G',
                   'name': 'Mark Gillespie',
                   'playerId': 0,
                   'position': 'G',
                   'positionInfo': 'Goalkeeper',
                   'shirtNum': 29,
                   'teamId': 23},
                  {'captain': False,
                   'first': 'Sean',
                   'id': 14897,
                   'last': 'Longstaf

               'playerId': 0,
               'position': 'D',
               'positionInfo': 'Centre/Right Central Defender',
               'shirtNum': 4,
               'teamId': 21},
              {'captain': False,
               'first': 'Heung-Min',
               'id': 4999,
               'last': 'Son',
               'matchPosition': 'M',
               'name': 'Son Heung-Min',
               'playerId': 0,
               'position': 'F',
               'positionInfo': 'Left/Centre/Right Winger',
               'shirtNum': 7,
               'teamId': 21},
              {'captain': False,
               'first': 'Lucas',
               'id': 5762,
               'last': 'Rodrigues Moura da Silva',
               'matchPosition': 'M',
               'name': 'Lucas Moura',
               'playerId': 0,
               'position': 'F',
               'positionInfo': 'Right Winger',
               'shirtNum': 27,
               'teamId': 21},
              {'captain': False,
       

               'shirtNum': 9,
               'teamId': 18},
              {'captain': False,
               'first': 'John',
               'id': 4394,
               'last': 'Egan',
               'matchPosition': 'D',
               'name': 'John Egan',
               'playerId': 0,
               'position': 'D',
               'positionInfo': 'Centre Central Defender',
               'shirtNum': 12,
               'teamId': 18},
              {'captain': False,
               'first': 'Jack',
               'id': 9491,
               'last': "O'Connell",
               'matchPosition': 'D',
               'name': "Jack O'Connell",
               'playerId': 0,
               'position': 'D',
               'positionInfo': 'Centre Central Defender',
               'shirtNum': 5,
               'teamId': 18},
              {'captain': False,
               'first': 'John',
               'id': 4509,
               'last': 'Lundstram',
               'matchPosition': 'M',
            

               'teamId': 131},
              {'captain': False,
               'first': 'Solly',
               'id': 8171,
               'last': 'March',
               'matchPosition': 'M',
               'name': 'Solly March',
               'playerId': 0,
               'position': 'M',
               'positionInfo': 'Left/Centre/Right Attacking Midfielder',
               'shirtNum': 20,
               'teamId': 131},
              {'captain': False,
               'first': 'Steven',
               'id': 20723,
               'last': 'Alzate',
               'matchPosition': 'M',
               'name': 'Steven Alzate',
               'playerId': 0,
               'position': 'M',
               'positionInfo': 'Left/Centre/Right Central Midfielder',
               'shirtNum': 17,
               'teamId': 131},
              {'captain': False,
               'first': 'Mat',
               'id': 12192,
               'last': 'Ryan',
               'matchPosition': 'G',
            

               'shirtNum': 5,
               'teamId': 7},
              {'captain': True,
               'first': 'Séamus',
               'id': 3600,
               'last': 'Coleman',
               'matchPosition': 'D',
               'name': 'Séamus Coleman',
               'playerId': 0,
               'position': 'D',
               'positionInfo': 'Right Full Back',
               'shirtNum': 23,
               'teamId': 7},
              {'captain': False,
               'first': 'Yerry',
               'id': 32923,
               'last': 'Mina',
               'matchPosition': 'D',
               'name': 'Yerry Mina',
               'playerId': 0,
               'position': 'D',
               'positionInfo': 'Centre Central Defender',
               'shirtNum': 13,
               'teamId': 7},
              {'captain': False,
               'first': 'Lucas',
               'id': 5758,
               'last': 'Digne',
               'matchPosition': 'D',
               'name': 

               'shirtNum': 43,
               'teamId': 9},
              {'captain': False,
               'first': 'Stuart',
               'id': 8148,
               'last': 'Dallas',
               'matchPosition': 'D',
               'name': 'Stuart Dallas',
               'playerId': 0,
               'position': 'M',
               'positionInfo': 'Left/Centre/Right Winger',
               'shirtNum': 15,
               'teamId': 9},
              {'captain': False,
               'first': 'Robin',
               'id': 26220,
               'last': 'Koch',
               'matchPosition': 'D',
               'name': 'Robin Koch',
               'playerId': 0,
               'position': 'D',
               'positionInfo': 'Centre Central Defender',
               'shirtNum': 5,
               'teamId': 9},
              {'captain': True,
               'first': 'Liam',
               'id': 3622,
               'last': 'Cooper',
               'matchPosition': 'D',
               '

In [4]:
df = pd.DataFrame.from_records(stats_query)


In [5]:
lineups = pd.concat([pd.DataFrame(x) for x in df['lineUps']], keys=df['fId']).reset_index(level=1, drop=True).reset_index() 
lineups['start'] = 1

In [6]:
formation = pd.concat([pd.DataFrame(x) for x in df['formation']], keys=df['fId']).reset_index(level=1, drop=True).reset_index()
formation = formation.drop('players', axis=1)

In [7]:
substitutes = pd.concat([pd.DataFrame(x) for x in df['substitutes']], keys=df['fId']).reset_index(level=1, drop=True).reset_index() 
substitutes['sub'] = 1

In [8]:
data = lineups.append(substitutes)
data = pd.merge(data, formation, how='left', on=['fId', 'teamId'])

In [11]:
games = data.drop_duplicates(subset=['fId','teamId'])
games = games[['fId','teamId']]

In [25]:

def get_stats(fId, teamId):
    db = DBConn()
    coll = db.DATABASE['fixture_stats']
    pipeline = [
        {
            '$match':{
                'fId': int(fId),
                '$and': [
                    { 'teamId': {'$eq': teamId} }
                ],
            },
        },
    ]
    stats_query = list(coll.aggregate(pipeline))
    pprint(stats_query)

In [26]:
for index, row in games.iterrows():
    fId = row['fId']
    teamId = row['teamId']
    get_stats(fId, teamId)

InvalidDocument: cannot encode object: 34, of type: <class 'numpy.int64'>