In [2]:
# Importa as bibliotecas necessárias
import requests
import json
import pandas as pd
from datetime import datetime, date, timedelta
from google.cloud import bigquery
from google.api_core.exceptions import NotFound
from google.oauth2 import service_account
from google.cloud import bigquery
from dotenv import load_dotenv
import os

In [3]:
# ### Para autenticação no Google Colab
# from google.colab import auth
# auth.authenticate_user()

In [4]:
### Local execution
# Path to the service-account key JSON file
key_path = "./key/betfullx-soccer-447401-fd91f214194a.json"

# Authentication: build credentials from the key file, then create the BigQuery
# client for the project id recorded in those credentials
credentials = service_account.Credentials.from_service_account_file(key_path)
client = bigquery.Client(credentials=credentials, project=credentials.project_id)


In [5]:
# Load the variables declared in the .env file into the process environment
load_dotenv()


def _require_int_env(name):
    """Return environment variable ``name`` parsed as an integer.

    Raises:
        ValueError: If the variable is unset/blank (for example because the
            .env line could not be parsed) or is not a valid integer. The
            message names the offending variable instead of surfacing the
            opaque ``int(None)`` TypeError.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        raise ValueError(
            f"Environment variable {name} is not set; define it in the .env file"
        )
    try:
        return int(raw)
    except ValueError:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from None


# Read the configuration from the environment
API_KEY = os.getenv('API_KEY')
API_URL = os.getenv('API_URL')
PROJECT_ID = os.getenv('PROJECT_ID')
DATASET_NAME = os.getenv('DATASET_NAME')
FULL_LOAD_DATE = os.getenv('FULL_LOAD_DATE')
# LEAGUE and SEASON are sent to the API as numbers, so they must be present and numeric
LEAGUE = _require_int_env('LEAGUE')
SEASON = _require_int_env('SEASON')

Python-dotenv could not parse statement starting at line 9


In [6]:
# Declarative description of the top-level API endpoints handled by this notebook.
# Each entry is a dict with the following keys:
#   table                   - name of the target table (presumably the BigQuery
#                             destination; the load code is not in this cell)
#   write_disposition       - "WRITE_APPEND" / "WRITE_TRUNCATE"; these match BigQuery
#                             write-disposition names - TODO confirm where it is consumed
#   path                    - API path of the endpoint
#   quality_control         - boolean flag; its consumer is not visible in this cell
#   params                  - query parameters for the request; when a "page" entry is
#                             present, fetch_data walks the pages starting from it
#   incremental_load_params - optional; "YYYY-MM-DD" values look like placeholders to be
#                             replaced with real dates at run time - TODO confirm
#   fields / nested_fields / repeatable_fields
#                           - same names as the prepare_dataframe arguments: dotted
#                             nested paths become "__"-separated column names there
endpoints = [
    {
        # Fixtures restricted by a from/to date window (incremental_load_params)
        "table": "past_fixtures",
        "write_disposition": "WRITE_APPEND",
        "path": "fixtures",
        "quality_control": False,
        "params": {
            "league": LEAGUE,
            "season": SEASON
        },
        "incremental_load_params": {
            "from": "YYYY-MM-DD",
            "to": "YYYY-MM-DD"
        },
        "fields": [],
        "nested_fields": [
            "fixture.id",
            "fixture.referee",
            "fixture.timezone",
            "fixture.date",
            "fixture.timestamp",
            "fixture.periods.first",
            "fixture.periods.second",
            "fixture.venue.id",
            "fixture.venue.name",
            "fixture.venue.city",
            "fixture.status.long",
            "fixture.status.short",
            "fixture.status.elapsed",
            "league.id",
            "league.name",
            "league.country",
            "league.logo",
            "league.flag",
            "league.season",
            "league.round",
            "teams.home.id",
            "teams.home.name",
            "teams.home.logo",
            "teams.home.winner",
            "teams.away.id",
            "teams.away.name",
            "teams.away.logo",
            "teams.away.winner",
            "goals.home",
            "goals.away",
            "score.halftime.home",
            "score.halftime.away",
            "score.fulltime.home",
            "score.fulltime.away",
            "score.extratime.home",
            "score.extratime.away",
            "score.penalty.home",
            "score.penalty.away"
        ],
        "repeatable_fields": []
    },
    {
        # Same API path as past_fixtures, but with a fixed far-future "to" date and
        # only a "from" placeholder; the table is truncated on each write
        "table": "future_fixtures",
        "write_disposition": "WRITE_TRUNCATE",
        "path": "fixtures",
        "quality_control": False,
        "params": {
            "league": LEAGUE,
            "season": SEASON,
            "to": "2099-12-31"
        },
        "incremental_load_params": {
            "from": "YYYY-MM-DD",
        },
        "fields": [],
        "nested_fields": [
            "fixture.id",
            "fixture.timezone",
            "fixture.date",
            "fixture.timestamp",
            "fixture.venue.id",
            "fixture.venue.name",
            "fixture.venue.city",
            "fixture.status.long",
            "fixture.status.short",
            "league.id",
            "league.name",
            "league.country",
            "league.logo",
            "league.flag",
            "league.season",
            "league.round",
            "teams.home.id",
            "teams.home.name",
            "teams.home.logo",
            "teams.away.id",
            "teams.away.name",
            "teams.away.logo"
        ],
        "repeatable_fields": []
    },
    {
        # Paginated endpoint: "page" starts at 1 and has no incremental_load_params
        "table": "players",
        "write_disposition": "WRITE_TRUNCATE",
        "path": "players",
        "quality_control": True,
        "params": {
            "league": LEAGUE,
            "season": SEASON,
            "page": 1
        },
        "fields": [],
        "nested_fields": [
            "player.id",
            "player.name",
            "player.firstname",
            "player.lastname",
            "player.age",
            "player.birth.date",
            "player.birth.place",
            "player.nationality",
            "player.height",
            "player.weight",
            "player.injured",
            "player.photo"
        ],
        "repeatable_fields": []
    },
]

# Child endpoints that are requested once per row of a parent dataset.
# The outer key ("past_fixtures") is the same string as a "table" name in `endpoints`
# (presumably selecting the parent data to iterate over - TODO confirm with the caller).
# Keys of each child entry, as read by fetch_iterable_data:
#   path        - API path requested for every parent row
#   query_param - maps a request parameter name to the parent column that supplies its
#                 value; dots in the column path are replaced by "__" before the lookup
#   fixed_params - extra parameters merged into every request
# "table" / "write_disposition" / "fields" / "nested_fields" / "repeatable_fields" use
# the same names as in `endpoints`; "fields" includes the query parameter name
# ("fixture"), which fetch_iterable_data adds to every returned record.
iterable_endpoints = {
    "past_fixtures": [
        {
            "table": "fixturesStatistics",
            "write_disposition": "WRITE_APPEND",
            "path": "fixtures/statistics",
            "query_param": {
                "fixture": "fixture.id"
            },
            "fixed_params": {},
            "fields": ["fixture"],
            "nested_fields": [
                "team.id",
                "team.name",
                "team.logo"
            ],
            "repeatable_fields": [
                "statistics"
            ]
        },
        {
            "table": "fixturesLineups",
            "write_disposition": "WRITE_APPEND",
            "path": "fixtures/lineups",
            "query_param": {
                "fixture": "fixture.id"
            },
            "fixed_params": {},
            "fields": [
                "fixture",
                "formation"
            ],
            "nested_fields": [
                "team.id"
            ],
            "repeatable_fields": [
                "startXI"
            ]
        }
    ]
}

In [None]:
def fetch_data(path, params, headers):
    """Fetch every record from an API endpoint, following pagination when requested.

    Sends GET requests to ``{API_URL}/{path}`` and accumulates the items found
    under the ``response`` key of each JSON payload. When ``params`` contains a
    ``page`` entry, the page number is incremented after every non-empty page
    until a payload has an empty or missing ``response``; otherwise exactly one
    request is made.

    Args:
        path: Endpoint path appended to ``API_URL``.
        params: Query-string parameters (``None`` is treated as no parameters).
            The mapping is never modified: paging is tracked on a private copy,
            so a shared configuration dict can be passed repeatedly and every
            call starts from the page it declares.
        headers: HTTP headers sent with every request.

    Returns:
        list: All items collected from the ``response`` arrays, in arrival order.
    """
    # Work on a copy: bumping 'page' on the caller's dict would make the next
    # call with the same dict start past the last page and return nothing.
    query = dict(params) if params else {}
    all_data = []
    while True:
        response = requests.get(f"{API_URL}/{path}", headers=headers, params=query)
        data = response.json()
        # An absent or empty 'response' marks the end of the data
        if 'response' not in data or not data['response']:
            break
        all_data.extend(data['response'])
        # Endpoints called without a 'page' parameter are fetched in one request
        if 'page' not in query:
            break
        query['page'] += 1
    return all_data

def fetch_iterable_data(main_data, iterable_endpoint):
    """Call a child endpoint once per row of ``main_data`` and gather the results.

    For each row, the request parameters are built from the endpoint's
    ``query_param`` mapping (parameter name -> column path, with ``.`` replaced
    by ``__`` to obtain the column name) plus its ``fixed_params``. Every record
    returned by ``fetch_data`` is merged on top of those row parameters, so keys
    present in the record take precedence.

    Args:
        main_data: DataFrame holding the parent rows.
        iterable_endpoint: Endpoint description providing ``path``,
            ``query_param`` and ``fixed_params``.

    Returns:
        list: One dict per fetched record, in row order.
    """
    collected = []
    for _, row in main_data.iterrows():
        # Resolve the per-row parameter values from the flattened column names
        row_params = {}
        for param_name, column_path in iterable_endpoint["query_param"].items():
            row_params[param_name] = row[column_path.replace('.', '__')]

        request_params = dict(row_params)
        request_params.update(iterable_endpoint['fixed_params'])
        records = fetch_data(iterable_endpoint['path'], request_params, HEADERS)

        # Tag each record with the parameters that produced it
        for record in records:
            collected.append({**row_params, **record})
    return collected

def prepare_dataframe(data, fields, nested_fields, repeatable_fields):
    """Flatten API records into a DataFrame limited to the configured columns.

    Steps:
      1. For every repeatable field holding a list of dicts, each value inside
         those dicts is converted to ``str`` (in place, on ``data`` itself).
      2. The records are flattened with ``pd.json_normalize`` using ``__`` as
         the separator for nested keys.
      3. Only the columns named by ``fields + nested_fields +
         repeatable_fields`` (dots replaced by ``__``) are kept, in that
         order; configured columns absent from the data are silently skipped.

    Args:
        data: List of record dicts. Sub-items of repeatable fields are
            modified in place (values become strings).
        fields: Top-level keys to keep.
        nested_fields: Dotted paths of nested keys to keep.
        repeatable_fields: Top-level keys whose value is a list of dicts.

    Returns:
        pandas.DataFrame: One row per record with the selected columns.
    """
    for item in data:
        for field in repeatable_fields:
            if field not in item:
                continue
            sub_items = item[field]
            # A repeatable field can be present without holding a list
            # (e.g. None); there is nothing to stringify in that case.
            if not isinstance(sub_items, (list, tuple)):
                continue
            for sub_item in sub_items:
                if not isinstance(sub_item, dict):
                    continue
                # Values are replaced key by key; the key set is unchanged
                for key in sub_item.keys():
                    sub_item[key] = str(sub_item[key])

    meta_fields = fields + repeatable_fields

    df = pd.json_normalize(data, sep='__', meta=meta_fields)

    all_fields = fields + nested_fields + repeatable_fields
    column_names = [col.replace('.', '__') for col in all_fields]

    # Keep only the configured columns that the payload actually produced
    existing_columns = [col for col in column_names if col in df.columns]

    df = df[existing_columns]
    return df