In [1]:
import requests
import json
import csv
import pandas as pd
import os

In [2]:
# Constants
# Base URL of the Globo Esporte table API for the 2023 Campeonato Brasileiro;
# get_round_results appends "<round>/jogos/" to it to address a single round.
API_BASE_URL = "https://api.globoesporte.globo.com/tabela/d1a37fa4-e948-43a6-ba53-ab24ab3a45b1/fase/fase-unica-campeonato-brasileiro-2023/rodada/"
# CSV file the match rows are appended to and later loaded back from with pandas.
csv_filename = "matches.csv"

In [3]:
# Function to get the results for a given round of the championship
def get_round_results(round, timeout=10):
    """Download the matches of one championship round.

    Args:
        round: Round number; it is interpolated into the URL as
            ``API_BASE_URL + "<round>/jogos/"``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on an unresponsive server.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    endpoint = f"{round}/jogos/"
    response = requests.get(API_BASE_URL + endpoint, timeout=timeout)
    # Fail loudly on an error status instead of trying to parse an error page.
    response.raise_for_status()
    # Force UTF-8 so accented team and stadium names decode correctly.
    response.encoding = 'utf-8'
    return response.json()

In [4]:
# define and run test to see if format and length being received are correct
def test__get_round_results(round_number=1, expected_matches=10):
    """Smoke-test ``get_round_results`` against the live API.

    Prints a diagnostic and returns early when the payload is not a list or
    does not contain the expected number of matches; prints 'Tests Passed!'
    otherwise.

    Args:
        round_number: Round to request (defaults to the first round).
        expected_matches: Number of matches a round should contain
            (defaults to 10).
    """
    response = get_round_results(round_number)
    if not isinstance(response, list):
        print('Error: Response format invalid\nFormat: ', type(response))
        return
    if len(response) != expected_matches:
        print('Error: Response length not correct\nLength: ', len(response))
        return
    print('Tests Passed!')

test__get_round_results()

Tests Passed!


In [5]:
# function that writes all match details in the round to a csv
def add_matches_to_csv(matches_in_round, championship_round_id, csv_filename):
    """Append one CSV row per match of a round.

    Each row holds, in order: date, time, home team name, away team name,
    home score, away score, round id and stadium name. Fields that are
    missing or null in the payload are written as empty cells.

    Args:
        matches_in_round: Iterable of match dicts as returned by the API.
        championship_round_id: Round identifier written on every row.
        csv_filename: Path of the CSV file; opened in append mode.
    """
    with open(csv_filename, mode='a', newline='', encoding='utf-8') as file:
        csv_writer = csv.writer(file)

        for match in matches_in_round:
            # `dict.get(key, default)` only covers a *missing* key. A key that is
            # present with a null value comes back as None, so fall back with `or`.
            teams = match.get('equipes') or {}
            home_team = teams.get('mandante') or {}
            away_team = teams.get('visitante') or {}
            venue = match.get('sede') or {}

            # Keep only the date part of an ISO-like "date T time" value.
            date = (match.get('data_realizacao') or '').split('T')[0]
            match_time = match.get('hora_realizacao') or ''

            # Scores deliberately avoid the `or` fallback: a score of 0 is falsy
            # but must be preserved. None is written as an empty cell by csv.
            home_team_score = match.get('placar_oficial_mandante', '')
            away_team_score = match.get('placar_oficial_visitante', '')

            csv_writer.writerow([
                date,
                match_time,
                home_team.get('nome_popular', ''),
                away_team.get('nome_popular', ''),
                home_team_score,
                away_team_score,
                championship_round_id,
                venue.get('nome_popular', ''),
            ])

In [None]:
def add_headers_to_csv(csv_filename):
    """Write the column header row to a new or empty CSV file.

    A header is only meaningful as the first line, so nothing is written when
    the file already has content. This makes the call safe to repeat.

    Args:
        csv_filename: Path of the CSV file to initialise.
    """
    if os.path.exists(csv_filename) and os.path.getsize(csv_filename) > 0:
        return

    with open(csv_filename, mode='a', newline='', encoding='utf-8') as file:
        csv_writer = csv.writer(file)
        # Column order matches the rows written by add_matches_to_csv.
        csv_writer.writerow(['date', 'time', 'home_team_name', 'away_team_name', 'home_team_score', 'away_team_score', 'championship_round_id', 'stadium_name'])

In [6]:
# Fetch every round from 1 up to (and including) final_round and store its matches in the csv
def add_all_matches_to_csv(final_round, csv_filename):
    """Download rounds ``1..final_round`` in order and append their matches to ``csv_filename``."""
    for round_id in range(1, final_round + 1):
        matches = get_round_results(round_id)
        add_matches_to_csv(matches, round_id, csv_filename)
        

In [7]:
# write csv file with all match details
# The helpers open the file in append mode, so only fetch when the file does not
# exist yet; otherwise re-running this cell would duplicate every row.
if not os.path.exists(csv_filename):
    # The header row has to come first so pd.read_csv can name the columns.
    add_headers_to_csv(csv_filename)
    add_all_matches_to_csv(33, csv_filename)  # rounds 1 through 33
else:
    print('File already exists at: ', csv_filename, '\nNo new matches were fetched.')

In [10]:
# read from csv to pandas for data visualization
# pd.read_csv takes the first line of the file as the column names, so the file
# must start with the header row (see add_headers_to_csv).
# NOTE(review): the saved output shows an extra "Unnamed: 0" column, which suggests
# the file on disk was not produced by the cells above — TODO confirm.
all_matches = pd.read_csv(csv_filename)
# Trailing expression: rendered as the cell output (first rows of the frame).
all_matches.head()

Unnamed: 0,date,time,home_team_name,away_team_name,home_team_score,away_team_score,championship_round_id,stadium_name
0,2023-04-15,16:00,Palmeiras,Cuiabá,2.0,1.0,1,Allianz Parque
1,2023-04-15,16:00,América-MG,Fluminense,0.0,3.0,1,Independência
2,2023-04-15,18:30,Botafogo,São Paulo,2.0,1.0,1,Nilton Santos (Engenhão)
3,2023-04-15,18:30,Bragantino,Bahia,2.0,1.0,1,Nabi Abi Chedid
4,2023-04-15,18:30,Athletico-PR,Goiás,2.0,0.0,1,Ligga Arena


In [11]:
# Save the distinct team names.
# Only the home-team column is read, so a team that never appears as the home
# side in the data would be missing from this array.
team_names = all_matches["home_team_name"].unique()

### Data Storage Target

Dictionary

(Key) - String - Club Name
(Value) - Array of Arrays - for each round, an array with: (Points, Wins, Draws, Losses, Goal Difference, Goals Scored, Goals Conceded, Round Ranking — note: the code below currently stores the round number in this last slot)

In [14]:
# Function to calculate the points and other statistics for each team
def calculate_team_stats(df):
    """Build the cumulative per-round statistics of every team.

    Args:
        df: DataFrame with one row per match and the columns
            ``home_team_name``, ``away_team_name``, ``home_team_score``,
            ``away_team_score`` and ``championship_round_id``. Rows are
            expected in ascending round order, because each result is added
            to the team's most recent entry.

    Returns:
        dict mapping each team name to a list with one entry per round
        (index ``round - 1``). Every entry is the list
        ``[points, wins, draws, losses, goal_difference, goals_scored,
        goals_conceded, round_number]`` of plain ints, cumulative up to and
        including that round.
    """
    team_stats = {}

    for _, row in df.iterrows():
        home_team = row['home_team_name']
        away_team = row['away_team_name']
        home_score = row['home_team_score']
        away_score = row['away_team_score']
        round_number = int(row['championship_round_id'])

        # A missing score means the match has no result yet. The round entry is
        # still created below, but the counters are left untouched.
        played = not (pd.isna(home_score) or pd.isna(away_score))

        # Same update for both sides, seen from each team's own perspective.
        sides = (
            (home_team, home_score, away_score),
            (away_team, away_score, home_score),
        )
        for team, scored, conceded in sides:
            history = team_stats.setdefault(team, [])

            # Carry the previous totals forward until this round has an entry.
            while len(history) < round_number:
                entry = history[-1].copy() if history else [0] * 8
                # Stamp each entry with its own round, so rounds without a
                # result do not keep the previous round's number (or 0).
                entry[7] = len(history) + 1
                history.append(entry)

            if not played:
                continue

            # Scores load as floats because the column can hold NaN; goals are
            # whole numbers, so keep the counters as plain ints.
            goals_for = int(scored)
            goals_against = int(conceded)

            current = history[-1]
            current[1] += int(goals_for > goals_against)   # Wins
            current[2] += int(goals_for == goals_against)  # Draws
            current[3] += int(goals_for < goals_against)   # Losses
            current[4] += goals_for - goals_against        # Goal difference
            current[5] += goals_for                        # Goals scored
            current[6] += goals_against                    # Goals conceded

            # Update points (3 for a win, 1 for a draw)
            current[0] = current[1] * 3 + current[2]

    return team_stats

In [15]:
# build the per-round statistics dictionary for all teams
teams_evolution = calculate_team_stats(all_matches)

### Data Storage Target

Dictionary

(Key) - String - Club Name
(Value) - Array - Total Points after each round

In [16]:
# create simple dictionary following structure above

def return_points(json_dict):
    """Reduce the per-round statistics to the points column only.

    Args:
        json_dict: dict mapping a team name to a list of per-round stat
            arrays whose first element is the team's total points.

    Returns:
        dict mapping each team name to the list of its total points after
        each round, in the same order as the input.
    """
    # Read from the argument, not from a notebook global, so the function
    # works for any dictionary it is given.
    return {
        team: [round_stats[0] for round_stats in rounds]
        for team, rounds in json_dict.items()
    }

In [17]:
# Reduce the full statistics to points-per-round and persist them as JSON
teams_points = return_points(teams_evolution)

# Target file for the exported points
json_filename = 'team_points.json'

# Never overwrite an existing export: report it and leave the file untouched
if os.path.exists(json_filename):
    print('File already exists at: ', json_filename, '\nFunction did not write new data.')
else:
    with open(json_filename, 'w', encoding='utf-8') as file:
        # ensure_ascii=False keeps accented team names readable in the file
        json.dump(teams_points, file, ensure_ascii=False, indent=4)