In [2]:
from bs4 import BeautifulSoup
import datetime as dt
import pandas as pd
import requests

def get_game_list():
    """Return the Soccerbase match pages to scrape, one dict per game.

    Downloads the players CSV, removes the "tpg" text from every
    ``sb_game_id``, builds the match-information URL for each row and
    returns the distinct ``url``/``venue`` pairs as a list of
    ``{"url": ..., "venue": ...}`` dicts.
    """
    players = pd.read_csv("https://raw.githubusercontent.com/petebrown/update-player-stats/main/data/players_df.csv")

    # Strip the "tpg" marker so only the bare id is left for the URL.
    players.sb_game_id = players.sb_game_id.str.replace("tpg", "")

    players["url"] = [
        f"https://www.soccerbase.com/matches/additional_information.sd?id_game={game_id}"
        for game_id in players.sb_game_id
    ]

    # The CSV has one row per player appearance; keep one row per game.
    unique_games = players[["url", "venue"]].drop_duplicates()

    return unique_games.to_dict("records")

def _parse_scorer(goal, game_id, goal_type):
    """Turn one goalscorer ``<span>`` into a list of goal records.

    The span text is expected to look like ``"Name (29, pen 88)"``: the
    part before the first "(" is the player name and the part after the
    last "(" is a comma-separated list of goal entries.

    Args:
        goal: BeautifulSoup tag for the scorer; must contain an ``<a>``
            whose ``href`` ends in ``=<player id>``.
        game_id: Integer id of the match the goal belongs to.
        goal_type: ``"for"`` or ``"against"``.

    Returns:
        A list of record dicts, one per goal. Entries containing "s/o"
        and entries whose minute is not an integer are left out.

    Raises:
        TypeError, KeyError, IndexError, ValueError: If the span has no
            usable player link or the player id is not numeric.
    """
    player_id = int(goal.find("a")["href"].split("=")[1])
    player_text = goal.text
    player_name = player_text.split("(")[0].strip()
    goal_details = player_text.split("(")[-1].replace(")", "").strip()

    records = []
    for minute in goal_details.split(","):
        penalty = int("pen" in minute)
        minute = minute.replace("pen ", "")

        own_goal = int("og" in minute)
        minute = minute.replace("og ", "")

        if "s/o" in minute:
            continue

        try:
            minute_number = int(minute.strip())
        except ValueError:
            # Skip only the unparseable entry instead of losing every
            # remaining goal of the game.
            continue

        records.append({
            "game_id": game_id,
            "player_id": player_id,
            "player_name": player_name,
            "minute": minute_number,
            "penalty": penalty,
            "own_goal": own_goal,
            "goal_type": goal_type,
            "goal_details": goal_details,
        })
    return records


def get_goals(target="both"):
    """Scrape goal records for every game and merge in the manual goals.

    For each game from ``get_game_list()`` the match page is downloaded
    once and the scorers under ``.matchInfo .goalscorers`` are parsed.
    On a home game ("H") goals "for" are read from ``teamA`` and goals
    "against" from ``teamB``; on an away game ("A") the sides are
    swapped. Games with any other venue value are skipped. Each record
    is printed as it is collected.

    Args:
        target: ``"for"``, ``"against"`` or ``"both"`` (default).

    Returns:
        A DataFrame of the scraped records concatenated with the rows of
        ``manual_goals.csv`` whose ``goal_type`` matches ``target``, with
        duplicates dropped. If ``target`` is not one of the accepted
        values, the string ``"Please enter 'for', 'against' or 'both'."``
        is returned instead.
    """
    # Validate first so a bad argument does not trigger any download.
    if target == "both":
        target_teams = ["for", "against"]
    elif target not in ["for", "against"]:
        return "Please enter 'for', 'against' or 'both'."
    else:
        target_teams = [target]

    games = get_game_list()

    goal_list = []

    for game in games:
        url = game["url"]
        venue = game["venue"]

        # The side can only be worked out for home or away games.
        if venue == "H":
            sides = {"for": "teamA", "against": "teamB"}
        elif venue == "A":
            sides = {"for": "teamB", "against": "teamA"}
        else:
            continue

        try:
            game_id = int(url.split("=")[1])
        except (IndexError, ValueError):
            continue

        # One download per game serves both the "for" and "against" pass.
        try:
            r = requests.get(url, timeout=30)
        except requests.RequestException:
            # Best effort: a game whose page cannot be fetched is skipped.
            continue
        doc = BeautifulSoup(r.text, 'html.parser')

        for target_team in target_teams:
            side = sides[target_team]
            goals = doc.select(f'.matchInfo .goalscorers .{side} p span')

            for goal in goals:
                try:
                    records = _parse_scorer(goal, game_id, target_team)
                except (TypeError, KeyError, IndexError, ValueError):
                    # Span without a usable player link; skip this scorer.
                    continue

                for record in records:
                    goal_list.append(record)
                    print(record)

    df = pd.DataFrame(goal_list)

    manual_goals = pd.read_csv("manual_goals.csv")
    manual_goals = manual_goals[manual_goals.goal_type.isin(target_teams)]

    df = pd.concat([df, manual_goals]).drop_duplicates(ignore_index=True)

    return df

# if __name__ == "__main__":
#     df = get_goals()

In [8]:
# Scrape goals for both sides (default target) and keep the result in `df`.
df = get_goals()
# Write the combined goals table to goals.csv without the index column.
df.to_csv("goals.csv", index=False)

{'game_id': 830464, 'player_id': 114877, 'player_name': 'Josh McPake', 'minute': 75, 'penalty': 0, 'own_goal': 0, 'goal_type': 'for', 'goal_details': '75'}
{'game_id': 830464, 'player_id': 58504, 'player_name': 'Kane Hemmings', 'minute': 88, 'penalty': 1, 'own_goal': 0, 'goal_type': 'for', 'goal_details': 'pen 88'}
{'game_id': 830464, 'player_id': 100515, 'player_name': 'Fraser Horsfall', 'minute': 29, 'penalty': 0, 'own_goal': 0, 'goal_type': 'against', 'goal_details': '29, 37'}
{'game_id': 830464, 'player_id': 100515, 'player_name': 'Fraser Horsfall', 'minute': 37, 'penalty': 0, 'own_goal': 0, 'goal_type': 'against', 'goal_details': '29, 37'}
{'game_id': 830464, 'player_id': 69850, 'player_name': 'Mitch Pinnock', 'minute': 66, 'penalty': 0, 'own_goal': 0, 'goal_type': 'against', 'goal_details': '66'}
{'game_id': 830059, 'player_id': 51474, 'player_name': 'Callum McManaman', 'minute': 73, 'penalty': 0, 'own_goal': 0, 'goal_type': 'for', 'goal_details': '73'}
{'game_id': 830627, 'playe