In [None]:
import json
import os
import pandas as pd

from fcb_data_providers.database import Database
from fcb_data_providers.utils import get_logger

from fcb_data_providers.models import CardModel, EventModel, GoalModel, MatchModel, PeriodModel, PlayerModel, QualifierModel, ScoreModel, TeamModel

from fcb_data_providers.database_models import Card, Event, Goal, Match, Period, Player, Qualifier, Score, Team

from fcb_data_providers.providers import StatsPerformProvider

from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process environment
# so the os.getenv() lookups further down can see them.
load_dotenv()

In [None]:
# Display the version of the fcb_data_providers package this notebook ran against.
from fcb_data_providers.version import __version__
__version__

In [None]:

# Connection string and input directory come from the environment;
# each one is None when its variable is not set.
DATABASE_URL = os.environ.get("DATABASE_URL")
DATA_DIR = os.environ.get("DATA_DIR")

In [None]:
# Display the configuration as a sanity check, masking any password embedded in the
# database URL so credentials do not end up in the notebook's saved output.
from urllib.parse import urlsplit


def _mask_db_password(url):
    """Return `url` with its password component replaced by '***'.

    Falsy values (None or an empty string) and URLs that carry no password are
    returned unchanged.
    """
    if not url:
        return url
    password = urlsplit(url).password
    if not password:
        return url
    # Replace only the first ':<password>@', which is the one in the authority part.
    return url.replace(f":{password}@", ":***@", 1)


_mask_db_password(DATABASE_URL), DATA_DIR

In [None]:
# Database handle used by the rest of the notebook (session creation and inserts).
db = Database(DATABASE_URL)
# db.create_tables()
# session = db.get_session()
# NOTE(review): drop_tables() presumably removes every table (and its data) managed by
# `Database` so the run starts from an empty schema — confirm DATABASE_URL does not point
# at a shared or production database before running this cell.
db.drop_tables()

## 1. Processing the events and stats data with our Python package

In [None]:
# Provider configured with the raw-data directory (DATA_DIR) and the same database URL as `db`.
stats_class = StatsPerformProvider(data_path=DATA_DIR, database_url=DATABASE_URL)

In [None]:
# Run the provider's processing step.
# NOTE(review): presumably this parses the files under DATA_DIR and writes them to the
# database — confirm in StatsPerformProvider.process_data.
stats_class.process_data()

## 2. Data exploration for the events data

In [None]:
# getting the session from the database
# NOTE(review): no later cell visibly uses or closes this session object — confirm
# whether it is still needed.
session = db.get_session()

In [None]:
# Ask the provider for all match-event files and log them for reference.
# Later cells take element [0] of this result and pass it to pd.read_json.

match_event_files = stats_class.get_match_related_files(file_type="match_event")
stats_class.logger.info(f"Match event files: {match_event_files}")

In [None]:
# Ask the provider for all match-stats files and log them for reference.
# Later cells take element [0] of this result and pass it to pd.read_json.

match_stats_files = stats_class.get_match_related_files(file_type="match_stats")
stats_class.logger.info(f"Match stats files: {match_stats_files}")

In [None]:
# Load the first match-event file and the first match-stats file into DataFrames.

first_event_file = match_event_files[0]
df_events = pd.read_json(first_event_file)

first_stats_file = match_stats_files[0]
df_stats = pd.read_json(first_stats_file)

In [None]:
# Display the events DataFrame. The cells below read it with
# df_events.loc[<row label>, <column>], using the 'matchInfo' and 'liveData' columns.

df_events

In [None]:
# Contestants (teams) entry of the match info, pretty-printed as JSON for exploration.
# Later cells treat it as a sequence of dicts and read keys such as 'id' and 'position'.

contestants = df_events.loc['contestant', 'matchInfo']
print(json.dumps(contestants, indent=4))

In [None]:
# Match id read from the match info of the events file; reused below as `match_id`.

match_id = df_events.loc['id', 'matchInfo']
print(match_id)

In [None]:
# Match-details dictionary from the live data, pretty-printed as JSON for exploration.
# Later cells read 'period', 'scores', 'matchStatus', 'winner' and the match length from it.

match_details = df_events.loc['matchDetails', 'liveData']
print(json.dumps(match_details, indent=4))

In [None]:
# getting the events list of dictionaries from the match events dataframe

events = df_events.loc['event', 'liveData']

# Print only a short preview plus the total count instead of dumping every event:
# the full list can be very long (assumption based on typical event feeds) and would
# bury the rest of the notebook under its output. Adjust the preview size as needed.
EVENTS_PREVIEW_COUNT = 3
print(f"{len(events)} events in total; showing the first {EVENTS_PREVIEW_COUNT}:")
print(json.dumps(events[:EVENTS_PREVIEW_COUNT], indent=4))

In [None]:
# Build a Pydantic EventModel from the first raw event to check that the model's
# type validation accepts the feed data.

first_event = events[0]

# EventModel field name -> key of the raw event dictionary.
event_field_keys = {
    'id': 'id',
    'event_id': 'eventId',
    'type_id': 'typeId',
    'period_id': 'periodId',
    'time_min': 'timeMin',
    'time_sec': 'timeSec',
    'x': 'x',
    'y': 'y',
    'outcome': 'outcome',
    'timestamp': 'timestamp',
    'last_modified': 'lastModified',
    'player_id': 'playerId',
    'team_id': 'contestantId',
}

event = EventModel(
    match_id=df_events.loc['id', 'matchInfo'],
    **{field: first_event.get(key) for field, key in event_field_keys.items()},
)

In [None]:
# Period entries from the match details; None when the 'period' key is absent.
# The next cell indexes the result with [0].

periods = match_details.get("period")

In [None]:
# Build a Pydantic PeriodModel from the first period entry to check that the model's
# type validation accepts the feed data.

first_period = periods[0]

period = PeriodModel(
    match_id=match_id,
    start_time=first_period.get('start'),
    end_time=first_period.get('end'),
    length_min=first_period.get('lengthMin'),
    length_sec=first_period.get('lengthSec'),
)

In [None]:
# Build a Pydantic ScoreModel from the half-time, full-time and total scores to check
# that the model's type validation accepts the feed data.

score = ScoreModel(
    match_id=df_events.loc['id', 'matchInfo'],
    **{
        f"{prefix}_{side}": match_details['scores'][prefix][side]
        for prefix in ('ht', 'ft', 'total')
        for side in ('home', 'away')
    },
)

In [None]:
# Insert the validated event and period rows inside the database's session-scope
# context manager (presumably commit on success and rollback on error — confirm in
# Database.session_scope).
# The scoped session gets its own name so it does not overwrite the notebook-level
# `session` that an earlier cell created with db.get_session().
# NOTE(review): `score` is validated above but not inserted here, and rows for this
# match may already exist if process_data() stored them — confirm both are intended.

with db.session_scope() as scoped_session:
    scoped_session.add(Event(**event.model_dump()))
    scoped_session.add(Period(**period.model_dump()))


In [None]:
# data = Event(**event.model_dump())
# session.add(data)
# data = Period(**period.model_dump())
# session.add(data)
# data = Score(**score.model_dump())
# session.add(data)
# session.commit()

In [None]:
# if inserting failed, rollback the session
# session.rollback()

In [None]:
# Look up the home and away team ids; the match model below needs both.

def _contestant_id_at(position):
    """Return the id of the first contestant whose 'position' equals `position`."""
    return next(entry['id'] for entry in contestants if entry['position'] == position)


home_contestant_id = _contestant_id_at('home')
away_contestant_id = _contestant_id_at('away')

In [None]:
# Build a Pydantic MatchModel to check that the model's type validation accepts the
# feed data. Optional match details fall back to None when their key is missing.

match = MatchModel(
    id=df_events.loc['id', 'matchInfo'],
    match_date=df_events.loc['localDate', 'matchInfo'],
    match_status=match_details.get('matchStatus'),
    home_team_id=home_contestant_id,
    away_team_id=away_contestant_id,
    winner=match_details.get('winner'),
    match_length_min=match_details.get('matchLengthMin'),
    match_length_sec=match_details.get('matchLengthSec'),
)


In [None]:
# Build a Pydantic TeamModel from the first contestant to check that the model's type
# validation accepts the feed data. 'id' and 'name' are required; the remaining fields
# fall back to None when their key is missing.

first_contestant = contestants[0]

team = TeamModel(
    id=first_contestant['id'],
    name=first_contestant['name'],
    short_name=first_contestant.get('shortName'),
    official_name=first_contestant.get('officialName'),
    code=first_contestant.get('code'),
)

In [None]:
# Id of the first event in the events list (None when the 'id' key is absent).
# NOTE(review): the qualifier cell below re-reads events[0].get('id') instead of using
# this variable — confirm whether `event_id` is still needed.

event_id =  events[0].get('id')

In [None]:
# Build a Pydantic QualifierModel from the first qualifier of the first event to check
# that the model's type validation accepts the feed data.

first_event = events[0]
first_qualifier = first_event['qualifier'][0]

qualifier = QualifierModel(
    q_id=first_qualifier.get('id'),
    qualifier_id=first_qualifier.get('qualifierId'),
    value=first_qualifier.get('value'),
    event_id=first_event.get('id'),
)

## 3. Data exploration for the stats data

In [None]:
# Display the match-stats DataFrame. The cells below read it with
# df_stats.loc[<row label>, <column>], using the 'matchInfo' and 'liveData' columns.

df_stats

In [None]:
# Line-up entry of the live data, pretty-printed as JSON for exploration.
# Later cells index it with [0] and read 'contestantId' and 'player' from that entry.

line_up = df_stats.loc['lineUp', 'liveData']
print(json.dumps(line_up, indent=4))

In [None]:
# Team id of the first line-up entry (None when 'contestantId' is absent);
# passed to the player model below.

team_id = line_up[0].get('contestantId')

In [None]:
# Players of the first line-up entry, i.e. the same entry `team_id` was read from.
# The next cell indexes the result with [0].

players = line_up[0]['player']

In [None]:
# Build a Pydantic PlayerModel from the first player of the first line-up entry to
# check that the model's type validation accepts the feed data.

first_player = players[0]

# PlayerModel field name -> key of the raw player dictionary.
player_field_keys = {
    'id': 'playerId',
    'first_name': 'firstName',
    'last_name': 'lastName',
    'short_first_name': 'shortFirstName',
    'short_last_name': 'shortLastName',
    'match_name': 'matchName',
    'shirt_number': 'shirtNumber',
    'position': 'position',
    'position_side': 'positionSide',
    'formation_place': 'formationPlace',
    'is_captain': 'captain',
}

player = PlayerModel(
    team_id=team_id,
    **{field: first_player.get(key) for field, key in player_field_keys.items()},
)

In [None]:
# Card entries from the live data of the stats file, pretty-printed as JSON for exploration.
# The next cell indexes the result with [0].

cards = df_stats.loc['card', 'liveData']
print(json.dumps(cards, indent=4))

In [None]:
# Build a Pydantic CardModel from the first card to check that the model's type
# validation accepts the feed data.
# The card comes from the stats file, so the match id is read from df_stats as well
# (as the goal cell does) rather than from df_events, which is loaded from a separate
# file and is not guaranteed to describe the same match.

first_card = cards[0]

card = CardModel(
    match_id=df_stats.loc['id', 'matchInfo'],
    contestant_id=first_card.get('contestantId'),
    period_id=first_card.get('periodId'),
    time_min=first_card.get('timeMin'),
    time_min_sec=first_card.get('timeMinSec'),
    last_updated=first_card.get('lastUpdated'),
    timestamp=first_card.get('timestamp'),
    type=first_card.get('type'),
    player_id=first_card.get('playerId'),
    player_name=first_card.get('playerName'),
    opta_event_id=first_card.get('optaEventId'),
    card_reason=first_card.get('cardReason'),
)

In [None]:
# Goal entries from the live data of the stats file, pretty-printed as JSON for exploration.
# The goal-model cell below indexes the result with [0].

goals = df_stats.loc['goal', 'liveData']
print(json.dumps(goals, indent=4))

In [None]:
# Match id read from the stats file; used by the goal model below.
# NOTE(review): this rebinds `match_id`, which an earlier cell set from df_events, so
# re-running earlier cells after this one would pick up the stats-file value.
match_id = df_stats.loc['id', 'matchInfo']

In [None]:
# match_id=df_stats.loc['id', 'matchInfo']
# contestant_id=goals[0].get('contestantId')
# period_id=goals[0].get('periodId')
# time_min=goals[0].get('timeMin')
# time_min_sec=goals[0].get('timeMinSec')
# last_updated=goals[0].get('lastUpdated')
# timestamp=goals[0].get('timestamp')
# type=goals[0].get('type')
# scorer_id=goals[0].get('scorerId')
# scorer_name=goals[0].get('scorerName')
# assist_player_id=goals[0].get('assistPlayerId')
# assist_player_name=goals[0].get('assistPlayerName')
# opta_event_id=goals[0].get('optaEventId')
# home_score=goals[0].get('homeScore')
# away_score=goals[0].get('awayScore')

In [None]:
# Build a Pydantic GoalModel from the first goal to check that the model's type
# validation accepts the feed data.

first_goal = goals[0]

# GoalModel field name -> key of the raw goal dictionary.
goal_field_keys = {
    'contestant_id': 'contestantId',
    'period_id': 'periodId',
    'time_min': 'timeMin',
    'time_min_sec': 'timeMinSec',
    'last_updated': 'lastUpdated',
    'timestamp': 'timestamp',
    'type': 'type',
    'scorer_id': 'scorerId',
    'scorer_name': 'scorerName',
    'assist_player_id': 'assistPlayerId',
    'assist_player_name': 'assistPlayerName',
    'opta_event_id': 'optaEventId',
    'home_score': 'homeScore',
    'away_score': 'awayScore',
}

goal = GoalModel(
    match_id=match_id,
    **{field: first_goal.get(key) for field, key in goal_field_keys.items()},
)