In [0]:
from src.schemas.fields import (
    TableNames, CommonFields, FixtureStatsFields, FixtureFields,
    DateFields, TeamFields, LeagueFields, FactMatchStatisticFields
)
from src.schemas.fact_match_statistics_schema import FactMatchStatisticsSchema
from pyspark.sql.functions import col, row_number
from pyspark.sql.window import Window
from pyspark.sql.types import LongType
from src.utils.football_utils import DataFrameFootballUtils

In [0]:
%sql
-- Make `football-analyze-v1` the current catalog for the statements that follow.
USE CATALOG `football-analyze-v1`;
-- Make `football` the current schema within that catalog.
-- NOTE(review): the next Python cell reads its tables with an explicit `gold.` prefix,
-- so this schema default may not affect those reads — confirm it is still needed.
USE SCHEMA `football`;

In [0]:
# Load the source tables from the `gold` schema.
dim_fixtures_df = spark.read.table(f"gold.{TableNames.DIM_FIXTURES}")
dim_fixture_stats_df = spark.read.table(f"gold.{TableNames.DIM_FIXTURE_STATS}")
dim_teams_df = spark.read.table(f"gold.{TableNames.DIM_TEAMS}")
dim_leagues_df = spark.read.table(f"gold.{TableNames.DIM_LEAGUES}")
dim_dates_df = spark.read.table(f"gold.{TableNames.DIM_DATES}")

# Join predicates, expressed against the aliases assigned in the chain below.
# Each predicate compares a column of the table being joined in (left operand)
# with a column of a table that is already part of the chain (right operand).
_team_matches_stats = (
    col(f"dim_team.{CommonFields.TEAM_ID}")
    == col(f"dim_fixture_stats.{CommonFields.TEAM_ID}")
)
_fixture_matches_stats = (
    col(f"dim_fixtures.{CommonFields.FIXTURE_ID}")
    == col(f"dim_fixture_stats.{CommonFields.FIXTURE_ID}")
)
_league_matches_fixture = (
    col(f"dim_leagues.{CommonFields.LEAGUE_ID}")
    == col(f"dim_fixtures.{CommonFields.LEAGUE_ID}")
)
_date_matches_fixture = (
    col(f"dim_dates.{DateFields.DATE}")
    == col(f"dim_fixtures.{FixtureFields.DATE}")
)

# Start from the fixture-stats table and left-join the other four tables onto it,
# so every fixture-stats row is kept even when a lookup finds no match.
fact_match_stats_df = (
    dim_fixture_stats_df.alias("dim_fixture_stats")
    .join(dim_teams_df.alias("dim_team"), on=_team_matches_stats, how="left")
    .join(dim_fixtures_df.alias("dim_fixtures"), on=_fixture_matches_stats, how="left")
    .join(dim_leagues_df.alias("dim_leagues"), on=_league_matches_fixture, how="left")
    .join(dim_dates_df.alias("dim_dates"), on=_date_matches_fixture, how="left")
)

# Preview the first two joined rows.
fact_match_stats_df.show(2)