# Predict May - Exploratory Analysis

Analyzing Turkish Süper Lig data with DuckDB

In [None]:
import duckdb
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plot defaults: seaborn's white-grid theme and a 12x6 default figure
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

In [None]:
# Open the project's DuckDB database file. The path is relative to the
# notebook's working directory; duckdb.connect creates the file if it is
# missing, so an empty listing below may simply mean the path is wrong.
con = duckdb.connect('../data/football.duckdb')

# Sanity check: list the tables in every schema. Plain SHOW TABLES only covers
# the current schema, so schema-qualified tables used later in this notebook
# (e.g. raw.fixtures) would not show up in its output.
con.sql("SHOW ALL TABLES").show()

In [None]:
# Peek at raw.fixtures: the ten rows with the latest `date`, newest first
latest_fixtures_sql = """
    SELECT * 
    FROM raw.fixtures 
    ORDER BY date DESC 
    LIMIT 10
"""
fixtures = con.sql(latest_fixtures_sql).df()

# Last expression of the cell, so the notebook renders the DataFrame
fixtures

In [None]:
# Per-season scoring summary: row count, total goals, and mean goals per match
# (rounded to 2 decimals), ordered by season.
# NOTE(review): COUNT(*) counts every row while SUM/AVG skip NULLs -- if
# raw.fixtures contains rows with NULL goals (e.g. unplayed matches), `matches`
# will be larger than the number of matches actually averaged. Confirm.
season_goals_sql = """
    SELECT 
        season,
        COUNT(*) as matches,
        SUM(home_goals + away_goals) as total_goals,
        ROUND(AVG(home_goals + away_goals), 2) as avg_goals_per_match
    FROM raw.fixtures
    GROUP BY season
    ORDER BY season
"""
season_goals = con.sql(season_goals_sql).df()

# Last expression of the cell, so the notebook renders the DataFrame
season_goals

In [None]:
# Bar chart of average goals per match, one bar per season, drawn on an
# explicitly created 10x6 figure (overrides the notebook-wide default size).
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=season_goals, x='season', y='avg_goals_per_match', ax=ax)
ax.set_title('Average Goals per Match by Season')
ax.set_ylabel('Goals per Match')
ax.set_xlabel('Season')
plt.show()

In [None]:
# Query dbt models (uncomment once dbt has been run and the staging models exist)
# con.sql("SELECT * FROM staging.stg_fixtures LIMIT 10").df()