# 🔍 Football Data API Exploration

This notebook explores the Football-Data.org API to understand:
- Available endpoints and data structure
- Response formats and data types
- Rate limits and best practices
- Sample data for testing

In [None]:
# Import required libraries
import json
import os
import sys
import time
from pathlib import Path

# Add src directory to path
sys.path.insert(0, str(Path.cwd().parent / 'src'))

import pandas as pd
from dotenv import load_dotenv
from fetch_data import FootballDataFetcher

# Pull variables from the .env file that sits one level above the working directory
env_file = Path.cwd().parent / '.env'
load_dotenv(env_file)

print("‚úÖ Imports successful!")

## 1. Initialize Data Fetcher

In [None]:
# Build the API client with its default arguments
fetcher = FootballDataFetcher()

# Echo the client settings; only the presence of a key is reported, never its value
print(f"API Base URL: {fetcher.base_url}")
key_status = 'Yes' if fetcher.api_key else 'No'
print(f"API Key configured: {key_status}")
print(f"Snapshot directory: {fetcher.snapshot_dir}")

## 2. Explore Available Competitions

In [None]:
# Ask the API for the list of competitions
competitions_data = fetcher.fetch_competitions()

if not competitions_data or 'competitions' not in competitions_data:
    print("‚ùå No competitions data received")
else:
    competitions_df = pd.DataFrame(competitions_data['competitions'])

    # Preview only those identifying columns that the response actually contains
    display_cols = ['id', 'name', 'code', 'type', 'emblem']
    present_cols = set(competitions_df.columns)
    available_cols = [col for col in display_cols if col in present_cols]

    print(f"\nüìä Found {len(competitions_df)} competitions\n")
    print(competitions_df[available_cols].head(10))

## 3. Explore Match Data Structure

In [None]:
# Request the finished matches of the 'PL' competition
matches_data = fetcher.fetch_matches(competition_code='PL', status='FINISHED')

if not matches_data or 'matches' not in matches_data:
    print("‚ùå No matches data received")
else:
    matches = matches_data['matches']
    print(f"\n‚öΩ Found {len(matches)} matches\n")

    if matches:
        # Show the raw JSON of the first match, cut off after 1000 characters
        print("Sample Match Structure:")
        sample_json = json.dumps(matches[0], indent=2, default=str)
        print(sample_json[:1000] + "...")

        # Flatten every match into one record; missing nested objects yield None
        matches_list = [
            {
                'id': item.get('id'),
                'date': item.get('utcDate'),
                'status': item.get('status'),
                'home_team': item.get('homeTeam', {}).get('name'),
                'away_team': item.get('awayTeam', {}).get('name'),
                'home_score': item.get('score', {}).get('fullTime', {}).get('home'),
                'away_score': item.get('score', {}).get('fullTime', {}).get('away'),
            }
            for item in matches
        ]

        matches_df = pd.DataFrame(matches_list)
        print("\nüìã Matches Summary:")
        print(matches_df.head())

## 4. Explore Top Scorers

In [None]:
# Request the scorer ranking of the 'PL' competition, capped at 10 entries
scorers_data = fetcher.fetch_competition_scorers(competition_code='PL', limit=10)

if not scorers_data or 'scorers' not in scorers_data:
    print("‚ùå No scorers data received")
else:
    scorers = scorers_data['scorers']
    print(f"\nüèÜ Top {len(scorers)} Scorers\n")

    # One flat record per scorer; absent player/team objects yield None names
    scorers_list = [
        {
            'player_name': entry.get('player', {}).get('name'),
            'team': entry.get('team', {}).get('name'),
            'goals': entry.get('goals'),
            'assists': entry.get('assists'),
            'penalties': entry.get('penalties'),
        }
        for entry in scorers
    ]

    scorers_df = pd.DataFrame(scorers_list)
    print(scorers_df)

## 5. Explore Standings

In [None]:
# Fetch the standings of the 'PL' competition
standings_data = fetcher.fetch_competition_standings(competition_code='PL')

# A failed call or a payload without 'standings' degrades to an empty list
standings = (standings_data or {}).get('standings') or []

# NOTE(review): the payload may hold several tables distinguished by a 'type'
# field (e.g. overall vs. home/away) — confirm against the API docs. Prefer the
# 'TOTAL' entry and fall back to the first one, which this cell used previously.
overall = next(
    (entry for entry in standings if entry.get('type') == 'TOTAL'),
    standings[0] if standings else {},
)
table = overall.get('table') or []

if table:
    print(f"\nüìä League Table ({len(table)} teams)\n")

    # One flat record per table row; `or {}` tolerates an explicit null team
    table_list = [
        {
            'position': position.get('position'),
            'team': (position.get('team') or {}).get('name'),
            'played': position.get('playedGames'),
            'won': position.get('won'),
            'drawn': position.get('draw'),
            'lost': position.get('lost'),
            'points': position.get('points'),
            'goal_diff': position.get('goalDifference'),
        }
        for position in table
    ]

    table_df = pd.DataFrame(table_list)
    print(table_df.head(10))
else:
    # Reached for a missing payload and also for an empty or table-less
    # standings list, which previously produced no output at all
    print("‚ùå No standings data received")

## 6. Data Quality Assessment

In [None]:
def count_matches_played(played_per_team):
    """Return the number of distinct matches behind a per-team 'played' column.

    Each match appears once in the tally of both participating teams, so the
    column sum is twice the number of matches actually played.

    Args:
        played_per_team: pandas Series of games played, one value per team.

    Returns:
        int: half of the column sum (missing values are ignored by ``sum``).
    """
    return int(played_per_team.sum()) // 2


print("\nüìã Data Quality Summary\n")
print("=" * 50)

# Each section is skipped when the cell that builds its DataFrame did not create it
if 'matches_df' in locals():
    print(f"\n‚úÖ Matches Data:")
    print(f"   - Total matches: {len(matches_df)}")
    print(f"   - Missing home scores: {matches_df['home_score'].isna().sum()}")
    print(f"   - Missing away scores: {matches_df['away_score'].isna().sum()}")
    print(f"   - Date range: {matches_df['date'].min()} to {matches_df['date'].max()}")

if 'scorers_df' in locals():
    print(f"\n‚úÖ Scorers Data:")
    print(f"   - Total scorers: {len(scorers_df)}")
    print(f"   - Total goals: {scorers_df['goals'].sum()}")
    print(f"   - Total assists: {scorers_df['assists'].sum()}")

if 'table_df' in locals():
    print(f"\n‚úÖ Standings Data:")
    print(f"   - Teams: {len(table_df)}")
    # Summing 'played' directly would count every match twice (once per team)
    print(f"   - Total matches played: {count_matches_played(table_df['played'])}")
    # This line reports points; it was previously mislabelled as "Total goals"
    print(f"   - Total points distributed: {table_df['points'].sum()}")

print("\n" + "=" * 50)

## 7. API Rate Limiting Test

In [None]:
# Time a few consecutive API calls so the observed pacing can be compared
# with the fetcher's configured request delay. `time` is imported in the
# notebook's import cell.
print("\nüîÑ Testing API Rate Limiting...\n")

test_calls = 3

# perf_counter() is monotonic, so the measured intervals cannot be skewed by
# system clock adjustments the way time.time() differences can
start_time = time.perf_counter()

for i in range(test_calls):
    call_start = time.perf_counter()
    fetcher.fetch_competitions()  # only the elapsed time matters, not the response
    call_duration = time.perf_counter() - call_start
    print(f"Call {i+1}: {call_duration:.2f}s")

total_duration = time.perf_counter() - start_time
avg_duration = total_duration / test_calls

print(f"\nüìä Rate Limiting Stats:")
print(f"   - Total time: {total_duration:.2f}s")
print(f"   - Average per call: {avg_duration:.2f}s")
print(f"   - Configured delay: {fetcher.request_delay}s")

## 8. Summary and Next Steps

### Key Findings:
1. API provides comprehensive football data
2. Rate limiting is enforced (6 seconds between requests)
3. Data structure is consistent and well-documented
4. Player-level statistics require premium tier

### Next Steps:
1. ✅ Proceed with data collection using available endpoints
2. ✅ Store data in SQLite database
3. ✅ Build analytics pipeline
4. ✅ Create dashboard visualizations

### Limitations:
- Free tier: 10 requests per minute
- Player stats not available in free tier
- Historical data limited to recent matches

### Recommendations:
- Focus on team-level and match-level analytics
- Use top scorers endpoint for player insights
- Consider upgrading to premium tier for detailed player stats