# Gaming Events Social Scraper

This notebook scrapes gaming events from social media sources (Facebook and Discord) and saves structured data to JSON files.

## Features
- Parse Facebook public posts and events for gaming events
- Scan recent messages in Discord server channels for event announcements
- Extract structured data: game system, venue, date, time
- Save events in consistent JSON schema
- Extensible for additional platforms

In [None]:
import json
import re
import requests
from datetime import datetime, timezone
from dataclasses import dataclass, asdict
from typing import List, Optional, Dict, Any
import os
from pathlib import Path

# For web scraping
from bs4 import BeautifulSoup

# For Discord (optional - requires bot setup)
try:
    import discord
    DISCORD_AVAILABLE = True
except ImportError:
    DISCORD_AVAILABLE = False
    print("Discord.py not available. Install with: pip install discord.py")

## Event Data Schema

Define the structure for gaming event data.

In [None]:
@dataclass
class GamingEvent:
    """Structured data for a single gaming event.

    All fields are plain strings (or ``None`` for the optional ones) so a
    record can be written to JSON with ``to_dict`` and rebuilt with
    ``GamingEvent(**data)``.
    """
    title: str
    game_system: str  # e.g., "MTG", "Warhammer 40K", "D&D"
    venue: str        # Store/location name
    date: str         # ISO format date; scrapers store "Unknown" when none was found
    start_time: str   # "HH:MM" or "H:MM AM/PM" as extracted; "Unknown" when none was found
    source: str       # "facebook" or "discord"
    source_url: Optional[str] = None
    description: Optional[str] = None
    # UTC ISO-8601 timestamp of extraction; filled in by __post_init__ when omitted.
    extracted_at: Optional[str] = None

    def __post_init__(self):
        """Stamp the record with the current UTC time unless one was supplied."""
        if self.extracted_at is None:
            self.extracted_at = datetime.now(timezone.utc).isoformat()

    def to_dict(self) -> Dict[str, Any]:
        """Return the event as a plain dict keyed by field name."""
        return asdict(self)

class EventExtractor:
    """Base class with the text-extraction helpers shared by the scrapers.

    It holds the keyword list used to recognise gaming-related text and the
    ``extract_game_system`` / ``extract_time`` / ``extract_date`` helpers that
    turn free text into the string fields of an event record.
    """

    # Franchise-specific keywords, checked in order: the first franchise with
    # a keyword present in the text wins.
    _FRANCHISE_KEYWORDS = (
        ("MTG", ("mtg", "magic", "fnm", "commander", "edh")),
        ("Warhammer", ("warhammer", "40k", "age of sigmar")),
        ("D&D", ("d&d", "dungeons", "dnd")),
        ("Pokemon", ("pokemon",)),
        ("Yu-Gi-Oh", ("yugioh",)),
        ("Digimon", ("digimon",)),
    )

    # Format names used by several card games. They only decide the result
    # when no franchise is named, so "Pokemon prerelease" is not tagged MTG.
    _GENERIC_FORMAT_KEYWORDS = ("draft", "sealed", "prerelease")

    # Month number keyed by the first three letters of the month name.
    _MONTH_NUMBERS = {
        'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
        'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
    }

    def __init__(self):
        self.game_keywords = [
            "mtg", "magic", "magic the gathering",
            "warhammer", "40k", "age of sigmar",
            "d&d", "dungeons and dragons", "dnd",
            "pokemon", "yugioh", "digimon",
            "fnm", "friday night magic",
            "commander", "edh",
            "draft", "sealed", "prerelease"
        ]

        # Tried in order; named groups (hour / minute / period) replace the
        # old "count the groups" dispatch, which mis-handled upper-case AM/PM.
        self.time_patterns = [
            # 7:30pm, 7:30 PM, 730pm
            r'(?<!\d)(?P<hour>\d{1,2}):?(?P<minute>\d{2})\s*(?P<period>am|pm)\b',
            # 7pm, 10 AM
            r'(?<!\d)(?P<hour>\d{1,2})\s*(?P<period>am|pm)\b',
            # 19:30 -- the colon is required so that years such as "2024"
            # are not read as the time 20:24
            r'(?<!\d)(?P<hour>\d{1,2}):(?P<minute>\d{2})(?!\d)',
        ]

        # Day, optional ordinal suffix and optional ", YYYY" after a month name.
        day_and_year = (
            r'\s+(?P<day>\d{1,2})(?!\d)(?:st|nd|rd|th)?'
            r'(?:,?\s+(?P<year>(?:19|20)\d{2})\b)?'
        )
        # Tried in order; named groups: year / month / day / month_name.
        self.date_patterns = [
            # YYYY-MM-DD
            r'(?<!\d)(?P<year>\d{4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})(?!\d)',
            # MM/DD/YYYY and MM-DD-YYYY (month-first order is assumed)
            r'(?<!\d)(?P<month>\d{1,2})/(?P<day>\d{1,2})/(?P<year>\d{4})(?!\d)',
            r'(?<!\d)(?P<month>\d{1,2})-(?P<day>\d{1,2})-(?P<year>\d{4})(?!\d)',
            # "July 18th", "December 5, 2025"
            r'\b(?P<month_name>january|february|march|april|may|june|july|august'
            r'|september|october|november|december)' + day_and_year,
            # "Aug 15", "Sept. 3"
            r'\b(?P<month_name>jan|feb|mar|apr|may|jun|jul|aug|sept?|oct|nov|dec)\.?'
            + day_and_year,
        ]

    def extract_game_system(self, text: str) -> str:
        """Return the game system mentioned in ``text``.

        Returns one of "MTG", "Warhammer", "D&D", "Pokemon", "Yu-Gi-Oh",
        "Digimon", or "Unknown" when no keyword is present. Matching is a
        case-insensitive substring test.
        """
        text_lower = text.lower()

        for system, keywords in self._FRANCHISE_KEYWORDS:
            if any(keyword in text_lower for keyword in keywords):
                return system

        # Only format words were found: keep the historical default of MTG.
        if any(keyword in text_lower for keyword in self._GENERIC_FORMAT_KEYWORDS):
            return "MTG"

        return "Unknown"

    def extract_time(self, text: str) -> Optional[str]:
        """Return the first plausible time found in ``text``.

        The result is "H:MM AM" / "H:MM PM" when a period was written and
        "H:MM" for a bare 24-hour time; ``None`` when nothing matches.
        Matches with an impossible hour or minute are skipped.
        """
        for pattern in self.time_patterns:
            for match in re.finditer(pattern, text, re.IGNORECASE):
                parts = match.groupdict()
                hour = parts.get("hour")
                if hour is None:
                    continue
                minute = parts.get("minute") or "00"
                period = parts.get("period")

                if int(minute) > 59:
                    continue
                if period:
                    if not 1 <= int(hour) <= 12:
                        continue
                    return f"{hour}:{minute} {period.upper()}"
                if int(hour) > 23:
                    continue
                return f"{hour}:{minute}"
        return None

    def extract_date(self, text: str) -> Optional[str]:
        """Return the first valid date found in ``text`` as ``YYYY-MM-DD``.

        Dates written without a year are placed in the current year. Matches
        that are not real calendar dates (e.g. month 13) are skipped. Returns
        ``None`` when no valid date is found.
        """
        for pattern in self.date_patterns:
            for match in re.finditer(pattern, text, re.IGNORECASE):
                parts = match.groupdict()

                month = parts.get("month")
                if month is None:
                    month_name = parts.get("month_name") or ""
                    month = self._MONTH_NUMBERS.get(month_name.lower()[:3])

                year = parts.get("year") or datetime.now().year
                iso_date = self._to_iso_date(year, month, parts.get("day"))
                if iso_date:
                    return iso_date
        return None

    @staticmethod
    def _to_iso_date(year, month, day) -> Optional[str]:
        """Build a zero-padded ISO date, or None if the parts are not a real date."""
        try:
            return datetime(int(year), int(month), int(day)).date().isoformat()
        except (TypeError, ValueError):
            return None

## Facebook Events Scraper

Extract gaming events from Facebook pages and posts.

In [None]:
class FacebookEventScraper(EventExtractor):
    """Scrape gaming events from Facebook.

    With an access token, events are read from the Graph API
    ``/{page_id}/events`` edge. Without one, the page's public events URL is
    fetched and its visible text is scanned line by line (best effort).
    """

    # Seconds to wait on a single HTTP request, so a stalled connection
    # cannot hang the notebook indefinitely.
    REQUEST_TIMEOUT = 15

    def __init__(self, access_token: Optional[str] = None):
        super().__init__()
        self.access_token = access_token
        self.base_url = "https://graph.facebook.com/v18.0"

    def scrape_page_events(self, page_id: str) -> List[GamingEvent]:
        """Return the gaming events found for ``page_id``.

        Uses the Graph API when an access token is configured and the public
        page fallback otherwise. Errors are printed and result in the events
        collected so far (possibly none) being returned.
        """
        if not self.access_token:
            print("Warning: No Facebook access token provided. Using public scraping fallback.")
            return self._scrape_public_page(page_id)

        events = []
        try:
            # Get page events via Graph API
            url = f"{self.base_url}/{page_id}/events"
            params = {
                'access_token': self.access_token,
                'fields': 'name,description,start_time,place'
            }

            response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()

            data = response.json()

            for event_data in data.get('data', []):
                # Link to the event itself when the node carries an id;
                # otherwise keep the previous 'facebook_api' marker.
                # NOTE(review): Graph API nodes normally include ``id`` even
                # when it is not listed in ``fields`` -- confirm.
                event_id = event_data.get('id')
                source_url = (
                    f"https://www.facebook.com/events/{event_id}"
                    if event_id else 'facebook_api'
                )
                event = self._parse_facebook_event(event_data, source_url)
                if event:
                    events.append(event)

        except Exception as e:
            # requests puts the full request URL (token included) into its
            # error messages, so scrub it before printing.
            print(f"Error scraping Facebook page {page_id}: {self._redact(str(e))}")

        return events

    def _redact(self, message: str) -> str:
        """Remove the access token (raw and URL-encoded) from ``message``."""
        if not self.access_token:
            return message
        from urllib.parse import quote_plus
        for secret in (self.access_token, quote_plus(self.access_token)):
            message = message.replace(secret, "***")
        return message

    def _scrape_public_page(self, page_id: str) -> List[GamingEvent]:
        """Fallback method to scrape a public Facebook page without a token."""
        events = []

        try:
            # Note: This is a simplified example - Facebook's public pages
            # are heavily JavaScript-rendered and may require selenium
            url = f"https://www.facebook.com/{page_id}/events"
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            }

            response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
            # Do not mine an HTTP error page for "events".
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

            # Simplified approach: treat every line of visible text that
            # mentions a gaming keyword as a candidate event.
            source_url = f"https://facebook.com/{page_id}"
            for line in soup.get_text().split('\n'):
                if any(keyword in line.lower() for keyword in self.game_keywords):
                    event = self._parse_text_event(line, source_url)
                    if event:
                        events.append(event)

        except Exception as e:
            print(f"Error scraping public Facebook page {page_id}: {e}")

        return events

    @staticmethod
    def _parse_start_time(value) -> Optional[datetime]:
        """Parse a Graph API ``start_time`` string; return None if unparseable.

        ``fromisoformat`` is tried first. Offsets written without a colon
        (``-0700``) are rejected by it on Python < 3.11, so ``strptime`` with
        ``%z`` is used as a second attempt.
        """
        if not value or not isinstance(value, str):
            return None
        try:
            return datetime.fromisoformat(value.replace('Z', '+00:00'))
        except ValueError:
            pass
        try:
            return datetime.strptime(value, '%Y-%m-%dT%H:%M:%S%z')
        except ValueError:
            return None

    def _parse_facebook_event(self, event_data: Dict, source_url: str) -> Optional[GamingEvent]:
        """Convert one Graph API event node into a GamingEvent.

        Returns None when the event mentions no gaming keyword or when
        parsing fails (the error is printed).
        """
        try:
            # ``or ''`` also covers keys that are present but null.
            title = event_data.get('name') or ''
            description = event_data.get('description') or ''

            # Check if this is a gaming event
            combined_text = f"{title} {description}".lower()
            if not any(keyword in combined_text for keyword in self.game_keywords):
                return None

            game_system = self.extract_game_system(combined_text)

            # Extract venue
            venue = "Unknown"
            place = event_data.get('place', {})
            if isinstance(place, dict):
                venue = place.get('name', 'Unknown')

            # Prefer the structured start_time; fall back to the free text.
            dt = self._parse_start_time(event_data.get('start_time'))
            if dt is not None:
                date = dt.date().isoformat()
                start_time = dt.time().strftime('%H:%M')
            else:
                date = self.extract_date(combined_text) or "Unknown"
                start_time = self.extract_time(combined_text) or "Unknown"

            return GamingEvent(
                title=title,
                game_system=game_system,
                venue=venue,
                date=date,
                start_time=start_time,
                source="facebook",
                source_url=source_url,
                description=description
            )

        except Exception as e:
            print(f"Error parsing Facebook event: {e}")
            return None

    def _parse_text_event(self, text: str, source_url: str) -> Optional[GamingEvent]:
        """Build a GamingEvent from one line of page text.

        Returns None when no game system can be identified in ``text``.
        """
        game_system = self.extract_game_system(text)
        if game_system == "Unknown":
            return None

        stripped = text.strip()
        return GamingEvent(
            title=stripped[:100],  # Limit title length
            game_system=game_system,
            venue="Unknown",
            date=self.extract_date(text) or "Unknown",
            start_time=self.extract_time(text) or "Unknown",
            source="facebook",
            source_url=source_url,
            description=stripped
        )

## Discord Events Scraper

Extract gaming events from Discord server channels.

In [None]:
class DiscordEventScraper(EventExtractor):
    """Scrape gaming events from Discord servers.

    Each call logs a bot in, reads the most recent messages of the matching
    text channels once, and logs out again.
    """

    def __init__(self, bot_token: Optional[str] = None):
        super().__init__()
        self.bot_token = bot_token
        # Running total of every event found by this scraper instance.
        self.events = []

    async def scrape_server_events(self, guild_id: int, channel_names: Optional[List[str]] = None) -> List[GamingEvent]:
        """Return the events found in one Discord server.

        Args:
            guild_id: ID of the server to scan.
            channel_names: Substrings to look for in text-channel names
                (case-insensitive). Defaults to events / schedule /
                announcements / general.

        Returns:
            Only the events found by this call. ``self.events`` additionally
            accumulates the events of every call. An empty list is returned
            when discord.py or the bot token is missing.
        """
        if not DISCORD_AVAILABLE:
            print("Discord.py not available. Cannot scrape Discord events.")
            return []

        if not self.bot_token:
            print("No Discord bot token provided.")
            return []

        if channel_names is None:
            channel_names = ['events', 'schedule', 'announcements', 'general']
        wanted_names = [name.lower() for name in channel_names]

        # Collected per call: returning the shared ``self.events`` list made
        # every later server report the earlier servers' events as well.
        found: List[GamingEvent] = []

        intents = discord.Intents.default()
        intents.message_content = True

        client = discord.Client(intents=intents)

        @client.event
        async def on_ready():
            try:
                guild = client.get_guild(guild_id)
                if not guild:
                    print(f"Guild {guild_id} not found")
                    return

                for channel in guild.channels:
                    if not isinstance(channel, discord.TextChannel):
                        continue
                    channel_name = channel.name.lower()
                    if not any(name in channel_name for name in wanted_names):
                        continue

                    print(f"Scanning channel: {channel.name}")

                    try:
                        async for message in channel.history(limit=100):
                            event = self._parse_discord_message(message)
                            if event:
                                found.append(event)
                    except discord.Forbidden:
                        # One unreadable channel should not abort the others.
                        print(f"No permission to read channel: {channel.name}")

            except Exception as e:
                print(f"Error scraping Discord server: {e}")
            finally:
                # Closing the client is what makes ``client.start`` return.
                await client.close()

        try:
            await client.start(self.bot_token)
        finally:
            # ``start`` can fail before on_ready runs (e.g. bad token).
            if not client.is_closed():
                await client.close()

        self.events.extend(found)
        return found

    def _parse_discord_message(self, message) -> Optional[GamingEvent]:
        """Convert one Discord message into a GamingEvent.

        Returns None when the message mentions no gaming keyword or when
        parsing fails (the error is printed). The server name is used as the
        venue.
        """
        try:
            content = message.content

            # Check if this contains gaming keywords
            if not any(keyword in content.lower() for keyword in self.game_keywords):
                return None

            # ``guild`` can exist as an attribute and still be None, so test
            # the value rather than using hasattr.
            guild = getattr(message, 'guild', None)
            venue = guild.name if guild else "Unknown"
            source_url = (
                f"https://discord.com/channels/{guild.id}/{message.channel.id}/{message.id}"
                if guild else None
            )

            return GamingEvent(
                title=content[:100],
                game_system=self.extract_game_system(content),
                venue=venue,
                date=self.extract_date(content) or "Unknown",
                start_time=self.extract_time(content) or "Unknown",
                source="discord",
                source_url=source_url,
                description=content
            )

        except Exception as e:
            print(f"Error parsing Discord message: {e}")
            return None

## Event Storage and Management

Save and manage extracted events in JSON format.

In [None]:
class EventStorage:
    """Manage storage of gaming events as JSON files in one directory."""

    def __init__(self, storage_dir: str = "events_data"):
        self.storage_dir = Path(storage_dir)
        # parents=True so a nested storage path works on first use.
        self.storage_dir.mkdir(parents=True, exist_ok=True)

    def save_events(self, events: List["GamingEvent"], filename: Optional[str] = None) -> str:
        """Write ``events`` to a JSON file and return its path.

        When ``filename`` is omitted, a name containing the current local
        timestamp is generated.
        """
        if not filename:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"gaming_events_{timestamp}.json"

        filepath = self.storage_dir / filename

        events_data = {
            "scraped_at": datetime.now(timezone.utc).isoformat(),
            "total_events": len(events),
            "events": [event.to_dict() for event in events]
        }

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(events_data, f, indent=2, ensure_ascii=False)

        print(f"Saved {len(events)} events to {filepath}")
        return str(filepath)

    def load_events(self, filename: str) -> List["GamingEvent"]:
        """Load the events stored in ``filename``.

        A missing, unreadable or non-event JSON file yields an empty list,
        and individual malformed records are skipped; each problem is
        printed rather than raised.
        """
        filepath = self.storage_dir / filename

        if not filepath.exists():
            print(f"File {filepath} does not exist")
            return []

        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:  # ValueError covers bad JSON / bad encoding
            print(f"Could not read {filepath}: {e}")
            return []

        if not isinstance(data, dict):
            print(f"File {filepath} does not contain an events object")
            return []

        events = []
        for event_data in data.get('events', []):
            try:
                events.append(GamingEvent(**event_data))
            except TypeError as e:  # missing or unexpected fields
                print(f"Skipping malformed event in {filepath}: {e}")

        return events

    def _load_all_events(self) -> List["GamingEvent"]:
        """Load the events of every ``*.json`` file in the storage directory."""
        all_events = []
        # Sorted so results do not depend on directory listing order.
        for json_file in sorted(self.storage_dir.glob("*.json")):
            all_events.extend(self.load_events(json_file.name))
        return all_events

    def get_events_by_game(self, game_system: str) -> List["GamingEvent"]:
        """Get all stored events for a game system (case-insensitive match)."""
        wanted = game_system.lower()
        return [e for e in self._load_all_events() if e.game_system.lower() == wanted]

    def get_upcoming_events(self, days_ahead: int = 30) -> List["GamingEvent"]:
        """Get stored events dated from today through ``days_ahead`` days from now.

        Events whose ``date`` is not an ISO date (e.g. "Unknown") are
        skipped. The result is sorted by date.
        """
        # The file imports the ``datetime`` class, not the module, so
        # ``datetime.timedelta`` does not exist; import timedelta directly.
        from datetime import timedelta

        today = datetime.now().date()
        cutoff_date = today + timedelta(days=days_ahead)

        upcoming = []
        for event in self._load_all_events():
            try:
                event_date = datetime.fromisoformat(event.date).date()
            except (TypeError, ValueError):
                continue  # Skip events with invalid dates
            if today <= event_date <= cutoff_date:
                upcoming.append(event)

        # ISO date strings sort chronologically.
        upcoming.sort(key=lambda x: x.date)
        return upcoming

## Main Scraper Orchestrator

Coordinate scraping from multiple sources.

In [None]:
class GamingEventsScraper:
    """Main orchestrator: runs every scraper, de-duplicates, and saves."""

    def __init__(self,
                 facebook_token: Optional[str] = None,
                 discord_token: Optional[str] = None,
                 storage_dir: str = "events_data"):

        self.facebook_scraper = FacebookEventScraper(facebook_token)
        self.discord_scraper = DiscordEventScraper(discord_token)
        self.storage = EventStorage(storage_dir)

    def scrape_all_sources(self,
                          facebook_pages: Optional[List[str]] = None,
                          discord_servers: Optional[List[Dict]] = None) -> List["GamingEvent"]:
        """Scrape events from all configured sources.

        Args:
            facebook_pages: Facebook page IDs or usernames to scrape.
            discord_servers: Dicts with a ``guild_id`` key and an optional
                ``channels`` list.

        Returns:
            Every event found, not de-duplicated. A failing source is
            reported with a printed message and does not stop the others.
        """
        all_events = []

        # Scrape Facebook
        if facebook_pages:
            print("Scraping Facebook pages...")
            for page_id in facebook_pages:
                try:
                    events = self.facebook_scraper.scrape_page_events(page_id)
                    all_events.extend(events)
                    print(f"Found {len(events)} events from Facebook page {page_id}")
                except Exception as e:
                    print(f"Error scraping Facebook page {page_id}: {e}")

        # Scrape Discord (requires async)
        if discord_servers:
            if not DISCORD_AVAILABLE:
                print("Discord.py not available. Skipping Discord servers.")
            else:
                print("Scraping Discord servers...")
                try:
                    discord_events = self._run_coroutine(
                        self._scrape_discord_servers(discord_servers)
                    )
                    all_events.extend(discord_events)
                except Exception as e:
                    print(f"Error running Discord scraping: {e}")

        return all_events

    async def _scrape_discord_servers(self, discord_servers: List[Dict]) -> List["GamingEvent"]:
        """Scrape each configured Discord server in turn, skipping failures."""
        discord_events = []
        for server_config in discord_servers:
            try:
                guild_id = server_config['guild_id']
                channels = server_config.get('channels')
                events = await self.discord_scraper.scrape_server_events(guild_id, channels)
                discord_events.extend(events)
                print(f"Found {len(events)} events from Discord server {guild_id}")
            except Exception as e:
                print(f"Error scraping Discord server: {e}")
        return discord_events

    @staticmethod
    def _run_coroutine(coro):
        """Run ``coro`` to completion from synchronous code and return its result.

        ``asyncio.run`` raises RuntimeError when an event loop is already
        running in the current thread, which is the case inside Jupyter. In
        that situation the coroutine is run on a fresh loop in a worker
        thread instead.
        """
        import asyncio

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop running in this thread: plain script usage.
            return asyncio.run(coro)

        from concurrent.futures import ThreadPoolExecutor
        with ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(asyncio.run, coro).result()

    def scrape_and_save(self,
                       facebook_pages: Optional[List[str]] = None,
                       discord_servers: Optional[List[Dict]] = None,
                       filename: Optional[str] = None) -> str:
        """Scrape all sources, drop duplicates, save, and print a summary.

        Returns:
            Path of the written JSON file, or "" when no events were found.
        """
        events = self.scrape_all_sources(facebook_pages, discord_servers)

        if not events:
            print("No events found")
            return ""

        # Remove duplicates: same title (case-insensitive), date and start time
        unique_events = []
        seen = set()

        for event in events:
            key = (event.title.lower().strip(), event.date, event.start_time)
            if key not in seen:
                unique_events.append(event)
                seen.add(key)

        print(f"Found {len(events)} total events, {len(unique_events)} unique")

        # Save to file
        filepath = self.storage.save_events(unique_events, filename)

        # Print summary
        self._print_summary(unique_events)

        return filepath

    def _print_summary(self, events: List["GamingEvent"]):
        """Print per-game counts with the three earliest-dated events of each."""
        if not events:
            return

        print("\n=== EVENTS SUMMARY ===")

        # Group by game system, keeping first-seen order of the systems
        by_game = {}
        for event in events:
            by_game.setdefault(event.game_system, []).append(event)

        for game, game_events in by_game.items():
            print(f"\n{game}: {len(game_events)} events")
            for event in sorted(game_events, key=lambda x: x.date)[:3]:  # Show first 3
                print(f"  - {event.title} | {event.date} {event.start_time} | {event.venue}")
            if len(game_events) > 3:
                print(f"  ... and {len(game_events) - 3} more")

## Configuration and Usage Examples

Configure your API tokens and scraping targets, then initialize the scraper. The scrape itself runs in the next section.

In [None]:
# Configuration
# API tokens are read from environment variables so that secrets are not
# saved inside the notebook. Both are optional:
#   - without a Facebook token the public-page scraping fallback is used
#   - without a Discord token, Discord servers yield no events
FACEBOOK_ACCESS_TOKEN = os.environ.get("FACEBOOK_ACCESS_TOKEN")  # Get from Facebook Developers
DISCORD_BOT_TOKEN = os.environ.get("DISCORD_BOT_TOKEN")          # Get from Discord Developer Portal

# Configure scraping targets
FACEBOOK_PAGES = [
    # Add Facebook page IDs or usernames
    # Example: "yourlocalcardshop", "magicthegathering"
]

DISCORD_SERVERS = [
    # Add Discord server configurations
    # Example: {"guild_id": 123456789, "channels": ["events", "announcements"]}
]

# Initialize scraper
scraper = GamingEventsScraper(
    facebook_token=FACEBOOK_ACCESS_TOKEN,
    discord_token=DISCORD_BOT_TOKEN,
    storage_dir="gaming_events_data"
)

## Run the Scraper

Execute the scraping process.

In [None]:
# Kick off one full scrape of every configured source and report the outcome
print("Starting gaming events scraper...")

try:
    # Returns the path of the written file, or "" when nothing was found
    output_file = scraper.scrape_and_save(
        facebook_pages=FACEBOOK_PAGES,
        discord_servers=DISCORD_SERVERS,
    )

    print(
        f"\nEvents saved to: {output_file}"
        if output_file
        else "\nNo events were found or saved."
    )

except Exception as e:
    print(f"Error running scraper: {e}")

## Load and Analyze Saved Events

Work with previously scraped events.

In [None]:
# Inspect events saved by earlier scraper runs
storage = EventStorage("gaming_events_data")

# Events dated within the next two weeks
upcoming = storage.get_upcoming_events(days_ahead=14)
print(f"\nUpcoming events in next 14 days: {len(upcoming)}")

for event in upcoming[:5]:  # Preview at most five
    print(
        f"- {event.title}",
        f"  {event.game_system} | {event.date} {event.start_time} | {event.venue}",
        f"  Source: {event.source}",
        "",  # blank line between entries
        sep="\n",
    )

# Per-game counts across all saved files
mtg_events = storage.get_events_by_game("MTG")
print(f"MTG events found: {len(mtg_events)}")

warhammer_events = storage.get_events_by_game("Warhammer")
print(f"Warhammer events found: {len(warhammer_events)}")

## Testing with Sample Data

Test the extraction logic with sample text.

In [None]:
# Exercise the extraction helpers on a few hand-written announcements
extractor = EventExtractor()

sample_texts = [
    "Friday Night Magic - Modern format at Card Kingdom, July 18th 7:00 PM",
    "Warhammer 40K tournament this Saturday 10 AM at Games Workshop",
    "D&D Adventurers League - Wed Aug 15 6:30pm at Local Game Store",
    "Pokemon League Challenge on 8/20/2024 starting at 12:00 PM"
]

print("Testing extraction logic:")
print("=" * 50)

for i, text in enumerate(sample_texts, 1):
    print(f"\nSample {i}: {text}")

    # Show the raw result of each extractor (None means "not found")
    detected_game = extractor.extract_game_system(text)
    print(f"Game System: {detected_game}")
    detected_date = extractor.extract_date(text)
    print(f"Date: {detected_date}")
    detected_time = extractor.extract_time(text)
    print(f"Time: {detected_time}")

    # Build the record the scrapers would produce for this text
    event = GamingEvent(
        title=text,
        game_system=extractor.extract_game_system(text),
        venue="Test Venue",
        date=extractor.extract_date(text) or "Unknown",
        start_time=extractor.extract_time(text) or "Unknown",
        source="test"
    )

    print(f"Event JSON: {json.dumps(event.to_dict(), indent=2)}")

## Next Steps

To expand this scraper:

1. **Get API Credentials**:
   - Facebook: Create a Facebook App and get an access token
   - Discord: Create a Discord Bot and get a bot token

2. **Add More Sources**:
   - Reddit gaming communities
   - Twitter/X gaming accounts
   - Eventbrite gaming events
   - Local gaming store websites

3. **Improve Extraction**:
   - Train ML models for better text parsing
   - Add more game systems and keywords
   - Improve date/time parsing

4. **Add Features**:
   - Smarter duplicate detection across sources (currently only exact title/date/time matches are merged)
   - Event notifications
   - Calendar export
   - Web dashboard

5. **Storage Options**:
   - Database storage (PostgreSQL, MongoDB)
   - Cloud storage (AWS S3, Google Cloud)
   - Real-time updates