In [None]:
import re
import sys
from itertools import combinations
from pathlib import Path

# Add backend to path
sys.path.insert(0, str(Path.cwd()))

import pandas as pd

from backend.models.market import Market, MarketPair, load_markets_from_parquet
from backend.models.market import market_pairs_to_dataframe, markets_to_dataframe

# Polymarket Markets Explorer

This notebook demonstrates how to work with **Pydantic models** for Polymarket data.

## Object-Oriented Approach

- **Market objects**: Each market is a `Market` instance with validation and helper methods
- **MarketPair objects**: Pairs of related markets as `MarketPair` instances
- **Type safety**: Pydantic provides validation and type checking
- **Rich methods**: Objects have useful methods like `is_open()`, `has_valid_odds()`, etc.

## Placeholder for Database

The parquet files are temporary placeholders for a future database. The helper functions below stand in for the eventual database operations:
- `markets_to_dataframe()` - Convert Market objects to DataFrame
- `save_markets_to_parquet()` - Save to parquet (placeholder for DB insert)
- `load_markets_from_parquet()` - Load from parquet (placeholder for DB query)

Eventually, these will be replaced with proper database operations.

In [None]:
# Load markets as Market objects (not just a raw DataFrame).
markets = load_markets_from_parquet("data/markets.parquet")

print(f"Loaded {len(markets)} Market objects")

# Preview the first object only when something was loaded: indexing an empty
# result would raise IndexError and abort a Restart & Run All.
if len(markets) > 0:
    first_market = markets[0]
    print("\nFirst market object:")
    print(f"  Title: {first_market.title}")
    print(f"  ID: {first_market.market_id[:30]}...")
    print(f"  Yes odds: {first_market.yes_odds}")
    print(f"  No odds: {first_market.no_odds}")
    print(f"  Is open: {first_market.is_open()}")
    print(f"  Has valid odds: {first_market.has_valid_odds()}")
else:
    print("\nNo markets found in data/markets.parquet")

# The objects can still be flattened to a DataFrame when tabular analysis is
# needed (placeholder for a DB query). markets_to_dataframe is imported in the
# notebook's top import cell.
all_markets_df = markets_to_dataframe(markets)
all_markets_df.head()

In [None]:
# Split the loaded Market objects by status. The two lists are built with
# different checks (the is_open() method vs. the `closed` attribute).
open_markets = list(filter(lambda market: market.is_open(), markets))
closed_markets = list(filter(lambda market: market.closed, markets))

print(f"Open markets: {len(open_markets)}")
print(f"Closed markets: {len(closed_markets)}")

# The heading is printed unconditionally; the details only when a sample exists.
print("\nSample open market:")
if len(open_markets) > 0:
    sample_market = open_markets[0]
    print(
        f"  {sample_market.title}",
        f"  URL: {sample_market.url}",
        f"  End date: {sample_market.end_date}",
        sep="\n",
    )

# The DataFrame view remains handy for quick aggregate checks.
all_markets_df["closed"].value_counts()

In [None]:
# Load market pairs as a DataFrame (they are stored in flattened format).
# In the future, a load_market_pairs_from_parquet function could reconstruct
# MarketPair objects from the flattened data.
markets_pairs_df = pd.read_parquet("data/market_pairs.parquet")

print(f"Total market pairs: {len(markets_pairs_df)}")

# Preview the first row only when the table has rows: iloc[0] on an empty
# frame raises IndexError and would abort a Restart & Run All.
if not markets_pairs_df.empty:
    first_pair = markets_pairs_df.iloc[0]  # look the row up once, reuse below
    print("\nFirst pair:")
    print(f"  Keyword: {first_pair['keyword']}")
    print(f"  Market 1: {first_pair['market1_title']}")
    print(f"  Market 2: {first_pair['market2_title']}")
else:
    print("\nNo market pairs found in data/market_pairs.parquet")

markets_pairs_df.head()

In [None]:
# Narrow the flattened pairs table to the rows whose keyword is "Iran", then
# report how many rows matched and how many distinct market-1 titles they hold.
is_iran_pair = markets_pairs_df["keyword"] == "Iran"
iran_pairs_df = markets_pairs_df[is_iran_pair]
print(f"Iran-related pairs: {len(iran_pairs_df)}")
print(f"Unique Iran market 1 titles: {iran_pairs_df['market1_title'].nunique()}")

# Example of working with objects - filter markets by keyword
def find_markets_by_keyword(markets_list, keyword):
    """Find markets whose title contains ``keyword`` as a whole word.

    Matching is case-insensitive and anchored on word boundaries, so
    "Iran" matches "IRAN deal" but not "Iranian election".

    The keyword is treated as literal text: regex metacharacters in it are
    escaped before the pattern is built, so a keyword such as "A+B" matches
    the text "A+B" rather than being interpreted as a regular expression.

    Args:
        markets_list: Iterable of objects exposing a ``title`` string attribute.
        keyword: Literal text to search for in each title.

    Returns:
        A list of the matching items, in their original order.
    """
    # `re` is imported in the notebook's top import cell.
    pattern = re.compile(rf"\b{re.escape(keyword)}\b", re.IGNORECASE)
    return [market for market in markets_list if pattern.search(market.title)]

iran_markets = find_markets_by_keyword(markets, "Iran")
print(f"\nFound {len(iran_markets)} markets with 'Iran' in title")

# Print a short, numbered preview (at most three markets).
print("\nSample Iran markets:")
for position, market in enumerate(iran_markets[:3], start=1):
    print(f"{position}. {market.title}")
    print(f"   Yes: {market.yes_odds}, No: {market.no_odds}")
    print(f"   Open: {market.is_open()}")

In [None]:
# Converting objects back to DataFrame/Parquet (placeholder for DB)
from backend.models.market import market_pairs_to_dataframe

# Convert MarketPair objects to DataFrame
iran_pairs_df = market_pairs_to_dataframe(iran_pairs)

print(f"Converted {len(iran_pairs)} MarketPair objects to DataFrame")
print(f"\nDataFrame shape: {iran_pairs_df.shape}")
print("\nDataFrame columns:", list(iran_pairs_df.columns))

# Could save to parquet (placeholder for saving to database)
# from backend.models.market import save_market_pairs_to_parquet
# save_market_pairs_to_parquet(iran_pairs, "data/iran_pairs.parquet")

iran_pairs_df.head()

In [None]:
# Example: Create MarketPair objects from Iran markets
from itertools import combinations

# Get open Iran markets with valid odds
iran_open_markets = [m for m in iran_markets if m.is_open() and m.has_valid_odds()]
print(f"Open Iran markets with valid odds: {len(iran_open_markets)}")

# Create some MarketPair objects
iran_pairs = []
for i, (m1, m2) in enumerate(combinations(iran_open_markets[:5], 2)):
    pair = MarketPair(
        pair_id=f"Iran_{i+1:04d}",
        keyword="Iran",
        market1=m1,
        market2=m2
    )
    iran_pairs.append(pair)

print(f"\nCreated {len(iran_pairs)} MarketPair objects")

# Show first pair
if iran_pairs:
    pair = iran_pairs[0]
    print(f"\nSample pair:")
    print(f"  Pair ID: {pair.pair_id}")
    print(f"  Keyword: {pair.keyword}")
    print(f"  Market 1: {pair.market1.title[:60]}...")
    print(f"  Market 1 odds: Yes={pair.market1.yes_odds}, No={pair.market1.no_odds}")
    print(f"  Market 2: {pair.market2.title[:60]}...")
    print(f"  Market 2 odds: Yes={pair.market2.yes_odds}, No={pair.market2.no_odds}")
    print(f"  Both open: {pair.both_markets_open()}")
    print(f"  Both have valid odds: {pair.both_have_valid_odds()}")