FETCH MARKETS USING Gamma Market API 

In [7]:
import requests
import pandas as pd
import json
import time

def fetch_polymarket_data(batch_size=500):
    """
    Fetch all Polymarket data using pagination
    
    Args:
        batch_size (int): Number of markets to fetch per request
    """
    try:
        all_markets = []
        offset = 0
        total_fetched = 0
        
        while True:
            # Make request with current offset
            url = f"https://gamma-api.polymarket.com/events?closed=false&limit={batch_size}&offset={offset}"
            print(f"Fetching events with offset {offset}...")
            
            r = requests.get(url)
            markets_list = r.json()
            
            # If no markets returned, we've reached the end
            if not markets_list:
                print("No more events available")
                break
            
            all_markets.extend(markets_list)
            total_fetched += len(markets_list)
            print(f"Fetched {len(markets_list)} events. Total so far: {total_fetched}")
            
            # If we got fewer markets than the batch size, we've reached the end
            if len(markets_list) < batch_size:
                break
                
            # Increment offset for next batch
            offset += batch_size
            
            # Small delay to be nice to the API
            time.sleep(0.5)
        
        # Convert to DataFrame
        df = pd.DataFrame(all_markets)
        
        # Sort by volume if available
        if 'volume' in df.columns:
            df = df.sort_values('volume', ascending=False)
        
        # Remove duplicates
        df = df.drop_duplicates(subset='id')
            
        print(f"\nTotal unique events fetched: {len(df)}")
        
        return df
        
    except Exception as e:
        print(f"Error fetching data: {str(e)}")
        return pd.DataFrame()

# Fetch and display the data
polymarket_df = fetch_polymarket_data(batch_size=500)
pd.set_option('display.max_columns', None)  # Show all columns
polymarket_df.head(10)

# Save to CSV
polymarket_df.to_csv('polymarket_events.csv', index=False)

Fetching events with offset 0...
Fetched 500 events. Total so far: 500
Fetching events with offset 500...
Fetched 121 events. Total so far: 621

Total unique events fetched: 621


Filter Markets to get only those of interest

In [8]:
##############################################################
# VOLUME FILTERING ############################################
##############################################################
# Convert volume to numeric, handling any non-numeric values
polymarket_df['volume'] = pd.to_numeric(polymarket_df['volume'], errors='coerce')

# Sort all values of volume in descending order
polymarket_df = polymarket_df.sort_values('volume', ascending=False)

# Filter to only include markets with volume greater than 200000
polymarket_df = polymarket_df[polymarket_df['volume'] > 1000000]

##############################################################
# GET SPECIFIC MARKETS #######################################
##############################################################
from ollama import chat
from ollama import ChatResponse
import ast

# Get unique labels to know what events to get
unique_labels = []
for i in polymarket_df['tags']:
    for j in i:
        if j["label"] not in unique_labels:
            unique_labels.append(j["label"])
print(len(unique_labels))


# Create the prompt
prompt = """Analyze these labels and identify those related to politics or finance, and make sure to AVOID any labels related to sports or entertainment.
Return ONLY a Python list containing the relevant labels, formatted exactly like this:
['label1', 'label2', 'label3']
No other text or explanation, just the list.
Labels: """ + str(unique_labels)

def get_relevant_labels(unique_labels, num_passes=3):
    all_relevant_labels = set()
    
    for _ in range(num_passes):
        response = chat(
            model='llama3.2',
            messages=[{
                'role': 'user',
                'content': prompt
            }],
        )
        
        try:
            labels = ast.literal_eval(response.message.content.strip())
            all_relevant_labels.update(labels)
        except (SyntaxError, ValueError) as e:
            print(f"Error parsing response: {e}")
            content = response.message.content.strip()
            content = content.replace('[', '').replace(']', '')
            labels = [label.strip().strip("'\"") for label in content.split(',')]
            all_relevant_labels.update(labels)
        
        time.sleep(1)  # Add small delay between requests
    
    return list(all_relevant_labels)

# Use the function
relevant_labels = get_relevant_labels(unique_labels)
print(f"Found {len(relevant_labels)} relevant labels:", relevant_labels)



177
Found 47 relevant labels: ['Iran', 'Zerohedge', 'International Relations', 'Russian Politics', 'Ukraine', 'Politics', 'Strategic Bitcoin Reserve', 'Prediction Markets', 'Macro Fed', 'Crypto Prices', 'Gov Shutdown', 'Yoon Suk Yeol', 'Crypto', 'Putin', 'prediction markets', 'Macro Geopolitics', 'Investments', 'Ethereum', 'World Elections', 'Stablecoins', 'US Election', 'Zelensky', 'Finance', 'Pete Hegseth', 'Middle East', 'Investment', 'Fed Rates', 'Trump Cabinet', 'Business', 'Elections', 'Commodities', 'Bitcoin', 'Trump Presidency', 'Economy', 'Ron Desantis', 'Global Politics', 'Global Elections', 'Macro Election 2', 'Markets', 'Geopolitics', 'Markt Cap', 'NATO', 'Russia', 'EU', 'Stock', 'finance', 'Creators']


Get columns of interest: clob token id, question, volume, active, description, tags, price, rewards, events

In [9]:
# parse the tags dictionary to filter those that are relevant
# Create an empty list to store the rows we want to keep
relevant_rows = []

# Iterate through the original dataframe
for index, row in polymarket_df.iterrows():
    # Check if any tag in this row's tags matches our relevant_labels
    tags = row['tags']
    if any(tag['label'] in relevant_labels for tag in tags):
        relevant_rows.append(row)

# Create new dataframe from the relevant rows
df = pd.DataFrame(relevant_rows)

# Reset the index of the new dataframe
df = df.reset_index(drop=True)

print(f"Found {len(df)} markets with relevant tags")
df.head(10)

markets_of_interest = df

Found 64 markets with relevant tags


In [10]:
for i in df['markets']:
    print(i)


[{'id': '511755', 'question': 'Will someone else be inaugurated?', 'conditionId': '0x21559998482bf5dfbcaf0f5409edd34eebd84c8998df622f9da8ff1e1f248a26', 'slug': 'will-someone-else-be-inaugurated', 'resolutionSource': '', 'endDate': '2025-01-20T12:00:00Z', 'liquidity': '167764.52061', 'startDate': '2024-11-01T22:46:40.903Z', 'image': 'https://polymarket-upload.s3.us-east-2.amazonaws.com/who-will-be-inaugurated-Fnvq3D1ySaZa.jpg', 'icon': 'https://polymarket-upload.s3.us-east-2.amazonaws.com/who-will-be-inaugurated-Fnvq3D1ySaZa.jpg', 'description': 'This market will resolve to "Yes" if someone other than Kamala Harris or Donald J. Trump is inaugurated as President of the United States. Otherwise, this market will resolve to "No".\n\nInauguration day is currently scheduled for January 20, 2025. If inauguration day is delayed, this market will remain open until someone has been inaugurated. If no one has been inaugurated by January 31, 2025, 11:59 PM ET, this market will resolve to "Yes". \n

Access the CLIENT CLOB of POLYMARKET to get access to GOLDSKY

In [11]:
# Standard library imports
import time
import json
from typing import Dict, Any

# Third-party imports
import pandas as pd
import requests
from py_clob_client.client import ClobClient
from dotenv import load_dotenv
import os

# Load environment variables (if you're using .env file for API keys)
load_dotenv('keys/keys.env')


host = "https://clob.polymarket.com"
key = os.getenv("PK")
chain_id = 137  # Polygon Mainnet chain ID

# Ensure the private key is loaded correctly
if not key:
    raise ValueError("Private key not found. Please set PK in the environment variables.")

# Initialize the client with your private key
client = ClobClient(host, key=key, chain_id=chain_id)

# Create or derive API credentials (this is where the API key, secret, and passphrase are generated)
try:
    api_creds = client.create_or_derive_api_creds()
    print("API Key:", api_creds.api_key)
    print("Secret:", api_creds.api_secret)
    print("Passphrase:", api_creds.api_passphrase)

    # You should now save these securely (e.g., store them in your .env file)
except Exception as e:
    print("Error creating or deriving API credentials:", e)

# YOU WILL HAVE TO RUN THIS CODE THE FIRST TIME TO GET THE API KEYS ????
# from py_clob_client.client import ClobClient
# host = "https://clob.polymarket.com"
# private_key = "5c98489d9d3612b7c5f7a91d2c249c808623d212b3266b34deda44480a08b50d"
# chain_id = 137  # Polygon Mainnet
# # Initialize the client with private key
# client = ClobClient(host, key=private_key, chain_id=chain_id)
# api_key_data = client.create_api_key()
# print(api_key_data)
print("Done!")


API Key: 649c5b96-f29f-46c0-421e-9696432f52d4
Secret: XR9Q5lEpMghhgsGHltR4hUP32Pv3EMoiXN-BaKoMnqs=
Passphrase: 5c98489d9d3612b7c5f7a91d2c249c808623d212b3266b34deda44480a08b50d
Done!


In [17]:
import os
import json
import pandas as pd
from datetime import datetime, timedelta
import requests
import matplotlib.pyplot as plt
# Make sure these imports are at the top of your file
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.patheffects as pe
from matplotlib.ticker import PercentFormatter


class PolymarketDataFetcher:
    def __init__(self):
        self.orders_endpoint = "https://api.goldsky.com/api/public/project_cl6mb8i9h0003e201j6li0diw/subgraphs/polymarket-orderbook-resync/prod/gn"

    def fetch_all_order_history(self, condition_id, start_date="2023-10-08", batch_size=1000):
        """
            Fetch complete order history for a market since a specific date
            
        Args:
            condition_id (str): Market ID
            start_date (str): Start date in YYYY-MM-DD format
            batch_size (int): Number of orders to fetch per request
        """
        all_orders = []
        offset = 0
        
        # Convert start_date to Unix timestamp
        start_timestamp = int(datetime.strptime(start_date, "%Y-%m-%d").timestamp())
        
        while True:
            query = """
            query OrderHistory($conditionId: String!, $limit: Int!, $offset: Int!, $startTime: Int!) {
            enrichedOrderFilleds(
                first: $limit
                skip: $offset
                where: {
                market: $conditionId
                timestamp_gte: $startTime
                }
                orderBy: timestamp
                orderDirection: asc
            ) {
                timestamp
                price
                size
                side
            }
            }
            """
            
            result = self._make_request(query, {
                'conditionId': condition_id,
                'limit': batch_size,
                'offset': offset,
                'startTime': start_timestamp
            })
            
            if not result or 'enrichedOrderFilleds' not in result or not result['enrichedOrderFilleds']:
                break
                
            orders = result['enrichedOrderFilleds']
            all_orders.extend(orders)
            
            print(f"Fetched {len(orders)} orders. Total: {len(all_orders)}")
            
            if len(orders) < batch_size:
                break
                
            offset += batch_size
        
        if not all_orders:
            print("No orders found")
            return None
            
        # Convert to DataFrame
        df = pd.DataFrame(all_orders)
        
        # Convert timestamp to datetime
        df['timestamp'] = pd.to_datetime(df['timestamp'].astype(int), unit='s')
        
        # Convert price and size to numeric
        df['price'] = pd.to_numeric(df['price'])
        df['size'] = pd.to_numeric(df['size'])
        
        # Sort by timestamp
        df = df.sort_values('timestamp')
        
        print(f"\nData range: {df['timestamp'].min()} to {df['timestamp'].max()}")
        
        return df

    def _make_request(self, query, variables):
        try:
            response = requests.post(
                self.orders_endpoint,
                json={'query': query, 'variables': variables}
            )
            response.raise_for_status()
            return response.json().get('data', {})
        except Exception as e:
            print(f"Error making request: {str(e)}")
            return None

def plot_price_history(df, title="Market Price History", resample='1H'):
    """Plot smooth price history with Polymarket-style appearance"""
    # Create figure with white background
    plt.style.use('default')  # Reset to default style
    fig, ax = plt.subplots(figsize=(15, 8))
    
    # Resample data to smooth the line
    df_resampled = df.set_index('timestamp').resample(resample).agg({
        'price': 'last',
        'size': 'sum'
    }).fillna(method='ffill')
    
    # Plot the price line with smoothing
    ax.plot(df_resampled.index, df_resampled['price'], 
            color='#0066FF',  # Polymarket blue
            linewidth=2,
            solid_capstyle='round',
            path_effects=[pe.SimpleLineShadow(shadow_color='gray', alpha=0.2),
                         pe.Normal()])
    
    # Add subtle grid
    ax.grid(True, linestyle='--', alpha=0.2, color='gray')
    ax.set_facecolor('white')
    fig.patch.set_facecolor('white')
    
    # Format x-axis to show dates nicely
    ax.xaxis.set_major_locator(mdates.AutoDateLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))
    plt.xticks(rotation=0)
    
    # Remove top and right spines
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_alpha(0.3)
    ax.spines['bottom'].set_alpha(0.3)
    
    # Add subtle source attribution
    plt.text(0.01, 0.02, 'Source: Polymarket.com', 
             transform=ax.transAxes, 
             color='gray', 
             alpha=0.6,
             fontsize=8)
    
    # Adjust layout
    plt.tight_layout()
    
    return fig

if __name__ == "__main__":
    fetcher = PolymarketDataFetcher()
    
    # Your market ID
    clobTokenId = "55223339147513557002753346210723654663683660449692044699329423663012565950662"
    
    print("\nFetching complete order history since Oct 8...")
    df = fetcher.fetch_all_order_history(
        clobTokenId,
        start_date="2023-10-08",  # Specify the start date here
        batch_size=1000
    )
    
    if df is not None:
        # Save raw data
        df.to_csv(f'price_history_{clobTokenId[-8:]}.csv', index=False)
        print(f"\nData saved to price_history_{clobTokenId[-8:]}.csv")
        
        # Create price history plot
        fig = plot_price_history(df, resample='1H')  # Adjust resampling as needed
        
        # Save plot
        fig.savefig(f'price_history_{clobTokenId[-8:]}.png')
        print(f"Plot saved to price_history_{clobTokenId[-8:]}.png")


Fetching complete order history since Oct 8...
Fetched 1000 orders. Total: 1000
Fetched 1000 orders. Total: 2000
Fetched 1000 orders. Total: 3000
Fetched 1000 orders. Total: 4000
Fetched 1000 orders. Total: 5000


KeyboardInterrupt: 