In [2]:
import requests
import pandas as pd
import time  # Add this import
import random  # Add this for randomizing sleep time

BASE_URL = "https://gamma-api.polymarket.com/markets"

all_markets = []
offset = 0
max_retries = 5  # Set a maximum number of retries
max_offset = 10000  # Set a reasonable maximum to avoid infinite loopsi

while offset < max_offset:
    params = {"limit": 100, "offset": offset}
    
    # Add retry logic
    for retry in range(max_retries):
        try:
            # Add a delay between requests to avoid rate limiting
            time.sleep(random.uniform(1.0, 2.0))
            
            r = requests.get(BASE_URL, params=params)
            r.raise_for_status()
            batch = r.json()
            
            if not batch:
                max_offset = 0  # This will exit the outer loop
                break
                
            all_markets.extend(batch)
            offset += 100
            print(f"Successfully fetched batch at offset {offset-100}, got {len(batch)} markets")
            break  # Success, exit retry loop
            
        except requests.exceptions.HTTPError as e:
            if e.response.status_code == 429:
                # If rate limited, wait longer before retrying
                wait_time = 5 + random.uniform(5.0, 15.0)
                print(f"Rate limited. Waiting {wait_time:.2f} seconds before retry {retry+1}/{max_retries}")
                time.sleep(wait_time)
                continue
            elif retry == max_retries - 1:
                print(f"Failed after {max_retries} retries: {e}")
                max_offset = 0  # Exit outer loop
                break
            else:
                print(f"Error: {e}. Retrying ({retry+1}/{max_retries})...")
                time.sleep(2)
        except Exception as e:
            print(f"Unexpected error: {e}")
            if retry == max_retries - 1:
                max_offset = 0  # Exit outer loop
                break
            time.sleep(2)

# Filter ETH markets only
eth_markets = []
keywords = ["eth ", " eth ", "ETH ", " ETH ", "ethereum", "ethusd", "Ethereum", "Ethereum "]

for m in all_markets:
    question = m.get("question", "").lower()
    print(m.get("start_date"))
    if any(k in question for k in keywords):
        eth_markets.append({
            "id": m.get("id"),
            "question": m.get("question"),
            "slug": m.get("slug"),
            "start_date": m.get("start_date"),
            "end_date": m.get("end_date"),
            "volume": m.get("volume_num"),
            "liquidity": m.get("liquidity_num"),
            "active": m.get("active"),
            "closed": m.get("closed")
        })

# Save filtered ETH markets
df = pd.DataFrame(eth_markets)
df.to_csv("main_eth_betting_markets.csv", index=False)
print(f"✅ Saved {len(df)} ETH markets.")
df.head(10)

Successfully fetched batch at offset 0, got 100 markets
Successfully fetched batch at offset 100, got 100 markets
Successfully fetched batch at offset 200, got 100 markets
Successfully fetched batch at offset 300, got 100 markets
Successfully fetched batch at offset 400, got 100 markets
Successfully fetched batch at offset 500, got 100 markets
Successfully fetched batch at offset 600, got 100 markets
Successfully fetched batch at offset 700, got 100 markets
Successfully fetched batch at offset 800, got 100 markets
Successfully fetched batch at offset 900, got 100 markets
Successfully fetched batch at offset 1000, got 100 markets
Successfully fetched batch at offset 1100, got 100 markets
Successfully fetched batch at offset 1200, got 100 markets
Successfully fetched batch at offset 1300, got 100 markets
Successfully fetched batch at offset 1400, got 100 markets
Successfully fetched batch at offset 1500, got 100 markets
Successfully fetched batch at offset 1600, got 100 markets
Successfu

Unnamed: 0,id,question,slug,start_date,end_date,volume,liquidity,active,closed
0,75,Will the Ethereum 2.0 Genesis Event happen suc...,will-the-ethereum-20-genesis-event-happen-succ...,,,,,True,True
1,8938,"Will ETH be above $1,500 on January 27th?",will-eth-be-above-1-500-on-january-27th,,,,,True,True
2,61328,"Will ETH be above $1500 on February 3rd, 2021?",will-eth-be-above-1500-on-february-3rd-2021,,,,,True,True
3,71914,"Will ETH be above $2000 on March 1st, 2021?",will-eth-be-above-2000-on-march-1st-2021,,,,,True,True
4,79704,Will the average Ethereum gas price be below 1...,will-the-average-ethereum-gas-price-be-below-1...,,,,,True,True
5,98033,"Will ETH be above $2000 on April 1st, 2021?",will-eth-be-above-2000-on-april-1st-2021,,,,,True,True
6,98685,"Will ETH be above $1500 on March 7th, 2021?",will-eth-be-above-1500-on-march-7th-2021,,,,,True,True
7,101793,"Will ETH be above $1750 on March 22nd, 2021?",will-eth-be-above-1750-on-march-22nd-2021-1,,,,,True,True
8,101817,Will the average Ethereum gas price be below 1...,will-the-average-ethereum-gas-price-be-below-1...,,,,,True,True
9,107931,Will the average Ethereum gas price be below 1...,will-the-average-ethereum-gas-price-be-below-1...,,,,,True,True


In [12]:
from datetime import datetime
from decimal import Decimal


def iso_to_dt(ts):
    """Convert ISO 8601 → pandas‑friendly datetime; returns None if blank."""
    if not ts:
        return None
    return pd.to_datetime(ts, utc=True)

# Filter ETH markets only
eth_markets = []
keywords = ["eth ", " eth ", "ETH ", " ETH ", "ethereum", "ethusd", "Ethereum", "Ethereum "]

i = 0
for m in all_markets:
    if i == 0:
        question
    question = m.get("question", "").lower()
    if any(k in question for k in keywords):
        eth_markets.append({
            "id": m.get("id"),
            "question": m.get("question"),
            "slug": m.get("slug"),

            "start_date": iso_to_dt(m.get("startDate")),
            "end_date"  : iso_to_dt(m.get("endDate")),

            "volume"    : Decimal(m.get("volumeNum", 0)),
            "liquidity" : Decimal(m.get("liquidityNum", 0)),

            "active"    : bool(m.get("active")),
            "closed"    : bool(m.get("closed")),
        })


# Save filtered ETH markets
df = pd.DataFrame(eth_markets)
df.to_csv("main_eth_betting_markets.csv", index=False)
print(f"✅ Saved {len(df)} ETH markets.")
df.head(10)

✅ Saved 332 ETH markets.


Unnamed: 0,id,question,slug,start_date,end_date,volume,liquidity,active,closed
0,75,Will the Ethereum 2.0 Genesis Event happen suc...,will-the-ethereum-20-genesis-event-happen-succ...,NaT,2020-12-02 00:00:00+00:00,560270.42,7.87999999999999989341858963598497211933135986...,True,True
1,8938,"Will ETH be above $1,500 on January 27th?",will-eth-be-above-1-500-on-january-27th,NaT,2021-01-28 00:00:00+00:00,759196.24,0,True,True
2,61328,"Will ETH be above $1500 on February 3rd, 2021?",will-eth-be-above-1500-on-february-3rd-2021,NaT,2021-02-03 00:00:00+00:00,685915.48,0.01000000000000000020816681711721685132943093...,True,True
3,71914,"Will ETH be above $2000 on March 1st, 2021?",will-eth-be-above-2000-on-march-1st-2021,NaT,2021-03-01 00:00:00+00:00,747989.5999999999,20.4800000000000004263256414560601115226745605...,True,True
4,79704,Will the average Ethereum gas price be below 1...,will-the-average-ethereum-gas-price-be-below-1...,NaT,2021-02-16 00:00:00+00:00,94610.61,1,True,True
5,98033,"Will ETH be above $2000 on April 1st, 2021?",will-eth-be-above-2000-on-april-1st-2021,NaT,2021-04-01 00:00:00+00:00,200394.99,0,True,True
6,98685,"Will ETH be above $1500 on March 7th, 2021?",will-eth-be-above-1500-on-march-7th-2021,NaT,2021-03-07 00:00:00+00:00,451637.39,1.67999999999999993782751062099123373627662658...,True,True
7,101793,"Will ETH be above $1750 on March 22nd, 2021?",will-eth-be-above-1750-on-march-22nd-2021-1,NaT,2021-03-22 00:00:00+00:00,354495.5,100.040000000000006252776074688881635665893554...,True,True
8,101817,Will the average Ethereum gas price be below 1...,will-the-average-ethereum-gas-price-be-below-1...,NaT,2021-03-20 00:00:00+00:00,22745.07,655.1399999999999863575794734060764312744140625,True,True
9,107931,Will the average Ethereum gas price be below 1...,will-the-average-ethereum-gas-price-be-below-1...,NaT,2021-04-05 00:00:00+00:00,53373.66,998,True,True


In [34]:
import json
import requests 
import ast

def get_market_outcomes(market_id):
    url = f"https://gamma-api.polymarket.com/markets/{market_id}"
    r = requests.get(url)
    if r.status_code != 200:
        return None
    market = r.json()

    print(json.dumps(market, indent=2))
    
    outcomes_raw = market.get("outcomes", [])
    outcomes = ast.literal_eval(outcomes_raw)

    prices_raw = market.get("outcomePrices", [])
    prices = ast.literal_eval(prices_raw)
    for i in range(len(outcomes)):
        name = outcomes[i]
        price = float(prices[i]) * 100
        print(f"{name}: {price:.2f}%")

    return outcomes, prices

# Example: get outcome prices for one market
outcomes, prices = get_market_outcomes("101817")  # replace with real ETH market ID

print(outcomes)
print(prices)

{
  "id": "101817",
  "question": "Will the average Ethereum gas price be below 130 Gwei on March 20?",
  "conditionId": "0xb63716ba12185659db0b79794a737534013b1ad2ccfab1cd8d03e801ab8240db",
  "slug": "will-the-average-ethereum-gas-price-be-below-130-gwei-on-march-20",
  "resolutionSource": "https://etherscan.io/chart/gasprice",
  "endDate": "2021-03-20T00:00:00Z",
  "category": "Crypto",
  "liquidity": "655.143559",
  "fee": "20000000000000000",
  "image": "",
  "icon": "",
  "description": "This is a market on whether the average Ethereum gas price will be below 130 Gwei on March 20, 2021. This market will resolve to \"Yes\" if Avg Gas Price is listed as being below 130 Gwei for that date, on Etherscan. If Avg Gas Price is 130 Gwei or higher, this market will resolve to \"No\". The resolution source for this market is https://etherscan.io/chart/gasprice. This market will resolve when data is available for the date of March 20, 2021. In the event of ambiguity in terms of the market ou

In [35]:
import re

def extract_price_threshold(question):
    match = re.search(r"\$\s?([\d,]+)", question)
    if match:
        price_str = match.group(1).replace(",", "")
        return float(price_str)
    return None

question = "Will ETH be above $1510 on April 1st, 2021?	"
print(extract_price_threshold(question))

1510.0


In [36]:
# Save to CSV
df.to_csv("polymarket_crypto_markets.csv", index=False)

In [37]:
pip install tqdm ipywidgets

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [38]:
df = pd.read_csv("eth_betting_markets.csv")
df

Unnamed: 0,id,question,slug,start_date,end_date,volume,liquidity,active,closed
0,75,Will the Ethereum 2.0 Genesis Event happen suc...,will-the-ethereum-20-genesis-event-happen-succ...,,,,,True,True
1,8938,"Will ETH be above $1,500 on January 27th?",will-eth-be-above-1-500-on-january-27th,,,,,True,True
2,61328,"Will ETH be above $1500 on February 3rd, 2021?",will-eth-be-above-1500-on-february-3rd-2021,,,,,True,True
3,71914,"Will ETH be above $2000 on March 1st, 2021?",will-eth-be-above-2000-on-march-1st-2021,,,,,True,True
4,79704,Will the average Ethereum gas price be below 1...,will-the-average-ethereum-gas-price-be-below-1...,,,,,True,True
...,...,...,...,...,...,...,...,...,...
761,533807,Ethereum Up or Down on April 5?,ethereum-up-or-down-on-april-5,,,,,True,True
762,534111,Ethereum Up or Down on April 6?,ethereum-up-or-down-on-april-6,,,,,True,True
763,534112,Ethereum Up or Down on April 7?,ethereum-up-or-down-on-april-7,,,,,True,False
764,534113,Ethereum Up or Down on April 8?,ethereum-up-or-down-on-april-8,,,,,True,False


In [39]:
# Modified code for your notebook (without notebook-specific tqdm)
import pandas as pd
import json
import requests
import ast
import time
from tqdm import tqdm  # Using standard tqdm instead of notebook version

# Load the existing dataframe with ETH markets
# If you're continuing from above, df is already defined
# Otherwise, uncomment the line below to load it from CSV
# df = pd.read_csv("eth_betting_markets.csv")

# Function to get market details
def get_full_market_data(market_id):
    url = f"https://gamma-api.polymarket.com/markets/{market_id}"
    r = requests.get(url)
    if r.status_code != 200:
        return None
    return r.json()

# New columns to extract from the detailed market data
additional_columns = [
    "outcomes", "outcomePrices", "volumeNum", "liquidityNum", 
    "endDateIso", "volume24hr", "clobTokenIds", "fpmmLive", 
    "competitive", "approved", "spread", "oneDayPriceChange", 
    "lastTradePrice", "bestBid", "bestAsk"
]

# Initialize new columns
for col in additional_columns:
    df[col] = None

# Create columns for outcome names and prices
# We'll assume a maximum of 3 outcomes per market (adjust if needed)
for i in range(1, 4):
    df[f"outcome_{i}"] = None
    df[f"price_{i}"] = None

# Fetch detailed information for each market and update the dataframe
print(f"Fetching detailed information for {len(df)} markets...")
for idx, row in tqdm(df.iterrows(), total=len(df)):
    market_id = row['id']
    market_data = get_full_market_data(market_id)
    
    if market_data:
        # Update dataframe with additional columns
        for col in additional_columns:
            if col in market_data:
                df.at[idx, col] = market_data[col]
        
        # Add processed outcome information
        try:
            outcomes_raw = market_data.get("outcomes", [])
            prices_raw = market_data.get("outcomePrices", [])
            
            if outcomes_raw and prices_raw:
                outcomes = ast.literal_eval(outcomes_raw)
                prices = ast.literal_eval(prices_raw)
                
                # Add columns for each outcome and its price
                for i, (outcome, price) in enumerate(zip(outcomes, prices)):
                    if i < 3:  # Only handle up to 3 outcomes
                        df.at[idx, f"outcome_{i+1}"] = outcome
                        df.at[idx, f"price_{i+1}"] = float(price) * 100  # Convert to percentage
        except (SyntaxError, ValueError) as e:
            print(f"Error processing outcomes for market {market_id}: {e}")
    
    # Add a small delay to avoid hitting rate limits
    time.sleep(0.2)

# Save the enhanced dataframe
df.to_csv("enhanced_eth_markets.csv", index=False)
print(f"✅ Saved enhanced dataframe with {len(df)} ETH markets and additional columns.")

# Display the first few rows with the new columns
df.head(3)

Fetching detailed information for 766 markets...


100%|██████████| 766/766 [04:47<00:00,  2.66it/s]

✅ Saved enhanced dataframe with 766 ETH markets and additional columns.





Unnamed: 0,id,question,slug,start_date,end_date,volume,liquidity,active,closed,outcomes,...,oneDayPriceChange,lastTradePrice,bestBid,bestAsk,outcome_1,price_1,outcome_2,price_2,outcome_3,price_3
0,75,Will the Ethereum 2.0 Genesis Event happen suc...,will-the-ethereum-20-genesis-event-happen-succ...,,,,,True,True,"[""Yes"", ""No""]",...,0,0,0,1,Yes,99.999914,No,8.6e-05,,
1,8938,"Will ETH be above $1,500 on January 27th?",will-eth-be-above-1-500-on-january-27th,,,,,True,True,"[""Yes"", ""No""]",...,0,0,0,1,Yes,0.00037,No,99.99963,,
2,61328,"Will ETH be above $1500 on February 3rd, 2021?",will-eth-be-above-1500-on-february-3rd-2021,,,,,True,True,"[""Yes"", ""No""]",...,0,0,0,1,Yes,99.999623,No,0.000377,,


In [None]:
import pandas as pd
import json
import requests
import ast
import time
from tqdm import tqdm  # For progress bar

def get_market_outcomes(market_id):
    """Get detailed information for a specific market ID"""
    url = f"https://gamma-api.polymarket.com/markets/{market_id}"
    r = requests.get(url)
    if r.status_code != 200:
        return None
    return r.json()

# Load the existing ETH markets dataframe
try:
    # Try to load from CSV file first
    df = pd.read_csv("../COS-ECE-473-FinalProj/enhanced_eth_markets.csv")
    print(f"Loaded dataframe with {len(df)} ETH markets")
except FileNotFoundError:
    print("CSV file not found. Please run the initial code block first.")
    exit()

# Save original column names
original_columns = df.columns.tolist()

# New columns to extract from the detailed market data
additional_columns = [
    "outcomes", "outcomePrices", "volumeNum", "liquidityNum", 
    "endDateIso", "volume24hr", "clobTokenIds", "fpmmLive", 
    "competitive", "approved", "spread", "oneDayPriceChange", 
    "lastTradePrice", "bestBid", "bestAsk"
]

# Initialize new columns
for col in additional_columns:
    df[col] = None

# Add explicit percentage columns
df['yes_percentage'] = None
df['no_percentage'] = None
df['outcome_details'] = None

# Fetch detailed information for each market and update the dataframe
print(f"Fetching detailed information for {len(df)} markets...")


API_TO_DF = {
    "startDate"    : "start_date",
    "endDate"      : "end_date",
    "volume"       : "volume",           
    "liquidity"    : "liquidity",       
    "volumeNum"    : "total_volume",     
    "liquidityNum" : "total_liquidity",
    "endDateIso"   : "end_date_iso",
    "bestBid"      : "bestBid",
    "bestAsk"      : "bestAsk",
}

for idx, row in tqdm(df.iterrows(), total=len(df)):
    market_id   = row["id"]
    market_data = get_market_outcomes(market_id)
    if not market_data:
        continue                     # skip if the call failed

    # ---------- generic copy using the key‑map ----------
    for api_key, df_col in API_TO_DF.items():
        if api_key in market_data and pd.notna(market_data[api_key]):
            df.at[idx, df_col] = market_data[api_key]

    # ---------- any extra fields you still want ----------
    for col in additional_columns:      # keep your existing logic
        if col in market_data:
            df.at[idx, col] = market_data[col]

    # ---------- outcome parsing (unchanged except json.loads is safer) ----------
    try:
        outcomes_raw = market_data.get("outcomes", "[]")
        prices_raw   = market_data.get("outcomePrices", "[]")

        outcomes = json.loads(outcomes_raw)
        prices   = list(map(float, json.loads(prices_raw)))

        if outcomes and prices:
            outcome_details = [
                f"{o}: {p*100:.2f}%" for o, p in zip(outcomes, prices)
            ]
            df.at[idx, "outcome_details"] = " | ".join(outcome_details)

            if "Yes" in outcomes and "No" in outcomes:
                yes_idx = outcomes.index("Yes")
                no_idx  = outcomes.index("No")
                df.at[idx, "yes_percentage"] = prices[yes_idx] * 100
                df.at[idx, "no_percentage"]  = prices[no_idx]  * 100

    except (ValueError, json.JSONDecodeError) as e:
        print(f"[{market_id}] outcome parsing error: {e}")

    time.sleep(0.2)   # avoid rate‑limits


# ----------------- OLD ---------------------
# for idx, row in tqdm(df.iterrows(), total=len(df)):
#     market_id = row['id']
#     market_data = get_market_outcomes(market_id)
    
#     if market_data:
#         # Update dataframe with additional columns
#         for col in additional_columns:
#             if col in market_data:
#                 df.at[idx, col] = market_data[col]
        
#         # Add processed outcome information
#         try:
#             outcomes_raw = market_data.get("outcomes", [])
#             prices_raw = market_data.get("outcomePrices", [])
            
#             if outcomes_raw and prices_raw:
#                 outcomes = ast.literal_eval(outcomes_raw)
#                 prices = ast.literal_eval(prices_raw)
                
#                 # Store all outcome details in a readable format
#                 outcome_details = []
#                 for i, (outcome, price) in enumerate(zip(outcomes, prices)):
#                     price_pct = float(price) * 100
#                     outcome_details.append(f"{outcome}: {price_pct:.2f}%")
#                 df.at[idx, 'outcome_details'] = " | ".join(outcome_details)
                
#                 # Specifically handle Yes/No percentages if present
#                 if 'Yes' in outcomes and 'No' in outcomes:
#                     yes_idx = outcomes.index('Yes')
#                     no_idx = outcomes.index('No')
#                     df.at[idx, 'yes_percentage'] = float(prices[yes_idx]) * 100
#                     df.at[idx, 'no_percentage'] = float(prices[no_idx]) * 100
                    
#         except (SyntaxError, ValueError) as e:
#             print(f"Error processing outcomes for market {market_id}: {e}")
    
#     # Add a small delay to avoid hitting rate limits
#     time.sleep(0.2)
# ----------------- OLD ---------------------



# Rename columns to be more descriptive
column_renames = {
    'volumeNum': 'total_volume',
    'liquidityNum': 'total_liquidity',
    'endDateIso': 'end_date_iso',
    'volume24hr': 'volume_24hr',
    'oneDayPriceChange': 'one_day_price_change',
    'lastTradePrice': 'last_trade_price',
}
df = df.rename(columns=column_renames)

# Reorder columns to keep original columns first, followed by new columns
new_columns = original_columns + [
    'yes_percentage', 'no_percentage', 'outcome_details',
    'total_volume', 'total_liquidity', 'end_date_iso', 'volume_24hr',
    'spread', 'one_day_price_change', 'last_trade_price', 'bestBid', 'bestAsk'
]

# Only include columns that actually exist in the dataframe
new_columns = [col for col in new_columns if col in df.columns]
df = df[new_columns]

# Save the enhanced dataframe
df.to_csv("enhanced_eth_markets.csv", index=False)
print(f"✅ Saved enhanced dataframe with {len(df)} ETH markets and additional columns.")

# Display the first few rows with the new columns
print("\nSample of enhanced dataframe:")
pd.set_option('display.max_columns', None)  # Show all columns
print(df.head(3))

Loaded dataframe with 766 ETH markets
Fetching detailed information for 766 markets...


  df.at[idx, df_col] = market_data[api_key]
  df.at[idx, df_col] = market_data[api_key]
  df.at[idx, df_col] = market_data[api_key]
  df.at[idx, df_col] = market_data[api_key]
100%|██████████| 766/766 [04:35<00:00,  2.78it/s]

✅ Saved enhanced dataframe with 766 ETH markets and additional columns.

Sample of enhanced dataframe:
      id                                           question  \
0     75  Will the Ethereum 2.0 Genesis Event happen suc...   
1   8938          Will ETH be above $1,500 on January 27th?   
2  61328     Will ETH be above $1500 on February 3rd, 2021?   

                                                slug start_date  \
0  will-the-ethereum-20-genesis-event-happen-succ...        NaN   
1            will-eth-be-above-1-500-on-january-27th        NaN   
2        will-eth-be-above-1500-on-february-3rd-2021        NaN   

               end_date         volume liquidity  active  closed  \
0  2020-12-02T00:00:00Z  560270.423771  7.877976    True    True   
1  2021-01-28T00:00:00Z  759196.237185  0.000068    True    True   
2  2021-02-03T00:00:00Z  685915.482763  0.007484    True    True   

        outcomes                                      outcomePrices  \
0  ["Yes", "No"]  ["0.999999135




In [13]:
df.columns

Index(['id', 'question', 'slug', 'start_date', 'end_date', 'volume',
       'liquidity', 'active', 'closed'],
      dtype='object')