### Powerball Scraper

In [1]:
import pandas as pd
import numpy as np
import regex as re
from bs4 import BeautifulSoup
import requests

In [2]:
# Address of the statistics page. The variable is named `html` although it
# holds the URL rather than the markup; the name is kept for compatibility.
html = "https://www.powerball.net/statistics"

# A timeout stops the request from blocking forever if the server never answers.
response = requests.get(html, timeout=30)

# Stop on anything other than HTTP 200: merely reporting the failure would
# leave `doc` undefined and surface later as an unrelated NameError.
if response.status_code != 200:
    raise RuntimeError(f"Failed to fetch the URL: {response.status_code}")

# Parse the page with the parser that ships with the standard library.
doc = BeautifulSoup(response.text, "html.parser")

In [3]:
# Display the parsed document so the page markup can be inspected.
doc

<!DOCTYPE html>

<html lang="en">
<head>
<meta charset="utf-8"/>
<title>Powerball Statistics | Current Powerball Stats</title>
<meta content="This Powerball Statistics page is updated immediately after each draw and provides a wealth of statistical information on the Powerball lottery numbers." name="description"/>
<meta content="powerball statistics, powerball stats, hot numbers" name="keywords"/>
<meta content="Powerball.net" name="author"/>
<meta content="Copyright © 2025 - Powerball.net" name="copyright"/>
<meta content="telephone=no" name="format-detection"/>
<meta content="True" name="HandheldFriendly"/><meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<link href="https://www.powerball.net/statistics" hreflang="x-default" rel="alternate"/>
<link href="https://www.powerball.net/es/estadistica" hreflang="es" rel="alternate"/>
<link href="https://www.powerball.net/fr/statistiques" hreflang="fr" rel="alternate

In [4]:
# Collect every <div> in the whole document whose class attribute is exactly
# "freq-result js-stats-item" (the search is not restricted to one section).
draws = doc.find_all("div", attrs={"class": "freq-result js-stats-item"})

### Main ball

In [5]:
# Build one record per statistics card in `draws` that shows a main ball.
main_ball = []

for draw in draws:
    # Cards without a "ball inline" element are not main-ball cards; skip
    # them before doing any further lookups.
    number_element = draw.find("div", class_="ball inline")
    if number_element is None:
        continue

    # All <div>s nested in the card, in document order. The second and third
    # are read for the frequency and last-drawn texts; a missing <div> or a
    # missing <strong> yields None instead of raising.
    divs = draw.find_all("div")
    frequency_tag = divs[1].find("strong") if len(divs) > 1 else None
    date_tag = divs[2].find("strong") if len(divs) > 2 else None

    main_ball.append({
        "number": number_element.text.strip(),
        "frequency": frequency_tag.text.strip() if frequency_tag is not None else None,
        "last-drawn": date_tag.text.strip() if date_tag is not None else None,
    })

In [6]:
# Display the collected records (dicts with "number", "frequency", "last-drawn").
main_ball

[{'number': '1', 'frequency': 'Drawn 71 times', 'last-drawn': 'Sep 10 1997'},
 {'number': '2', 'frequency': 'Drawn 64 times', 'last-drawn': 'Oct 11 1997'},
 {'number': '3', 'frequency': 'Drawn 63 times', 'last-drawn': 'Aug 30 1997'},
 {'number': '4', 'frequency': 'Drawn 61 times', 'last-drawn': 'Jul 19 1997'},
 {'number': '5', 'frequency': 'Drawn 52 times', 'last-drawn': 'Oct 22 1997'},
 {'number': '6', 'frequency': 'Drawn 71 times', 'last-drawn': 'Oct 29 1997'},
 {'number': '7', 'frequency': 'Drawn 61 times', 'last-drawn': 'Oct 15 1997'},
 {'number': '8', 'frequency': 'Drawn 64 times', 'last-drawn': 'Sep 13 1997'},
 {'number': '9', 'frequency': 'Drawn 69 times', 'last-drawn': 'Oct 22 1997'},
 {'number': '10', 'frequency': 'Drawn 68 times', 'last-drawn': 'Oct 8 1997'},
 {'number': '11', 'frequency': 'Drawn 77 times', 'last-drawn': 'Oct 29 1997'},
 {'number': '12', 'frequency': 'Drawn 58 times', 'last-drawn': 'Sep 24 1997'},
 {'number': '13', 'frequency': 'Drawn 61 times', 'last-drawn':

In [7]:
# Turn the list of record dicts into a table; the dict keys become the columns.
df_main_ball = pd.DataFrame(data=main_ball)

In [8]:
# Row count of the main-ball table (one row per scraped card).
len(df_main_ball.index)

543

### Powerball

In [9]:
# Build one record per statistics card in `draws` that shows a Powerball.
powerball = []

for draw in draws:
    # Cards without a "powerball inline" element are not Powerball cards;
    # skip them before doing any further lookups.
    number_element = draw.find("div", class_="powerball inline")
    if number_element is None:
        continue

    # All <div>s nested in the card, in document order. The second and third
    # are read for the frequency and last-drawn texts; a missing <div> or a
    # missing <strong> yields None instead of raising.
    divs = draw.find_all("div")
    frequency_tag = divs[1].find("strong") if len(divs) > 1 else None
    date_tag = divs[2].find("strong") if len(divs) > 2 else None

    powerball.append({
        "powerball-number": number_element.text.strip(),
        "frequency": frequency_tag.text.strip() if frequency_tag is not None else None,
        "last-drawn": date_tag.text.strip() if date_tag is not None else None,
    })

In [10]:
# Display the collected records (dicts with "powerball-number", "frequency", "last-drawn").
powerball

[{'powerball-number': '1',
  'frequency': 'Drawn 16 times',
  'last-drawn': 'Jul 19 1997'},
 {'powerball-number': '2',
  'frequency': 'Drawn 9 times',
  'last-drawn': 'Sep 21 1996'},
 {'powerball-number': '3',
  'frequency': 'Drawn 13 times',
  'last-drawn': 'Oct 11 1997'},
 {'powerball-number': '4',
  'frequency': 'Drawn 10 times',
  'last-drawn': 'May 7 1997'},
 {'powerball-number': '5',
  'frequency': 'Drawn 10 times',
  'last-drawn': 'Aug 10 1996'},
 {'powerball-number': '6',
  'frequency': 'Drawn 16 times',
  'last-drawn': 'Oct 18 1997'},
 {'powerball-number': '7',
  'frequency': 'Drawn 9 times',
  'last-drawn': 'Feb 15 1997'},
 {'powerball-number': '8',
  'frequency': 'Drawn 10 times',
  'last-drawn': 'Oct 15 1997'},
 {'powerball-number': '9',
  'frequency': 'Drawn 20 times',
  'last-drawn': 'Oct 1 1997'},
 {'powerball-number': '10',
  'frequency': 'Drawn 21 times',
  'last-drawn': 'Aug 6 1997'},
 {'powerball-number': '11',
  'frequency': 'Drawn 15 times',
  'last-drawn': 'Oct 8 

In [11]:
# Turn the list of record dicts into a table; the dict keys become the columns.
df_powerball = pd.DataFrame(data=powerball)

In [12]:
# Row count of the Powerball table (one row per scraped card).
len(df_powerball.index)

394

### Save as CSV

In [13]:
import os

# DataFrame.to_csv does not create missing folders, so make sure the output
# directory exists before writing into it.
os.makedirs('data', exist_ok=True)

# index=False keeps the DataFrame's row index out of the written files.
df_main_ball.to_csv('data/main_ball.csv', index=False)
df_powerball.to_csv('data/powerball.csv', index=False)

# Scrape 2 - 2015 to Present

In [14]:
# Find the <div>(s) whose class attribute is exactly "tab-info is-active";
# presumably the currently selected tab of the page (confirm against the markup).
active = doc.find_all("div", attrs={"class": "tab-info is-active"})

In [15]:
# Same card search as for the whole page, but limited to the first matching
# "tab-info is-active" element.
active_draws = active[0].find_all("div", attrs={"class": "freq-result js-stats-item"})

In [16]:
# Build one record per statistics card in `active_draws` that shows a main ball.
main_ball = []

for draw in active_draws:
    # Cards without a "ball inline" element are not main-ball cards; skip
    # them before doing any further lookups.
    number_element = draw.find("div", class_="ball inline")
    if number_element is None:
        continue

    # All <div>s nested in the card, in document order. The second and third
    # are read for the frequency and last-drawn texts; a missing <div> or a
    # missing <strong> yields None instead of raising.
    divs = draw.find_all("div")
    frequency_tag = divs[1].find("strong") if len(divs) > 1 else None
    date_tag = divs[2].find("strong") if len(divs) > 2 else None

    main_ball.append({
        "number": number_element.text.strip(),
        "frequency": frequency_tag.text.strip() if frequency_tag is not None else None,
        "last-drawn": date_tag.text.strip() if date_tag is not None else None,
    })

In [17]:
# Tabulate the main-ball records, then show how many rows were collected.
main_ball_df = pd.DataFrame(data=main_ball)
len(main_ball_df.index)

87

In [18]:
# Write the uncleaned main-ball table; index=False omits the row index.
main_ball_df.to_csv(path_or_buf='data/main_ball_active.csv', index=False)

In [19]:
# Build one record per statistics card in `active_draws` that shows a Powerball.
powerball = []

for draw in active_draws:
    # Cards without a "powerball inline" element are not Powerball cards;
    # skip them before doing any further lookups.
    number_element = draw.find("div", class_="powerball inline")
    if number_element is None:
        continue

    # All <div>s nested in the card, in document order. The second and third
    # are read for the frequency and last-drawn texts; a missing <div> or a
    # missing <strong> yields None instead of raising.
    divs = draw.find_all("div")
    frequency_tag = divs[1].find("strong") if len(divs) > 1 else None
    date_tag = divs[2].find("strong") if len(divs) > 2 else None

    powerball.append({
        "powerball-number": number_element.text.strip(),
        "frequency": frequency_tag.text.strip() if frequency_tag is not None else None,
        "last-drawn": date_tag.text.strip() if date_tag is not None else None,
    })

In [20]:
# Tabulate the Powerball records; the dict keys become the columns.
powerball_df = pd.DataFrame(data=powerball)

In [21]:
# Row count of the Powerball table (one row per scraped card).
len(powerball_df.index)

44

In [22]:
# Write the uncleaned Powerball table; index=False omits the row index.
powerball_df.to_csv(path_or_buf='data/powerball_active.csv', index=False)

## Cleaning

In [23]:
# Remove the words "Drawn" and "times" and trim the leftover whitespace, so
# only the count remains (still stored as text at this point).
main_ball_df['frequency'] = (
    main_ball_df['frequency']
    .str.replace(r'Drawn|times', '', regex=True)
    .str.strip()
)


In [24]:
# Convert the count text to numbers; values that cannot be parsed become NaN.
main_ball_df['frequency'] = pd.to_numeric(
    main_ball_df['frequency'],
    errors='coerce',
)

In [25]:
# Parse date texts of the form "<abbreviated month> <day> <year>"; values that
# do not match the format become NaT instead of raising.
main_ball_df['last-drawn'] = pd.to_datetime(
    main_ball_df['last-drawn'],
    format='%b %d %Y',
    errors='coerce',
)


In [26]:
# Keep only the first 69 rows; presumably one per main-ball number, with the
# remaining scraped rows being other cards from the same tab (TODO confirm).
main_ball_df = main_ball_df.iloc[:69]

In [27]:
# Write the cleaned main-ball table; index=False omits the row index.
main_ball_df.to_csv(path_or_buf='data/main_ball_cleaned.csv', index=False)

In [28]:
# Remove the words "Drawn" and "times" and trim the leftover whitespace, so
# only the count remains (still stored as text at this point).
powerball_df['frequency'] = (
    powerball_df['frequency']
    .str.replace(r'Drawn|times', '', regex=True)
    .str.strip()
)


In [29]:
# Convert the count text to numbers; values that cannot be parsed become NaN.
powerball_df['frequency'] = pd.to_numeric(
    powerball_df['frequency'],
    errors='coerce',
)

In [30]:
# Parse date texts of the form "<abbreviated month> <day> <year>"; values that
# do not match the format become NaT instead of raising.
powerball_df['last-drawn'] = pd.to_datetime(
    powerball_df['last-drawn'],
    format='%b %d %Y',
    errors='coerce',
)

In [31]:
# Keep only the first 26 rows; presumably one per Powerball number, with the
# remaining scraped rows being other cards from the same tab (TODO confirm).
powerball_df = powerball_df.iloc[:26]

In [32]:
# Write the cleaned Powerball table; index=False omits the row index.
powerball_df.to_csv(path_or_buf='data/powerball_cleaned.csv', index=False)