In [2]:
import json
from pprint import pprint as PP
import pandas as pd
from collections import Counter
import random
import requests
from bs4 import BeautifulSoup as BS

### Get historical data
Inspecting the Texas lotto website, we find the data that feeds the historical numbers.

Using the data that feeds the webpage for historical data, grab a large number of records
(12 years) to get a good sample of data.

Parse the data into a json object and load the contents into a python dictionary.

In [3]:
url = "https://www.texaslottery.com/export/sites/lottery/Games/Powerball/Winning_Numbers/index.html_2013354932.html"
page = requests.get(url)

soup = BS(page.content, "html.parser")
elements = soup.findAll('td')
rows = len(elements)//6

content = []
for i in range(rows):
    try:
        date = str(elements[i*6+0]).split('">')[1].split("<")[0]
        balls = str(elements[i*6+1])[4:-5].split(" - ")
        n1 = balls[0]
        n2 = balls[1]
        n3 = balls[2]
        n4 = balls[3]
        n5 = balls[4]
    
        pb = str(elements[i*6+2])[4:-5]
    except:
        continue
    
    content.append(
        {"date": date,
        "n1": n1,
        "n2": n2,
        "n3": n3,
        "n4": n4,
        "n5": n5,
        "powerball": pb})

    

### Most common bins
Since numbers are stored in sorted order, we can bin the most common occurrences in 5 sets with the Powerball Ball.

In [4]:
df = pd.DataFrame(content)
df.iloc[:, 1:7].mode()

Unnamed: 0,n1,n2,n3,n4,n5,powerball
0,5.0,15,37.0,56.0,59.0,16
1,,28,,,,5


### Other Statistics
All 5 normal balls are looked at to see which numbers appear most commonly.
The Mega Ball is looked at separately.

Reduces the number of balls to only the top 50% to increase odds

In [53]:
balls = df.iloc[:, 1:6].to_numpy().flatten()
int_balls = [int(ball) for ball in balls]
pb = df.iloc[:, 6].to_numpy().flatten()
int_pb = [int(b) for b in pb]

# change here to use a larger or smaller percentage of the top numbers
top_numbers = max(int_balls)//2
top_pbs = max(int_pb)//2

top_balls = Counter(balls).most_common(top_numbers)
top_pb = Counter(pb).most_common(top_numbers)

print(f"{top_numbers} most common draws:")
for mode in top_balls:
    print(f"Ball: {mode[0]} x{mode[1]}")
for mode in top_pb:
    print(f"Powerball Ball: {mode[0]} x{mode[1]}")


34 most common draws:
Ball: 56 x25
Ball: 19 x25
Ball: 59 x25
Ball: 23 x24
Ball: 37 x24
Ball: 18 x23
Ball: 54 x23
Ball: 28 x22
Ball: 3 x22
Ball: 16 x22
Ball: 5 x22
Ball: 50 x22
Ball: 53 x21
Ball: 36 x21
Ball: 29 x20
Ball: 58 x20
Ball: 10 x20
Ball: 39 x20
Ball: 41 x20
Ball: 13 x19
Ball: 38 x19
Ball: 4 x19
Ball: 57 x19
Ball: 15 x19
Ball: 31 x19
Ball: 55 x18
Ball: 51 x18
Ball: 26 x18
Ball: 34 x18
Ball: 7 x18
Ball: 45 x17
Ball: 2 x17
Ball: 35 x17
Ball: 40 x17
Powerball Ball: 5 x12
Powerball Ball: 16 x12
Powerball Ball: 21 x11
Powerball Ball: 4 x10
Powerball Ball: 25 x10
Powerball Ball: 8 x10
Powerball Ball: 18 x9
Powerball Ball: 10 x9
Powerball Ball: 6 x9
Powerball Ball: 9 x9
Powerball Ball: 15 x9
Powerball Ball: 14 x8
Powerball Ball: 20 x7
Powerball Ball: 11 x7
Powerball Ball: 13 x7
Powerball Ball: 17 x7
Powerball Ball: 2 x6
Powerball Ball: 12 x6
Powerball Ball: 26 x6
Powerball Ball: 19 x6
Powerball Ball: 23 x6
Powerball Ball: 24 x6
Powerball Ball: 22 x6
Powerball Ball: 33 x6
Powerball Bal

In [62]:
numbers = []

while (len(numbers) < 5):
    draw = random.choice(top_balls)[0]
    if draw not in numbers:
        numbers.append(draw)

pb = random.choice(top_pb)[0]

print(f"Numbers: {numbers}")
print(f"Powerball: {pb}")


Numbers: ['40', '55', '4', '45', '59']
Powerball: 9
