This is a script to download and unpack old Firefox and Firefox Nightly
releases from https://ftp.mozilla.org/ (the list of Nightly builds is read
from https://hg.mozilla.org/).

In [1]:
# Some basic python packages to use
import requests
import re
import os.path
import zipfile
from html.parser import HTMLParser
from datetime import datetime

In [2]:
# Local filesystem paths to store the downloaded zip packages in: one
# directory for release builds and one for Nightly builds.
ReleaseDownloadDir = "D:/FxDownload/Release"
NightlyDownloadDir = "D:/FxDownload/Nightly"

# Local filesystem path to unpack browsers into. Every build is unpacked
# into its own `firefox-<version or build id>` sub-directory of this path.
UnpackDestinationDir = "C:/Users/Testing/Desktop/Firefoxen"

# The target platform to fetch packages for. Used to select rows of the
# Nightly build index and to pick the matching Nightly zip package.
TargetPlatform = "win64"

In [3]:
# Utility code to scrape an index page (directory listing) on ftp.mozilla.org for links.

class IndexFileParser(HTMLParser):
    def __init__(self):
        self.raw_links = []
        return super().__init__()

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        if tag == 'a':
            for (key, val) in attrs:
                if key == 'href':
                    self.raw_links.append(val)
        return super().handle_starttag(tag, attrs)

def GetLinksFromIndex(url, timeout=60):
    """Download an HTML index page and return the links found on it.

    url: address of the page to fetch.
    timeout: seconds to wait for the server before giving up. Without a
        timeout `requests` waits indefinitely, which would stall the whole
        notebook on a hung connection.

    Returns the href values collected by IndexFileParser, in document order.
    Raises a `requests` exception when the request fails, times out or the
    server answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    parser = IndexFileParser()
    parser.feed(response.text)
    parser.close()

    return parser.raw_links

In [4]:
##
## Fetch version numbers for Firefox release builds.
##

# Directory listing with one sub-directory link per Firefox release version.
ReleaseIndexURL = "https://ftp.mozilla.org/pub/firefox/releases/"

# Firefox 52 is the earliest version that still works reasonably well
# with GeckoDriver / Selenium and runs well on modern operating
# systems. Builds with a lower major version are ignored.
MinimumInterestingRelease = 52

def MajorVersion(version_string):
    """Return the major version of a dotted version string as an int.

    Only the text in front of the first '.' is looked at, so "52.0.1"
    gives 52. Raises ValueError when that text is not an integer.
    """
    major_text = version_string.split('.', 1)[0]
    return int(major_text)

def FilterReleases(raw_links, minimum_major=None):
    """Extract the interesting release version numbers from index links.

    raw_links: href values scraped from the release index page.
    minimum_major: lowest major version to keep. Defaults to
        MinimumInterestingRelease, so the cut-off is configured in one place
        instead of being repeated here as a literal.

    Returns version strings such as "52.0" or "52.0.1" in input order. Only
    links to plain `major.minor[.patch]` directories are kept; anything with
    a suffix (for example `b1` or `esr`) does not match and is dropped.
    """
    if minimum_major is None:
        minimum_major = MinimumInterestingRelease

    # Group 1 is the full version string, group 2 just its major number.
    LinkPattern = re.compile(r'^/pub/firefox/releases/((\d+)\.\d+(?:\.\d+)?)/$')

    releases = []
    for link in raw_links:
        # Anchors without a usable href may show up as None / empty.
        match = LinkPattern.match(link) if link else None
        if match and int(match.group(2)) >= minimum_major:
            releases.append(match.group(1))
    return releases

def GroupDotReleases(versions):
    """Group version strings by major version.

    Returns a list of lists, one per major version, ordered by ascending
    major version. Inside a group the versions keep the order in which
    they appear in `versions`.
    """
    by_major = {}
    for version in versions:
        by_major.setdefault(MajorVersion(version), []).append(version)

    return [by_major[major] for major in sorted(by_major)]

def GetFirstReleasePerVersion(versions):
    """Return, for each major version, the entry listed first in `versions`."""
    firsts = []
    for dot_releases in GroupDotReleases(versions):
        firsts.append(dot_releases[0])
    return firsts

def GetLastReleasePerVersion(versions):
    """Return, for each major version, the entry listed last in `versions`."""
    lasts = []
    for dot_releases in GroupDotReleases(versions):
        lasts.append(dot_releases[-1])
    return lasts

# List of all interesting releases, sorted by version number.
# The version components are compared as integers rather than as text, so
# that e.g. "52.0.10" sorts after "52.0.2" (a plain string comparison would
# put it first, and the "first/last release per version" helpers rely on
# this ordering).
releases = sorted(
    FilterReleases(GetLinksFromIndex(ReleaseIndexURL)),
    key=lambda ver: tuple(int(part) for part in ver.split('.')),
)

In [5]:
##
## Fetch BuildIds for Firefox nightly builds.
##

# Page with a table of Firefox builds; its rows are parsed below to obtain
# the build id, platform and version of each build.
NightlyBuildIndex = "https://hg.mozilla.org/mozilla-central/firefoxreleases"

class NightlyIndexFileParser(HTMLParser):
    """HTMLParser for the `firefoxreleases` page"""
    def __init__(self):
        self.builds = []
        self.row = None
        return super().__init__()

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        if tag == 'tr':
            assert self.row is None
            self.row = []
        return super().handle_starttag(tag, attrs)

    def handle_data(self, data: str) -> None:
        if self.row is not None:
            txt = data.strip()
            if txt:
                self.row.append(txt)
        return super().handle_data(data)

    def handle_endtag(self, tag: str) -> None:
        if tag == 'tr':
            assert self.row is not None
            self.builds.append(self.row)
            self.row = None
        return super().handle_endtag(tag)

def BuildIdToDatetime(build):
    """Parse a build id of the form YYYYMMDDHHMMSS into a naive datetime.

    Raises ValueError when the string does not match that format.
    """
    return datetime.strptime(build, "%Y%m%d%H%M%S")

def TestNightlyIndex(timeout=60):
    """Download the build index page and return the interesting builds.

    timeout: seconds to wait for the server before giving up; without it a
        hung connection would block the notebook forever.

    Each table row is read positionally: column 1 is the build id, column 3
    the platform and column 4 the version string. Rows are kept when the
    platform equals TargetPlatform and the major version is at least
    MinimumInterestingRelease.

    Returns a list of `[build_id, major_version]` pairs sorted by ascending
    build id. Raises a `requests` exception when the download fails.
    """
    response = requests.get(NightlyBuildIndex, timeout=timeout)
    response.raise_for_status()

    parser = NightlyIndexFileParser()
    parser.feed(response.text)
    parser.close()

    # Strip header row. The extra checks keep an empty table (or an empty
    # first row) from raising IndexError here.
    builds = parser.builds
    if builds and builds[0] and builds[0][0] == 'Revision':
        builds = builds[1:]

    # Sort by ascending BuildId, leaving the parser's own list untouched.
    builds = sorted(builds, key=lambda row: row[1])

    def FilterRow(row):
        return row[3] == TargetPlatform and MajorVersion(row[4]) >= MinimumInterestingRelease

    def TransformRow(row):
        # Sanity check on the table layout for every row that is kept.
        assert len(row) == 6
        return [row[1], MajorVersion(row[4])]

    return [TransformRow(row) for row in builds if FilterRow(row)]

# [build id, major version] pairs of the matching builds, in ascending
# build id order. Note that this line performs a network request.
nightlies = TestNightlyIndex()

In [6]:
##
## Determine download links for Firefox builds
##

# Check the candidates directory for last build so we can get
# zip packages instead of full installers.

def FindBuildCandidate(ver):
    """Return the highest-numbered build candidate directory of a release.

    ver: release version string, e.g. "52.0.1".

    Returns the directory name as it appears in the candidates index, e.g.
    "build2". Raises IndexError when the index lists no `buildN` directory.
    """
    CandidatesIndexURL = "https://ftp.mozilla.org/pub/firefox/candidates/{}-candidates/".format(ver)
    raw_links = GetLinksFromIndex(CandidatesIndexURL)

    # The version is escaped so that its dots only match literal dots
    # instead of acting as regex wildcards.
    LinkPattern = r'^/pub/firefox/candidates/{}-candidates/(build\d+)/$'.format(re.escape(ver))
    matches = (re.match(LinkPattern, link) for link in raw_links if link)
    builds = [match.group(1) for match in matches if match]
    if not builds:
        raise IndexError("No build candidates found for {}".format(ver))

    # Compare the build numbers as integers so that build10 beats build9.
    builds.sort(key=lambda x: int(x.partition("build")[2]))
    return builds[-1]

def GetBuildCandidateUrl(ver):
    """Return the download URL of the en-US zip package of a release.

    The package is taken from the highest-numbered build candidate of `ver`
    and from the directory of the configured TargetPlatform, matching how
    the Nightly packages are selected.
    """
    # NOTE(review): the `firefox-<ver>.zip` file name presumably only exists
    # for Windows platforms -- confirm before pointing TargetPlatform at a
    # non-Windows value.
    build = FindBuildCandidate(ver)
    return "https://ftp.mozilla.org/pub/firefox/candidates/{}-candidates/{}/{}/en-US/firefox-{}.zip".format(
        ver, build, TargetPlatform, ver)

def GetNightlyUrl(build):
    """Return the download URL of the en-US zip package of a Nightly build.

    build: build id in YYYYMMDDHHMMSS form.

    The build's directory listing is fetched and must contain exactly one
    link ending in `.en-US.<TargetPlatform>.zip`; otherwise an Exception is
    raised.
    """
    # Cut the build id into year, month, day, hour, minute and second; the
    # directory name is those six pieces joined with dashes.
    year, month = build[:4], build[4:6]
    dashed = "-".join([year, month, build[6:8], build[8:10], build[10:12], build[12:14]])

    IndexURL = "https://ftp.mozilla.org/pub/firefox/nightly/{}/{}/{}-mozilla-central/".format(year, month, dashed)
    packages = [
        href for href in GetLinksFromIndex(IndexURL)
        if href.endswith(".en-US.{}.zip".format(TargetPlatform))
    ]

    if len(packages) != 1:
        raise Exception("Could not find expected packages for nightly build")

    package = packages[0]
    assert package.startswith("/pub/firefox/")
    return "https://ftp.mozilla.org" + package

In [7]:
##
## Download package for a rev if needed.
##

def IsNightlyRev(build_or_ver):
    """Tell a Nightly build id from a release version purely by its length.

    Anything that is exactly 14 characters long (the length of a
    YYYYMMDDHHMMSS build id) is treated as a Nightly build.
    """
    BuildIdLength = 14
    return len(build_or_ver) == BuildIdLength

def LocalDownloadPath(build_or_ver):
    """Return the local path of the zip package of a release or Nightly.

    Nightly builds live under NightlyDownloadDir, releases under
    ReleaseDownloadDir; the file is named `firefox-<build_or_ver>.zip`.
    """
    download_dir = NightlyDownloadDir if IsNightlyRev(build_or_ver) else ReleaseDownloadDir
    package_name = 'firefox-{}.zip'.format(build_or_ver)
    return os.path.join(os.path.normpath(download_dir), package_name)

def CheckIfDownloaded(build_or_ver):
    """Return True when the package exists locally and is not empty."""
    package_path = LocalDownloadPath(build_or_ver)
    if not os.path.isfile(package_path):
        return False
    return os.path.getsize(package_path) > 0

def MaybeDownloadPackage(build_or_ver, timeout=60):
    """Download the zip package of a release or Nightly build if missing.

    build_or_ver: release version string or Nightly build id.
    timeout: seconds to wait for the server before giving up.

    Does nothing when a non-empty package is already present. Otherwise the
    package is streamed to disk in chunks, so it never has to fit in memory
    in one piece. Raises a `requests` exception when the download fails.
    """
    if CheckIfDownloaded(build_or_ver):
        return

    print("Fetching {}".format(build_or_ver))

    if IsNightlyRev(build_or_ver):
        url = GetNightlyUrl(build_or_ver)
    else:
        url = GetBuildCandidateUrl(build_or_ver)

    localPath = LocalDownloadPath(build_or_ver)
    os.makedirs(os.path.dirname(localPath), exist_ok=True)

    # Write to a temporary sibling file and rename it once complete. An
    # interrupted transfer then never leaves a truncated package behind that
    # CheckIfDownloaded would mistake for a finished download.
    partialPath = localPath + ".part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partialPath, "wb") as localFile:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    localFile.write(chunk)
        os.replace(partialPath, localPath)
    except BaseException:
        # Also covers a kernel interrupt: drop the partial file, then
        # let the original error propagate.
        if os.path.exists(partialPath):
            os.remove(partialPath)
        raise

In [8]:
# Fetch any missing release builds. Only one release per major version is
# considered: the one listed first for that major version in `releases`.
# Set the flag to False to skip this step when re-running the notebook.
shouldFetchReleases = True

if shouldFetchReleases:
    for ver in GetFirstReleasePerVersion(releases):
        MaybeDownloadPackage(ver)

In [9]:
# Choose the first Nightly build of each slice of each ISO week

def BuildByIsoWeek(nightlies, min_build="20220200000000", max_build="20250000000000"):
    """Thin out the Nightly builds to the first build of each week slice.

    nightlies: rows whose first element is a build id (YYYYMMDDHHMMSS).
    min_build, max_build: inclusive bounds on the build id; ids are compared
        as strings.

    Builds are bucketed by (ISO year, ISO week, ISO weekday // 2). With ISO
    weekdays running from 1 (Monday) to 7 (Sunday) this gives up to four
    buckets per week: Monday, Tuesday-Wednesday, Thursday-Friday and
    Saturday-Sunday. The earliest build of every bucket is kept.

    Returns the selected build ids in ascending order.
    """
    # Sorting first makes "first build of a bucket" mean the earliest one,
    # regardless of the order of the incoming rows.
    builds = sorted(row[0] for row in nightlies if min_build <= row[0] <= max_build)

    first_in_bucket = {}
    for build in builds:
        cal = BuildIdToDatetime(build).isocalendar()
        bucket = (cal.year, cal.week, cal.weekday // 2)

        # Only the first build seen for a bucket is recorded.
        first_in_bucket.setdefault(bucket, build)

    return sorted(first_in_bucket.values())

In [10]:
# Fetch any missing nightly builds (the subset chosen by BuildByIsoWeek).
# Set the flag to False to skip this step when re-running the notebook.
shouldFetchNightlies = True

if shouldFetchNightlies:
    for build in BuildByIsoWeek(nightlies):
        MaybeDownloadPackage(build)

Fetching 20221022091949
Fetching 20221025094808
Fetching 20221029095010
Fetching 20221101093931
Fetching 20221105092350
Fetching 20221108094235
Fetching 20221112094729
Fetching 20221115095444
Fetching 20221119085828
Fetching 20221122094606
Fetching 20221127093601
Fetching 20221129084032
Fetching 20221203092459
Fetching 20221206034609
Fetching 20221210092830
Fetching 20221213041109
Fetching 20221217093017
Fetching 20221220093956
Fetching 20221224090645
Fetching 20221227093156
Fetching 20221231091949


In [11]:
# Unpack downloaded builds. The two flags select whether release builds
# and/or Nightly builds get unpacked.
shouldUnpackReleases = False
shouldUnpackNightly = True

def LocalUnpackPath(ver_or_build):
    """Return the directory a build gets unpacked into.

    The directory is `firefox-<ver_or_build>` (with any dashes removed from
    the version / build id) inside UnpackDestinationDir.
    """
    dir_name = 'firefox-{}'.format(ver_or_build.replace("-", ""))
    unpack_root = os.path.normpath(UnpackDestinationDir)
    return os.path.join(unpack_root, dir_name)

def CheckIfUnpacked(ver_or_build):
    """Return True when the unpack directory exists and holds firefox.exe."""
    unpack_dir = LocalUnpackPath(ver_or_build)
    if not os.path.isdir(unpack_dir):
        return False
    return os.path.isfile(os.path.join(unpack_dir, "firefox.exe"))

def UnpackFirefoxZip(zippath, dstpath):
    """Unpack a firefox.zip package into target path, while stripping
       the top-level firefox directory.

       zippath: path of the zip package to read.
       dstpath: directory to extract into; `firefox/foo/bar` in the archive
           ends up as `<dstpath>/foo/bar`.

       Raises ValueError when an archive entry is not located under
       `firefox/`. All entries are checked before anything is extracted.
       """
    TopLevelPrefix = "firefox/"

    with zipfile.ZipFile(zippath, 'r') as zip_ref:
        to_extract = []

        # Strip "firefox/" prefix
        for member in zip_ref.infolist():
            # An explicit check (rather than an assert) so that validation
            # is still performed when Python runs with assertions disabled.
            if not member.filename.startswith(TopLevelPrefix):
                raise ValueError(
                    "Unexpected entry outside of firefox/: {}".format(member.filename))

            stripped = member.filename[len(TopLevelPrefix):]
            if not stripped:
                # Entry for the top-level "firefox/" directory itself. It
                # maps onto dstpath, and extracting an entry with an empty
                # name would fail, so leave it out.
                continue

            member.filename = stripped
            to_extract.append(member)

        zip_ref.extractall(dstpath, to_extract)

def MaybeUnpackBuild(build):
    """Unpack the downloaded package of a build unless already unpacked.

    Raises an Exception when the package has not been downloaded. A build
    counts as unpacked when CheckIfUnpacked says so, in which case nothing
    is done.
    """
    # Download must already have been done for us
    if not CheckIfDownloaded(build):
        raise Exception("Could not find download")

    # An existing unpacked copy means there is nothing left to do
    if not CheckIfUnpacked(build):
        print("Unpacking {}".format(build))
        UnpackFirefoxZip(LocalDownloadPath(build), LocalUnpackPath(build))

# Unpack the same release builds that the release download step selects
# (first listed release of each major version).
if shouldUnpackReleases:
    for rev in GetFirstReleasePerVersion(releases):
        MaybeUnpackBuild(rev)

# Unpack the same Nightly builds that the nightly download step selects.
if shouldUnpackNightly:
    for build in BuildByIsoWeek(nightlies):
        MaybeUnpackBuild(build)

Unpacking 20220201093942
Unpacking 20220203003951
Unpacking 20220205091402
Unpacking 20220207065816
Unpacking 20220208070047
Unpacking 20220210065747
Unpacking 20220212094743
Unpacking 20220214092817
Unpacking 20220215092702
Unpacking 20220217094417
Unpacking 20220219093323
Unpacking 20220221094019
Unpacking 20220222093709
Unpacking 20220224093648
Unpacking 20220226094610
Unpacking 20220228090129
Unpacking 20220301094029
Unpacking 20220303094735
Unpacking 20220305092613
Unpacking 20220307093830
Unpacking 20220308092232
Unpacking 20220310065911
Unpacking 20220312101128
Unpacking 20220314094248
Unpacking 20220315091352
Unpacking 20220317092857
Unpacking 20220319094158
Unpacking 20220321065848
Unpacking 20220322065927
Unpacking 20220324093615
Unpacking 20220326092842
Unpacking 20220328093900
Unpacking 20220329095604
Unpacking 20220331093541
Unpacking 20220402094413
Unpacking 20220404093932
Unpacking 20220405094056
Unpacking 20220407092959
Unpacking 20220409092224
Unpacking 20220411094830
