<a href="https://colab.research.google.com/github/olga-terekhova/colabs/blob/main/Get_UFLI_Toolbox.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Download UFLI toolbox files for selected lessons



**Specify parameters 1.1. - 1.3. below.**  
Refer to https://ufli.education.ufl.edu/foundations/toolbox/ to find the needed toolbox.

**Select menu *(Runtime - Run all)* or press *(Ctrl-F9)* to prepare files.**  

**Download the files** from the *Files* section on the left. There will be separate lesson files plus a merged file named "UFLI_toolbox_&lt;first lesson&gt;_&lt;last lesson&gt;.pdf" (for example, "UFLI_toolbox_19_22.pdf") containing all separate lesson PDFs.  

To reset and to download a new set of files, select menu *(Runtime - Disconnect and delete runtime)*, and start from the beginning.

## 1. Set up parameters

In [1]:
# 1.1. Select the current UFLI toolbox
# in the drop down on the right.
# The selected value is the last path segment of the toolbox page URL
# that is built in the "Set up file paths" cell.

current_toolbox = "1-34" #@param ["1-34", "35-41", "42-53", "54-62", "63-68", "69-76", "77-83", "84-88", "89-94", "95-98", "99-106", "107-110", "111-118", "119-128" ]

In [2]:
# 1.2. Specify the list of lessons to download, separated by commas.
# Only the toolbox page selected in 1.1 is searched, so the lessons
# have to be listed on that page to be found.

lesson_number_list = [19, 20, 21, 22]

In [3]:
# 1.3. Specify the list of needed resource types.
# Possible values: 'Decodable Passage', 'Home Practice', 'Roll and Read', 'Google Slides', 'PowerPoint'
# The names are compared as-is with the link text on the toolbox page,
# so spelling and capitalization must match exactly.

resource_type_list = ['Decodable Passage', 'Home Practice', 'Roll and Read']

## 2. Download and merge files

In [4]:
# Set up file paths

def _lesson_sort_key(lesson):
    """Return a key that orders lesson labels by their leading number.

    Comparing the labels as plain strings would put "10" before "9".
    Labels without a leading number get number 0; ties are broken by the text.
    """
    prefix_len = len(lesson) - len(lesson.lstrip("0123456789"))
    number = int(lesson[:prefix_len]) if prefix_len else 0
    return (number, lesson)


if not lesson_number_list:
    raise ValueError("lesson_number_list is empty. Specify at least one lesson in step 1.2.")

url = "https://ufli.education.ufl.edu/foundations/toolbox/" + current_toolbox + "/"

# Lesson numbers are kept as strings because they are later matched
# against the text of the links found on the toolbox page.
lesson_number_list = [str(x) for x in lesson_number_list]

# Pick the first and last lesson numerically, not alphabetically,
# so that e.g. lessons 9 and 10 produce "UFLI_toolbox_9_10.pdf".
min_lesson = min(lesson_number_list, key=_lesson_sort_key)
max_lesson = max(lesson_number_list, key=_lesson_sort_key)
merged_pdf_name = "UFLI_toolbox_" + min_lesson + "_" + max_lesson + ".pdf"

In [5]:
# Define download function

import requests

def download_file(url, filename, timeout=30):
    """Download ``url`` and save the response body to ``filename``.

    Args:
        url: Address of the file to fetch.
        filename: Local path the content is written to (overwritten if it exists).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True if the file was saved; False if the request could not be made
        or the server answered with a status code other than 200.
    """
    try:
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        # Report network problems the same way as HTTP errors instead of
        # aborting the whole download loop with a traceback.
        print(f"Failed to download file. Error: {error}")
        return False

    if response.status_code != 200:
        print(f"Failed to download file. Status code: {response.status_code}")
        return False

    with open(filename, 'wb') as f:
        f.write(response.content)
    print(f"File downloaded successfully as {filename}")
    return True

In [6]:
# Get links to the resources listed in the toolbox

from bs4 import BeautifulSoup

# Start with an empty list so that the final print and the download cell
# still work when the toolbox page cannot be reached.
selected_results = []

# Lesson numbers are compared as text because they are matched against link titles.
wanted_lessons = {str(x) for x in lesson_number_list}

# A timeout keeps the cell from hanging if the site does not respond.
response = requests.get(url, timeout=30)

if response.status_code == 200:
    print(f"Connected to toolbox {current_toolbox}")
    soup = BeautifulSoup(response.content, "html.parser")

    for div in soup.find_all("div", class_="et_pb_text_inner"):
        table = div.find("table")
        if table is None:
            continue

        for a in table.find_all("a"):
            # The link text is split at the first space into
            # "<lesson number> <resource type>", e.g. "19 Home Practice".
            full_title = a.get_text(strip=True)
            lesson_number, _sep, resource_type = full_title.partition(" ")
            address = a.get("href")

            # Apply filter (a link without an href cannot be downloaded, so skip it)
            if address and resource_type in resource_type_list and lesson_number in wanted_lessons:
                selected_results.append({
                    "title": full_title,
                    "address": address,
                    "lesson_number": lesson_number,
                    "resource_type": resource_type
                })
else:
    print(f"Failed to connect to toolbox {current_toolbox}. Status code: {response.status_code}")


print(selected_results)

Connected to toolbox 1-34
[{'title': '19 Decodable Passage', 'address': 'https://ufli.education.ufl.edu/wp-content/uploads/2022/07/19_Decodable_UFLIFoundations.pdf', 'lesson_number': '19', 'resource_type': 'Decodable Passage'}, {'title': '19 Home Practice', 'address': 'https://ufli.education.ufl.edu/wp-content/uploads/2022/08/19_HomePractice_UFLI-Foundations.pdf', 'lesson_number': '19', 'resource_type': 'Home Practice'}, {'title': '19 Roll and Read', 'address': 'https://ufli.education.ufl.edu/wp-content/uploads/2022/07/19_RollRead_UFLIFoundations.pdf', 'lesson_number': '19', 'resource_type': 'Roll and Read'}, {'title': '20 Decodable Passage', 'address': 'https://ufli.education.ufl.edu/wp-content/uploads/2022/07/20_Decodable_UFLIFoundations.pdf', 'lesson_number': '20', 'resource_type': 'Decodable Passage'}, {'title': '20 Home Practice', 'address': 'https://ufli.education.ufl.edu/wp-content/uploads/2022/08/20_HomePractice_UFLI-Foundations.pdf', 'lesson_number': '20', 'resource_type': 'Ho

In [7]:
# Download resource files locally

import os
from urllib.parse import urlparse

# Names of the local files, in the order the resources were selected
downloaded_filenames = []

# Fetch every selected resource into the current directory
for result in selected_results:
    address = result["address"]

    # The local name is the last segment of the URL path
    filename = os.path.basename(urlparse(address).path)

    # Record the name, then fetch the file
    downloaded_filenames.append(filename)
    download_file(address, filename)

File downloaded successfully as 19_Decodable_UFLIFoundations.pdf
File downloaded successfully as 19_HomePractice_UFLI-Foundations.pdf
File downloaded successfully as 19_RollRead_UFLIFoundations.pdf
File downloaded successfully as 20_Decodable_UFLIFoundations.pdf
File downloaded successfully as 20_HomePractice_UFLI-Foundations.pdf
File downloaded successfully as 20_RollRead_UFLIFoundations.pdf
File downloaded successfully as 21_Decodable_UFLIFoundations.pdf
File downloaded successfully as 21_HomePractice_UFLI-Foundations-1.pdf
File downloaded successfully as 21_RollRead_UFLIFoundations.pdf
File downloaded successfully as 22_Decodable_UFLIFoundations-k.pdf
File downloaded successfully as 22_HomePractice_UFLI-Foundations-1.pdf
File downloaded successfully as 22_RollRead_UFLIFoundations.pdf


In [8]:
# Install PDF library

!pip install -q PyPDF2

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/232.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━[0m [32m153.6/232.6 kB[0m [31m4.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [9]:
# Define function to merge PDFs in a single file

def _lesson_file_sort_key(filename):
    """Return a key that orders files by their leading lesson number.

    A plain alphabetical sort puts "100_..." before "19_..."; comparing the
    numeric prefix as an integer keeps the files in lesson order. Files
    without a numeric prefix are placed last, in alphabetical order.
    """
    prefix_len = len(filename) - len(filename.lstrip("0123456789"))
    if prefix_len:
        return (0, int(filename[:prefix_len]), filename)
    return (1, 0, filename)


def merge_pdfs():
    """Merge the PDF files of the current directory into ``merged_pdf_name``.

    The files are merged in lesson order (numeric file name prefix, then
    name). Merged files produced by earlier runs ("UFLI_toolbox_*.pdf") are
    left out so they are not merged into the new file again.

    Returns:
        A status message: the list of merged files, or the reason why
        nothing was done (no PDFs found, or the merged file already exists).
    """
    import os

    # create a list of all PDF files in the current directory
    pdf_files = [name for name in os.listdir() if name.endswith('.pdf')]

    if len(pdf_files) == 0:
        return "No PDF files found. No action taken."

    if merged_pdf_name in pdf_files:
        return "File " + merged_pdf_name + " already exists. No action taken. Delete it first."

    # skip merged outputs of previous runs with a different lesson range
    pdf_files = [name for name in pdf_files if not name.startswith("UFLI_toolbox_")]

    if len(pdf_files) == 0:
        return "No PDF files found. No action taken."

    # sort pdf_files in lesson order
    pdf_files.sort(key=_lesson_file_sort_key)

    # imported here so the checks above do not require the library
    import PyPDF2

    # merge all the files in the pdf_files list into a single pdf
    merger = PyPDF2.PdfMerger()
    try:
        for filename in pdf_files:
            merger.append(filename)

        merger.write(merged_pdf_name)
    finally:
        # release the file handles held by the merger even if merging fails
        merger.close()

    # get the string of filenames in pdf_files
    pdf_files_str = ', \n'.join(pdf_files)

    return "Files merged:\n" + pdf_files_str + " \n\nRefresh the Files area and locate " + merged_pdf_name + "."

In [10]:
# Merge all PDF files downloaded locally
# and show the status message returned by merge_pdfs()
print(merge_pdfs())

Files merged:
19_Decodable_UFLIFoundations.pdf, 
19_HomePractice_UFLI-Foundations.pdf, 
19_RollRead_UFLIFoundations.pdf, 
20_Decodable_UFLIFoundations.pdf, 
20_HomePractice_UFLI-Foundations.pdf, 
20_RollRead_UFLIFoundations.pdf, 
21_Decodable_UFLIFoundations.pdf, 
21_HomePractice_UFLI-Foundations-1.pdf, 
21_RollRead_UFLIFoundations.pdf, 
22_Decodable_UFLIFoundations-k.pdf, 
22_HomePractice_UFLI-Foundations-1.pdf, 
22_RollRead_UFLIFoundations.pdf 

Refresh the Files area and locate UFLI_toolbox_19_22.pdf.
