# Download files in Python

`requests` is the main library for making HTTP requests in Python: https://requests.readthedocs.io/en/latest/

In [1]:
import requests

## Download an image

In [2]:
# URL of the image to download
image_url = 'https://www.python.org/static/community_logos/python-logo-master-v3-TM.png'

# Fetch the image. The timeout keeps the cell from hanging forever on an
# unresponsive server, and raise_for_status() turns an HTTP error (404, 500, ...)
# into an exception here instead of letting an error page be saved as the image.
r = requests.get(image_url, timeout=30)
r.raise_for_status()

In [3]:
# write the raw bytes of the response body to a local file in binary mode
with open('python_logo.png', mode='wb') as image_file:
    image_file.write(r.content)

## Download a PDF

In [4]:
# URL of the PDF to download
pdf_url = 'https://www.db-book.com/slides-dir/PDF-dir/ch1.pdf'

# stream=True defers downloading the body until it is iterated, so reading the
# response with iter_content() really fetches it piece by piece instead of
# loading the whole PDF into memory first.
# The timeout avoids hanging on an unresponsive server, and raise_for_status()
# raises on an HTTP error instead of letting an error page be saved as the PDF.
r = requests.get(pdf_url, stream=True, timeout=30)
r.raise_for_status()

In [5]:
# save the PDF by writing the response body chunk by chunk (chunk_size is in bytes)
with open('python.pdf', mode='wb') as pdf_file:
    for block in r.iter_content(chunk_size=1024):
        # skip empty chunks
        if not block:
            continue
        pdf_file.write(block)

## Download a video from a website

Beautiful Soup is a popular Python library for extracting data from HTML and XML documents. We will talk about it later in the course.

In [6]:
from bs4 import BeautifulSoup

In [7]:
# page that embeds the video we want to download
url = 'https://www.loc.gov/item/98501222/'

# get the HTML from the website; the timeout stops the cell from hanging on an
# unresponsive server, and raise_for_status() fails here on an HTTP error
# instead of letting an error page be parsed as if it were the item page
r = requests.get(url, timeout=30)
r.raise_for_status()

In [8]:
# build a Beautiful Soup tree from the downloaded page using Python's built-in HTML parser
html_bytes = r.content
soup = BeautifulSoup(html_bytes, 'html.parser')

In [9]:
# locate the <source> tag for each video format via a CSS selector,
# then read the URL stored in its `src` attribute
mpeg_source = soup.select_one('source[type="application/x-mpegURL"]')
link_mpeg = mpeg_source['src']

mp4_source = soup.select_one('source[type="video/mp4"]')
link_mp4 = mp4_source['src']

In [10]:
# show both video URLs, one per line
print(link_mpeg, link_mp4, sep='\n')

https://tile.loc.gov/streaming-services/iiif/media:ammem:SpanishAmericanWar:0020/full/full/0/full/default.m3u8
https://tile.loc.gov/storage-services/media/ammem/SpanishAmericanWar/0020.mp4


In [11]:
# the file name is whatever follows the last '/' in the video URL
file_name = link_mp4.rpartition('/')[2]

file_name

'0020.mp4'

In [12]:
# stream=True defers downloading the body until it is iterated, so the video
# is not loaded into memory all at once.
# The timeout avoids hanging on an unresponsive server, and raise_for_status()
# raises on an HTTP error instead of letting an error body be saved as the video.
r = requests.get(link_mp4, stream=True, timeout=30)
r.raise_for_status()

In [13]:
# write the streamed video to disk piece by piece (chunk_size is in bytes: 1024 * 1024)
with open(file_name, mode='wb') as video_file:
    for piece in r.iter_content(chunk_size=1024 ** 2):
        # skip empty chunks
        if not piece:
            continue
        video_file.write(piece)