# Download Files

In [1]:
import requests

import os

import zipfile

# Lab: Downloading Files From Web Servers Using Python

## Download a text file, such as a csv file or json file

In [2]:
# Fetch the CSV file from the web server.
# verify=False turns off TLS certificate verification.
# NOTE(review): presumably needed because the lab server uses a self-signed
# certificate -- confirm; do not use verify=False against public sites.
# timeout: without it, requests waits forever if the server stops responding.
r = requests.get("https://nginx/downloads/temp_stores.csv", verify=False, timeout=30)



In [3]:
# Show the HTTP status code of the response (200 means the request succeeded).
r.status_code

200

In [4]:
# Show the response body decoded to a Python string.
r.text

'store_id,street,city,state,zip,latitude,longitude\n1,3000 Telegraph Ave,Berkeley,CA,94705,37.8555,-122.2604\n2,1001 Broadway,Seattle,WA,98122,47.6114,-122.3214\n3,2510 McKinney Ave,Dallas,TX,75201,32.7958,-96.8015\n4,299 SE 3rd Ave,Miami,FL,33131,25.7720,-80.1891\n5,1202 Broadway,Nashville,TN,37203,36.1568,-86.7881\n'

In [5]:
# Save the downloaded text to temp_stores.csv in the current directory.
# r.text is a str, so the file is opened in text mode ("w").
# The with-block closes the file even if write() raises.
with open("temp_stores.csv", "w") as f:
    f.write(r.text)

## Verify the file /user/labs/week_12/temp_stores.csv on the Linux command line

## Download a binary file. Common binary files include PDF, Microsoft Word, Microsoft Excel, images (such as jpeg, jpg, png, etc.), videos (such as mp4), zip files, and tarballs (Linux .tar.gz or .tgz files). The same method works for all binary files. However, because the whole file must fit into memory with this method, most download sites provide a custom program for downloading extremely large files.

In [6]:
# Fetch the Excel workbook from the web server.
# verify=False turns off TLS certificate verification.
# NOTE(review): presumably needed because the lab server uses a self-signed
# certificate -- confirm; do not use verify=False against public sites.
# timeout: without it, requests waits forever if the server stops responding.
r = requests.get("https://nginx/downloads/temp_sale_report.xlsx", verify=False, timeout=30)



In [7]:
# Show the HTTP status code of the response (200 means the request succeeded).
r.status_code

200

In [8]:
# Preview the first 100 characters of the body decoded as text.
# For a binary file this is garbled: bytes that are not valid text show up
# as replacement characters, so r.text must not be used to save binary data.
r.text[:100]

'PK\x03\x04\x14\x00\x00\x00\x08\x00VeJS\x07AMb�\x00\x00\x00�\x00\x00\x00\x10\x00\x00\x00docProps/app.xmlM�=\x0b\x021\x10D��q��A�Bb@�R��\x0f{\x1b/�dC�B~�9��n\x1eo\x18F�\ng*�\x0e-�T��"�'

In [9]:
# Preview the first 100 raw bytes of the body; r.content is the undecoded
# bytes object, which is what must be written to disk for binary files.
r.content[:100]

b'PK\x03\x04\x14\x00\x00\x00\x08\x00VeJS\x07AMb\x81\x00\x00\x00\xb1\x00\x00\x00\x10\x00\x00\x00docProps/app.xmlM\x8e=\x0b\x021\x10D\xff\xcaq\xbd\xb7A\xc1Bb@\xd0R\xb0\xb2\x0f{\x1b/\x90dC\xb2B~\xbe9\xc1\x8fn\x1eo\x18F\xdf\ng*\xe2\xa9\x0e-\x86T\x8f\xe3"'

In [10]:
# Save the downloaded workbook to temp_sale_report.xlsx in the current directory.
# r.content is bytes, so the file is opened in binary mode ("wb").
# The with-block closes the file even if write() raises.
with open("temp_sale_report.xlsx", "wb") as f:
    f.write(r.content)

## Verify the file /user/labs/week_12/temp_sale_report.xlsx on the Linux command line

## Zip files are very common. Zip files are binary and are downloaded by the previous method. It's best to create a dedicated directory for each zip file, download the zip file into that directory, and unzip it there; this way you know exactly which files came from which zip file.

In [11]:
# Fetch the zip archive from the web server.
# verify=False turns off TLS certificate verification.
# NOTE(review): presumably needed because the lab server uses a self-signed
# certificate -- confirm; do not use verify=False against public sites.
# timeout: without it, requests waits forever if the server stops responding.
r = requests.get("https://nginx/downloads/csv.zip", verify=False, timeout=30)



In [12]:
# Show the HTTP status code of the response (200 means the request succeeded).
r.status_code

200

In [13]:
# Show the current working directory; the relative paths used below are
# resolved against it.
os.getcwd()

'/user/labs/week_12'

In [14]:
# Create the directory that will hold the zip file and its extracted contents.
# exist_ok=True makes this safe to re-run and avoids the check-then-create
# race of testing os.path.exists() before calling os.mkdir().
os.makedirs("temp_csv_zip", exist_ok=True)

In [15]:
# Build the destination path of the zip file inside its dedicated directory,
# then display it as the cell's output.
zip_dir, zip_name = "temp_csv_zip", "csv.zip"
dir_file = os.path.join(zip_dir, zip_name)
dir_file

'temp_csv_zip/csv.zip'

In [16]:
# Save the downloaded zip archive to the path held in dir_file.
# r.content is bytes, so the file is opened in binary mode ("wb").
# The with-block closes the file even if write() raises.
with open(dir_file, "wb") as f:
    f.write(r.content)

In [17]:
# Extract every member of the archive into temp_csv_zip.
# The with-block closes the archive even if extractall() raises.
with zipfile.ZipFile(dir_file, "r") as z:
    z.extractall("temp_csv_zip")

## Verify the files in the directory /user/labs/week_12/temp_csv_zip on the Linux command line

## You try it

## Download the json file https://nginx/downloads/temp_stores_nested.json

## Download the image file https://nginx/downloads/berkeley_logo.png

## Download the zip file https://nginx/downloads/json.zip: create a directory temp_json_zip to hold the zip file and the extracted files, then extract the zip into it

## Solutions are in download_files_solutions

In [18]:
# Fetch the JSON file from the web server.
# verify=False turns off TLS certificate verification.
# NOTE(review): presumably needed because the lab server uses a self-signed
# certificate -- confirm; do not use verify=False against public sites.
# timeout: without it, requests waits forever if the server stops responding.
r = requests.get("https://nginx/downloads/temp_stores_nested.json", verify=False, timeout=30)



In [19]:
# Show the HTTP status code of the response (200 means the request succeeded).
r.status_code

200

In [20]:
# Preview only the first 500 characters of the decoded body instead of
# dumping the whole document into the notebook output.
r.text[:500]

'{\n  "creator": "Acme Gourmet Meals",\n  "timestamp": "2021-10-10 18:22:39",\n  "file_name": "temp_stores_nested.json",\n  "version": "12.4.7",\n  "legal": "Unauthorized use, duplication, or possession, blah, blah",\n  "stores": [\n    {\n      "store_id": 1,\n      "street": "3000 Telegraph Ave",\n      "city": "Berkeley",\n      "state": "CA",\n      "zip": "94705",\n      "latitude": 37.8555,\n      "longitude": -122.2604,\n      "sales": [\n        {\n          "sale_id": 128112,\n          "sale_date": "2020'

In [21]:
# Save the downloaded text to temp_stores_nested.json in the current directory.
# r.text is a str, so the file is opened in text mode ("w").
# The with-block closes the file even if write() raises.
with open("temp_stores_nested.json", "w") as f:
    f.write(r.text)

In [22]:
# Fetch the PNG image from the web server.
# verify=False turns off TLS certificate verification.
# NOTE(review): presumably needed because the lab server uses a self-signed
# certificate -- confirm; do not use verify=False against public sites.
# timeout: without it, requests waits forever if the server stops responding.
r = requests.get("https://nginx/downloads/berkeley_logo.png", verify=False, timeout=30)



In [23]:
# Show the HTTP status code of the response (200 means the request succeeded).
r.status_code

200

In [24]:
# Preview the first 100 raw bytes of the image; r.content is the undecoded
# bytes object.
r.content[:100]

b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x01<\x00\x00\x00~\x08\x06\x00\x00\x00\xb9\xe2\xad\xd5\x00\x00\x00\x19tEXtSoftware\x00Adobe ImageReadyq\xc9e<\x00\x00\x17\xd9IDATx\xda\xec]\xffy\xe28\x13\xd6~\xcf\xfe\x7f\\\x05\xf1V\x10B\x031'

In [25]:
# Save the downloaded image to berkeley_logo.png in the current directory.
# r.content is bytes, so the file is opened in binary mode ("wb").
# The with-block closes the file even if write() raises.
with open("berkeley_logo.png", "wb") as f:
    f.write(r.content)

In [26]:
# Fetch the zip archive from the web server.
# verify=False turns off TLS certificate verification.
# NOTE(review): presumably needed because the lab server uses a self-signed
# certificate -- confirm; do not use verify=False against public sites.
# timeout: without it, requests waits forever if the server stops responding.
r = requests.get("https://nginx/downloads/json.zip", verify=False, timeout=30)



In [27]:
# Show the HTTP status code of the response (200 means the request succeeded).
r.status_code

200

In [28]:
# Show the current working directory; the relative paths used below are
# resolved against it.
os.getcwd()

'/user/labs/week_12'

In [29]:
# Create the directory that will hold the zip file and its extracted contents.
# exist_ok=True makes this safe to re-run and avoids the check-then-create
# race of testing os.path.exists() before calling os.mkdir().
os.makedirs("temp_json_zip", exist_ok=True)

In [30]:
# Build the destination path of the zip file inside its dedicated directory,
# then display it as the cell's output.
zip_dir, zip_name = "temp_json_zip", "json.zip"
dir_file = os.path.join(zip_dir, zip_name)
dir_file

'temp_json_zip/json.zip'

In [31]:
# Save the downloaded zip archive to the path held in dir_file.
# r.content is bytes, so the file is opened in binary mode ("wb").
# The with-block closes the file even if write() raises.
with open(dir_file, "wb") as f:
    f.write(r.content)

In [32]:
# Extract every member of the archive into temp_json_zip.
# The with-block closes the archive even if extractall() raises.
with zipfile.ZipFile(dir_file, "r") as z:
    z.extractall("temp_json_zip")