# Online Retail Dataset: Download Dataset
## Imports

In [1]:
import zipfile
from pathlib import Path

import requests

## Download zip file

In [2]:
# The data directory lives two levels above the current working directory.
data_dir = Path.cwd().parents[1] / "data"

# Create it if needed. `exist_ok=True` avoids the check-then-create race of a
# separate `exists()` test, and still raises if "data" exists but is not a directory.
data_dir.mkdir(exist_ok=True)

In [3]:
# Remote location of the zipped Online Retail dataset.
url = "https://archive.ics.uci.edu/static/public/352/online+retail.zip"

# Local archive name: the URL's final path segment with "+" swapped for "_".
zip_name = url.rsplit("/", maxsplit=1)[-1].replace("+", "_")
zip_path = data_dir / zip_name

In [4]:
# Stop early if the final dataset file (same stem as the zip, with an .xlsx suffix)
# is already present. An explicit raise is used instead of `assert`, because
# assertions are skipped when Python runs with -O / PYTHONOPTIMIZE.
if zip_path.with_suffix(".xlsx").exists():
    raise FileExistsError("dataset already exists")

In [5]:
# Open a streaming request so the body can be written to disk chunk by chunk.
# The timeout bounds how long to wait on the server, so an unresponsive host
# cannot hang the notebook indefinitely.
response = requests.get(url, stream=True, timeout=30)

# Explicit check rather than `assert`, which is skipped when Python runs with -O.
if response.status_code != 200:
    status = response.status_code
    response.close()  # release the connection before bailing out
    raise RuntimeError(f"failed to download zip file (HTTP {status})")

In [6]:
# Stream the response body to disk. Entering `response` as a context manager
# guarantees the connection is closed even if writing fails part-way through.
with response, zip_path.open("wb") as file:
    # 64 KiB chunks keep memory use small while avoiding the overhead of
    # thousands of tiny 1 KiB writes.
    for chunk in response.iter_content(chunk_size=64 * 1024):
        file.write(chunk)

## Extract zip file

In [7]:
# Name of the member to pull out of the downloaded archive.
old_name = "Online Retail.xlsx"

# Extract just that member into the data directory.
with zipfile.ZipFile(zip_path) as archive:
    archive.extract(member=old_name, path=data_dir)

In [8]:
# Delete the downloaded zip file; raises FileNotFoundError if it is already gone.
zip_path.unlink()

In [9]:
# Normalize the extracted file's name: lowercase, with spaces turned into underscores.
new_name = "_".join(old_name.lower().split(" "))

# Both paths sit directly inside the data directory.
old_file_path = data_dir / old_name
new_file_path = data_dir / new_name

# Binding the result to `_` presumably keeps the notebook from echoing the returned path.
_ = old_file_path.rename(new_file_path)