Skip to content

Commit

Permalink
Merge pull request #970 from openego/features/#969-add-zipfile-check
Browse files Browse the repository at this point in the history
  • Loading branch information
nailend committed Oct 28, 2022
2 parents e4fc048 + 290973b commit 5722f15
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 11 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ Added
`#851 <https://github.com/openego/eGon-data/issues/851>`_
* Add charging infrastructure for e-mobility
`#937 <https://github.com/openego/eGon-data/issues/937>`_
* Add zipfile check
`#969 <https://github.com/openego/eGon-data/issues/969>`_
* Add marginal costs for generators abroad and for carriers nuclear and coal
`#907 <https://github.com/openego/eGon-data/issues/907>`_
* Add wind off shore power plants for eGon100RE
Expand Down
51 changes: 40 additions & 11 deletions src/egon/data/datasets/zensus/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class ZensusPopulation(Dataset):
def __init__(self, dependencies):
super().__init__(
name="ZensusPopulation",
version="0.0.0",
version="0.0.1",
dependencies=dependencies,
tasks=(
download_zensus_pop,
Expand All @@ -36,7 +36,7 @@ class ZensusMiscellaneous(Dataset):
def __init__(self, dependencies):
super().__init__(
name="ZensusMiscellaneous",
version="0.0.0",
version="0.0.1",
dependencies=dependencies,
tasks=(
download_zensus_misc,
Expand All @@ -46,32 +46,62 @@ def __init__(self, dependencies):
)


def download_and_check(url, target_file, max_iteration=5):
    """Download a zip archive via http(s) and verify that it can be opened.

    If ``target_file`` does not exist it is fetched from ``url``. The file is
    then opened with :class:`zipfile.ZipFile`. A file that is not a readable
    zip archive, or a download that ended before all announced data arrived,
    is deleted and fetched again. This repeats until the archive opens or
    the number of failed attempts exceeds ``max_iteration``.

    Parameters
    ----------
    url : str
        Source of the archive. Only consulted when ``target_file`` is
        missing; it must then start with ``http`` (case-insensitive).
    target_file : str or path-like
        Local path of the archive.
    max_iteration : int
        Number of failed attempts that is tolerated before giving up.

    Raises
    ------
    ValueError
        If a download is required but ``url`` is not an http(s) url.
    StopIteration
        If more than ``max_iteration`` attempts failed. The last underlying
        error is attached as ``__cause__``.
    """
    # Imported locally so this function does not depend on an additional
    # module-level import.
    from urllib.error import ContentTooShortError

    failed_attempts = 0
    while True:
        try:
            if not os.path.isfile(target_file):
                # A usable url is only needed when something has to be
                # downloaded; an existing local file is checked as it is.
                if not url.lower().startswith("http"):
                    raise ValueError("No http url")
                urlretrieve(url, target_file)

            # Opening raises BadZipFile if the file is not a readable zip.
            with zipfile.ZipFile(target_file):
                print(f"Zip file {target_file} is good.")
            return
        except (zipfile.BadZipFile, ContentTooShortError) as ex:
            # Drop the broken or partial file so the next pass re-downloads.
            if os.path.isfile(target_file):
                os.remove(target_file)
            failed_attempts += 1
            if failed_attempts > max_iteration:
                # NOTE(review): StopIteration is kept so existing callers see
                # the same exception type; a dedicated error class would be
                # safer, because StopIteration raised inside a generator is
                # turned into RuntimeError (PEP 479).
                raise StopIteration(
                    f"Max iteration of {max_iteration} is exceeded"
                ) from ex


def download_zensus_pop():
    """Download Zensus csv file on population per hectare grid cell.

    Source url and target file name are read from the
    ``zensus_population`` / ``original_data`` section of the dataset
    configuration. The archive is stored in the ``zensus_population``
    folder below the current working directory and is fetched and checked
    by ``download_and_check``.
    """
    data_config = egon.data.config.datasets()
    zensus_population_config = data_config["zensus_population"][
        "original_data"
    ]

    download_directory = Path(".") / "zensus_population"
    # Create the folder if it does not exist already. ``exist_ok`` avoids a
    # FileExistsError when another task creates the same folder between a
    # separate existence check and the mkdir call.
    download_directory.mkdir(exist_ok=True)

    target_file = (
        download_directory / zensus_population_config["target"]["file"]
    )

    url = zensus_population_config["source"]["url"]
    download_and_check(url, target_file, max_iteration=5)


def download_zensus_misc():
"""Download Zensus csv files on data per hectar grid cell."""
"""Download Zensus csv files on data per hectare grid cell."""

# Get data config
data_config = egon.data.config.datasets()
download_directory = Path(".") / "zensus_population"
# Create the folder, if it does not exists already
# Create the folder, if it does not exist already
if not os.path.exists(download_directory):
os.mkdir(download_directory)
# Download remaining zensus data set on households, buildings, apartments
Expand All @@ -85,8 +115,7 @@ def download_zensus_misc():
for url, path in url_path_map:
target_file_misc = download_directory / path

if not os.path.isfile(target_file_misc):
urlretrieve(url, target_file_misc)
download_and_check(url, target_file_misc, max_iteration=5)


def create_zensus_pop_table():
Expand Down Expand Up @@ -492,7 +521,7 @@ def adjust_zensus_misc():
This can be caused by missing population
information due to privacy or other special cases (e.g. holiday homes
are listed as buildings but are not permanently populated.)
In the follwong tasks of egon-data, only data of populated cells is used.
In the following tasks of egon-data, only data of populated cells is used.
Returns
-------
Expand Down

0 comments on commit 5722f15

Please sign in to comment.