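# Hyde 3.2 (general files)

Unpacks the Hyde 3.2 "general files" snapshot archive and builds a dataset with a `country_codes` table that maps Hyde country codes to country names.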

## Parameters

In [1]:
# Directory the output dataset is created in (passed to Dataset.create_empty
# below). The default is a scratch location; presumably the ETL runner
# overrides it when executing this notebook as a step — TODO confirm.
dest_dir = "/tmp/hyde_3_2_general_files"

## Imports

In [31]:
from owid.walden import Catalog
from owid.catalog import Dataset, Table
from etl.steps.data.converters import convert_snapshot_metadata

import pandas as pd
from etl.snapshot import Snapshot
from etl.paths import SNAPSHOTS_DIR

import zipfile
import tempfile
from pathlib import Path
import shutil

## Get snapshot

In [3]:
# Handle to the zipped "general files" snapshot. Its `.path` is what gets
# unzipped below and its `.metadata` seeds the dataset metadata.
snap = Snapshot(SNAPSHOTS_DIR / "hyde/2017/general_files.zip")

## Unzip

In [4]:
# Scratch directory that receives the extracted archive. mkdtemp() does not
# clean up after itself; the directory is removed explicitly in the Cleanup
# section at the end of the notebook.
temp_dir = tempfile.mkdtemp()

In [5]:
# Extract every member of the snapshot archive into the scratch directory.
# Opening the archive in a `with` block guarantees its file handle is closed
# as soon as extraction finishes (or fails), instead of lingering until the
# ZipFile object happens to be garbage-collected.
with zipfile.ZipFile(snap.path) as archive:
    archive.extractall(temp_dir)

In [7]:
# Inspection only: list what the archive unpacked to, to confirm that the
# country-code workbook read in the next section is present.
!ls {temp_dir}/general_files

HYDE_country_codes.xlsx im_reg_cr.asc           maxln_cr.asc
forest_wwf_cr.asc       iso_cr.asc              sub_iso_cr.asc
garea_cr.asc            landlake.asc


## Make dataset

In [9]:
# Create the empty output dataset at the destination directory, then attach
# metadata derived from the snapshot's own metadata and save the dataset.
ds = Dataset.create_empty(dest_dir)
dataset_metadata = convert_snapshot_metadata(snap.metadata)
ds.metadata = dataset_metadata
ds.save()

In [27]:
# Location of the country-code workbook inside the extracted archive.
code_path = Path(temp_dir) / "general_files" / "HYDE_country_codes.xlsx"

# Only columns A:B of the "country" sheet are needed: the code and the name.
raw_codes = pd.read_excel(code_path.as_posix(), sheet_name="country", usecols="A:B")

# Build the code -> country-name lookup in one pass:
#   1. rename the workbook's headers to snake_case column names,
#   2. trim stray whitespace around the country names,
#   3. keep only the first row for any repeated code,
#   4. index the result by country code.
codes = (
    raw_codes.rename(columns={"ISO-CODE": "country_code", "Country": "country"})
    .assign(country=lambda frame: frame["country"].str.strip())
    .drop_duplicates(subset="country_code", keep="first")
    .set_index("country_code")
)

In [28]:
# Preview the first rows to sanity-check the renamed column and the index.
codes.head()

Unnamed: 0_level_0,country
country_code,Unnamed: 1_level_1
4,Afghanistan
8,Albania
12,Algeria
16,American Samoa
20,Andorra


In [29]:
# Wrap the lookup frame in a catalog Table, label it with a short name and a
# human-readable title, and add it to the dataset.
t = Table(codes)
table_meta = t.metadata
table_meta.short_name = "country_codes"
table_meta.name = "Hyde 3.2 Country Codes"
ds.add(t)

## Cleanup

In [32]:
# Remove the scratch directory created with tempfile.mkdtemp() in the Unzip
# section, together with everything extracted into it.
shutil.rmtree(temp_dir)