# Ukraine refugees: Totals and locations

Source: [United Nations High Commissioner for Refugees](https://data2.unhcr.org/en/situations/ukraine)

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import urllib.request, json

In [3]:
# Raise pandas' display limits so wide/long frames are shown without truncation.
for _option in ("display.max_columns", "display.max_rows"):
    pd.set_option(_option, 1000)

---

## Totals

In [4]:
# UNHCR time-series endpoint; the query string is split per parameter for readability.
# Adjacent string literals are concatenated, so the resulting URL is unchanged.
totals_url = (
    "https://data2.unhcr.org/population/get/timeseries"
    "?widget_id=283561"
    "&sv_id=54"
    "&population_group=5457"
    "&frequency=day"
    "&fromDate=1900-01-01"
    "&currentSeries=0"
)

In [5]:
# Download the time-series payload and parse the JSON body into `totals`.
# The timeout makes a stalled connection raise instead of hanging the kernel
# (and blocking "Restart & Run All") indefinitely.
with urllib.request.urlopen(totals_url, timeout=60) as response:
    totals = json.loads(response.read().decode())

In [6]:
# Turn the "timeseries" records into a frame, discard the epoch column and
# expose the API's "data_date" field under the shorter name "date".
total_df = pd.DataFrame(totals["data"]["timeseries"])
total_df = total_df.drop(columns=["unix_timestamp"]).rename(
    columns={"data_date": "date"}
)

In [7]:
# Running total of "individuals" in the frame's existing row order.
# NOTE(review): assumes the API returns rows in chronological order — confirm.
total_df = total_df.assign(cumsum=total_df["individuals"].cumsum())

In [8]:
# Line chart of the running total over time (date parsed as a temporal field).
alt.Chart(total_df).mark_line().encode(
    x=alt.X("date:T"),
    y=alt.Y("cumsum"),
).properties(width=500)

---

## Locations

In [9]:
# UNHCR sub-location endpoint; the query string is split per parameter for readability.
# Adjacent string literals are concatenated, so the resulting URL is unchanged.
locations_url = (
    "https://data2.unhcr.org/population/get/sublocation"
    "?geo_id=0"
    "&forcesublocation=1"
    "&widget_id=283557"
    "&sv_id=54"
    "&color=%233c8dbc"
    "&color2=%23303030"
    "&population_group=5460"
)

In [10]:
# Download the sub-location payload and parse the JSON body into `locations`.
# The timeout makes a stalled connection raise instead of hanging the kernel
# (and blocking "Restart & Run All") indefinitely.
with urllib.request.urlopen(locations_url, timeout=60) as response:
    locations = json.loads(response.read().decode())

In [11]:
# Build the locations frame from the payload's "data" records: discard the
# fields that are not used below, then give the remaining geo fields friendlier names.
locations_df = (
    pd.DataFrame(locations["data"])
    .drop(
        columns=[
            "admin_level",
            "source",
            "population_groups_concat",
            "population_group_id",
            "individuals_type",
            "demography_type",
            "households",
            "population_groups",
            "color",
            "published",
            "lat_max",
            "lon_max",
            "lat_min",
            "lon_min",
        ]
    )
    .rename(
        columns={
            "geomaster_name": "country",
            "geomaster_id": "id",
            "centroid_lon": "longitude",
            "centroid_lat": "latitude",
        }
    )
)

In [12]:
# Cast the head-count column to integers so it can be summed and divided below.
# NOTE(review): presumably the JSON delivers these counts as strings — confirm.
locations_df = locations_df.astype({"individuals": int})

In [13]:
# Each row's percentage of the summed "individuals" column, rounded to two decimals.
locations_df["share"] = (
    locations_df["individuals"]
    .div(locations_df["individuals"].sum())
    .mul(100)
    .round(2)
)

In [14]:
# Short human-readable label for each row's date, e.g. "Jun. 7".
# The previous format string "%b. %-d " had two problems:
#   * "%-d" (day without zero padding) is a platform-specific strftime
#     extension and is not accepted everywhere (e.g. Windows);
#   * it ended in a stray space that leaked into every label and the exported CSV.
# Format with the portable "%d" instead and strip the padding zero afterwards.
locations_df["display_date"] = (
    pd.to_datetime(locations_df["date"])
    .dt.strftime("%b. %d")
    .str.replace(" 0", " ", regex=False)
)

In [15]:
# Repeat the overall sum of "individuals" on every row as a "total" column.
locations_df = locations_df.assign(total=locations_df["individuals"].sum())

In [16]:
# Render the finished per-country table as this cell's output.
locations_df

Unnamed: 0,country,id,longitude,latitude,date,month,year,individuals,numChildren,share,display_date,total
0,Poland,10781,19.4013,52.1224,2022-06-07,6,2022,3817564,0,52.5,Jun. 7,7270939
1,Russian Federation,12002,38.1556078125,51.488850139616,2022-06-07,6,2022,1116387,1,15.35,Jun. 7,7270939
2,Hungary,10783,19.4131,47.1672,2022-06-07,6,2022,731098,0,10.06,Jun. 7,7270939
3,Romania,10782,25.3,45.8667,2022-06-07,6,2022,613435,0,8.44,Jun. 7,7270939
4,Republic of Moldova,10784,28.4646,47.1976,2022-06-07,6,2022,491144,0,6.75,Jun. 7,7270939
5,Slovakia,10785,19.4864,48.7062,2022-06-07,6,2022,484661,0,6.67,Jun. 7,7270939
6,Belarus,10786,28.0463,53.5384,2022-06-07,6,2022,16650,0,0.23,Jun. 7,7270939


---

## Exports

In [17]:
# Write both frames to CSV; index=False keeps the row index out of the files.
total_df.to_csv(
    path_or_buf="data/processed/ukraine_refugees_totals_timeseries.csv",
    index=False,
)
locations_df.to_csv(
    path_or_buf="data/processed/ukraine_refugees_totals_countries.csv",
    index=False,
)