# Ukraine events from the Center for Information Resilience

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt

In [3]:
# Show up to 1000 columns and 1000 rows when a DataFrame is displayed.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

---

## Get data from web 

#### Read data

In [4]:
# NOTE: The source data must be saved manually. Open https://maphub.net/Cen4infoRes/russian-ukraine-monitor, copy the JSON response from the browser's network inspector, and save a local copy before running the next cell.

In [5]:
import json

# Load the manually saved map response. JSON text is UTF-8, so decode it
# explicitly instead of relying on the platform's default encoding, which
# would garble non-ASCII text on systems whose default is not UTF-8.
with open(
    "data/raw/center_for_info_res_ukraine_02-27-22.geojson", encoding="utf-8"
) as f:
    jsons = json.load(f)

#### Get the features and read them into a dataframe

In [6]:
# Collect every feature of the saved response into a plain list.
# list() copies the iterable directly; no manual append loop is needed.
data_list = list(jsons["geojson"]["features"])

In [7]:
# Build one row per collected feature; the top-level keys of each feature
# become the columns of the DataFrame.
df = pd.DataFrame(data_list)

#### Flatten the nested columns

In [8]:
# Flatten each geometry dict into columns, then copy the two we need BY NAME.
# Assigning a DataFrame to a list of columns pairs them up positionally, so
# selecting explicitly keeps the result independent of the key order in the
# JSON. NOTE(review): any existing "type" column is overwritten here.
geometry = pd.json_normalize(df["geometry"])
df[["type", "coordinates"]] = geometry[["type", "coordinates"]]

In [9]:
# Columns expected from the flattened "properties" dicts; nested "image"
# keys are flattened by json_normalize with a "." separator.
property_columns = [
    "title",
    "description",
    "group",
    "marker-color",
    "image.id",
    "image.w",
    "image.h",
    "image.tip_color",
    "image.avg_color",
    "url",
]

# Select the flattened columns BY NAME before assigning. A positional
# assignment would silently mislabel columns if the JSON key order changed;
# this way a missing property raises a KeyError instead.
properties = pd.json_normalize(df["properties"])
df[property_columns] = properties[property_columns]

In [10]:
# The nested source columns have been flattened, and the image metadata and
# the flattened "url" column are discarded here.
unneeded_columns = [
    "geometry",
    "properties",
    "image.id",
    "image.w",
    "image.h",
    "image.tip_color",
    "image.avg_color",
    "url",
]
df.drop(columns=unneeded_columns, inplace=True)

#### Dates

In [11]:
# Keep the first 10 characters of each title as the date string.
# NOTE(review): presumably titles start with a 10-character date - confirm.
df["date"] = df["title"].str.slice(stop=10)

#### Extract the event details from the title column

In [12]:
# Everything after the first 10 characters of the title is kept as the
# free-text event details.
df["details"] = df["title"].str.slice(start=10)

#### Coordinates

In [13]:
# Expand each coordinate list into its own columns (first element becomes
# longitude, second becomes latitude), keeping rows aligned on df's index.
coordinate_pairs = df["coordinates"].tolist()
lon_lat = pd.DataFrame(coordinate_pairs, index=df.index)
df[["longitude", "latitude"]] = lon_lat

#### Prep 'description' column and split to snag URL of event

In [14]:
# Literal (old, new) substitutions applied to the description, in order.
# Order matters: newlines must become "|" before the "...|" patterns can
# match, and the longer "DATE:  " / "DATE: " variants must be removed before
# the bare "DATE:" fallback.
description_replacements = [
    ("\n", "|"),
    ("DATE:  ", ""),
    ("LINK: ", ""),
    ("DATE: ", ""),
    ("BRIEF DESCRIPTION: ", ""),
    ("VIOLENCE LEVEL: 1|", ""),
    ("VIOLENCE LEVEL: 2|", ""),
    ("VIOLENCE LEVEL: 3|", ""),
    ("VIOLENCE LEVEL: 4|", ""),
    ("VIOLENCE LEVEL: 5|", ""),
    ("DATE:", ""),
    # One-off fixes for individual records in this snapshot of the data.
    (" Logistics convoy moving towards Belarus|03/02/2022", ""),
    (" Russian military equipment|31/01/2022|", "|"),
    ("|Iskander missile systems at Asipovicky training ground|", "||"),
]

cleaned_description = df["description"]
for old_text, new_text in description_replacements:
    cleaned_description = cleaned_description.str.replace(
        old_text, new_text, regex=False
    )

df["description_clean"] = cleaned_description

In [15]:
# Split the cleaned description once, at the first "|": the leading piece
# goes to a throwaway "drop" column and the remainder is stored as "url".
description_parts = df["description_clean"].str.split("|", n=1, expand=True)
df[["drop", "url"]] = description_parts

#### Map marker colors to English categories

In [16]:
# Lookup table: marker hex color -> English description of the event
# category that the color represents.
colors = {
    "#005e38": "movement and buildup of military assets",
    "#cc1b15": "civilian casualties, infrastructure damage and military losses",
    "#f18729": "evidence of gunfire, bombing, shelling or explosion",
    "#ffcc00": "other footage",
}

In [17]:
# Set "type" to the English category for each row's marker color.
# Colors that are not keys of the lookup dict map to NaN.
df["type"] = df["marker-color"].map(colors)

#### Dump what we don't need

In [18]:
# Remove the raw and helper columns that were only needed to derive the
# final fields. Each label is listed once ("title" was previously repeated).
df.drop(
    columns=[
        "drop",
        "title",
        "coordinates",
        "description",
        "description_clean",
        "marker-color",
        "group",
    ],
    inplace=True,
)

#### Rearrange columns for export

In [19]:
# Column order used for the exported CSV files.
export_columns = ["id", "date", "details", "url", "type", "longitude", "latitude"]
df = df[export_columns]

---

#### Export all

In [20]:
# Write every event to CSV, leaving out the DataFrame index.
df.to_csv("data/processed/cir_ukraine_data.csv", index=False)

#### Export all events not related to troop buildup

In [21]:
# Keep only rows whose category is not the military-buildup one and write
# them to a separate CSV, again without the DataFrame index.
troop_buildup_label = "movement and buildup of military assets"
is_not_troop_buildup = df["type"] != troop_buildup_label
df[is_not_troop_buildup].to_csv(
    "data/processed/cir_ukraine_data_not_troops.csv", index=False
)