# Cease-fire violations data from OSCE

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import glob
import altair as alt
import numpy as np

In [3]:
# Widen pandas' display limits so large frames are shown in full in the notebook.
pd.options.display.max_columns, pd.options.display.max_rows = 1000, 1500

---

## Get data from web 

#### List of dates

In [4]:
# One timestamp per calendar day, both endpoints included.
# NOTE(review): the report files read below run past 2022-02-19 — confirm
# whether this range should be extended before re-enabling the download.
dates = pd.date_range("2022-01-18", "2022-02-19", freq="D")

#### Download the files

In [5]:
# for d in dates:
#     !curl https://www.osce.org/files/{d.strftime("%Y-%m-%d")}%20Daily%20Report_ENG.pdf -o {d.strftime("%Y-%m-%d")}.pdf

#### Read all the files

In [6]:
# Folder holding the daily report spreadsheets, and every .xlsx file inside it.
path = "daily_reports/"
all_files = glob.glob(f"{path}*.xlsx")

In [7]:
# Show the matched paths; glob returns them in arbitrary (not chronological) order.
all_files

['daily_reports/2022-02-17.xlsx',
 'daily_reports/2022-01-27.xlsx',
 'daily_reports/2022-02-20.xlsx',
 'daily_reports/2022-02-16.xlsx',
 'daily_reports/2022-01-21.xlsx',
 'daily_reports/2022-02-11.xlsx',
 'daily_reports/2022-02-10.xlsx',
 'daily_reports/2022-01-20.xlsx',
 'daily_reports/2022-01-19.xlsx',
 'daily_reports/2022-02-09.xlsx',
 'daily_reports/2022-02-12.xlsx',
 'daily_reports/2022-02-08.xlsx',
 'daily_reports/2022-01-18.xlsx',
 'daily_reports/2022-02-15.xlsx',
 'daily_reports/2022-02-19.xlsx',
 'daily_reports/2022-02-23.xlsx',
 'daily_reports/2022-02-22.xlsx',
 'daily_reports/2022-02-18.xlsx']

#### Loop through files, read Excel files and append to a list

In [8]:
# Read every spreadsheet into its own frame, tagging each row with the file it
# came from. Files are read in sorted order (the names are ISO dates, so this is
# chronological) so the combined table does not depend on glob's arbitrary order.
li = [
    pd.read_excel(filename, index_col=None).assign(file=filename)
    for filename in sorted(all_files)
]

#### Concatenate

In [9]:
# Stack the per-file frames row-wise and renumber the index from 0.
src = pd.concat(objs=li, axis=0, ignore_index=True)

#### Clean the column names

In [10]:
# Lower-case every column label so later lookups use one consistent spelling.
src = src.rename(columns=str.lower)

#### Clean up the dates

In [11]:
# Split the combined "date, time" text into two columns. `n=1` splits only at
# the first ", " so a stray extra separator cannot yield a third column and
# break the two-column assignment; any remainder stays in "time".
src[["date", "time"]] = src["date, time"].str.split(", ", n=1, expand=True)

In [12]:
# Append the year suffix "-2022" to each date string.
src["date"] = src["date"] + "-2022"

In [13]:
# Parse the date strings into timestamps. No explicit format is passed, so
# pandas infers it — NOTE(review): confirm the inferred day/month order
# against the raw values.
src = src.assign(date_clean=pd.to_datetime(src["date"]))

#### Which types of observations are most common?

In [14]:
# Frequency of each raw observation label (before any cleanup).
src["observation"].value_counts()

Explosion              1119
Projectile              289
Burst                    93
Shot                     59
Muzzle flash             43
Illumination flare       18
Illumination\nflare      14
Muzzle  flash             5
Shots and bursts          4
Outgoing                  1
Name: observation, dtype: int64

In [15]:
# Normalise the labels: collapse every run of whitespace (embedded line
# breaks, doubled spaces) into a single space and trim the ends, so variants
# such as "Illumination\nflare" and "Muzzle  flash" merge with their
# canonical spelling without listing each variant by hand.
src["observation"] = (
    src["observation"].str.replace(r"\s+", " ", regex=True).str.strip()
)

#### Aggregate and pivot

In [16]:
# Numeric tally per row. "Uncountable" entries are mapped to 0 so the column
# can be cast to float and summed.
src["number"] = (
    src["no."]
    .replace(to_replace="Uncountable", value=0, regex=False)
    .astype(float)
)

In [17]:
# Per observation type: number of rows (size of "smm position") and the
# total of the numeric tallies.
observations = src.groupby("observation").agg(
    {"smm position": "size", "number": "sum"}
)
observations = observations.reset_index()

In [18]:
# Name the aggregated columns after the statistic they hold.
observations = observations.rename(
    columns={"smm position": "size", "number": "sum"}
)

In [19]:
# Most frequently recorded observation types first.
observations = observations.sort_values(by="size", ascending=False)
observations

Unnamed: 0,observation,size,sum
1,Explosion,1119,8121.0
5,Projectile,289,1304.0
0,Burst,93,1975.0
6,Shot,59,1578.0
3,Muzzle flash,48,168.0
2,Illumination flare,32,36.0
7,Shots and bursts,4,152.0
4,Outgoing,1,1.0


#### What's the sum of all observations? 

In [20]:
# Grand total of the numeric tallies across all observation types.
observations.loc[:, "sum"].sum()

13335.0

#### How many ceasefire violations (sum of `number`, not a row count) per day?

In [21]:
# Total of the numeric tallies for each calendar day.
daily_counts = (
    src.groupby("date_clean")
    .agg({"number": "sum"})
    .reset_index()
)

In [22]:
# `daily_counts` already carries exactly the columns `date_clean` and `number`
# (there is no "smm position" column to rename), so no renaming is required.

In [23]:
# Peek at the five most recent days.
daily_counts.tail(n=5)

Unnamed: 0,date_clean,number
19,2022-02-18,1180.0
20,2022-02-19,2480.0
21,2022-02-20,1409.0
22,2022-02-21,1445.0
23,2022-02-22,1575.0


In [24]:
# Bar chart of the daily totals, restricted to dates after 31 January.
# Encoding types are spelled out (temporal x, quantitative y) to match the
# faceted chart further down.
alt.Chart(daily_counts[daily_counts["date_clean"] > "2022-01-31"]).mark_bar(
    width=16
).encode(x="date_clean:T", y="number:Q").properties(
    width=650,
    height=300,
    title="Daily number of ceasefire violations in February",
)

#### How many times did each type of observation happen each day? 

In [25]:
# Total of the numeric tallies for each (day, observation type) pair.
daily_counts_types = (
    src.groupby(by=["date_clean", "observation"])
    .agg({"number": "sum"})
    .reset_index()
)

In [26]:
# `daily_counts_types` already carries the columns `date_clean`, `observation`
# and `number` (there is no "smm position" column to rename), so no renaming
# is required.

In [27]:
# Peek at the last five (day, observation type) rows.
daily_counts_types.tail(n=5)

Unnamed: 0,date_clean,observation,number
98,2022-02-22,Explosion,1420.0
99,2022-02-22,Muzzle flash,20.0
100,2022-02-22,Outgoing,1.0
101,2022-02-22,Projectile,61.0
102,2022-02-22,Shot,7.0


#### Pivot for datawrapper

In [28]:
# Wide layout for Datawrapper: one row per day, one column per observation
# type. The values are totals, so they are combined with an explicit "sum"
# rather than pivot_table's default mean; each (day, type) pair occurs once in
# `daily_counts_types`, so the numbers are unchanged. Day/type combinations
# that never occurred are filled with 0.
daily_counts_pivot = (
    daily_counts_types.pivot_table(
        index="date_clean", values="number", columns="observation", aggfunc="sum"
    )
    .fillna(0)
    .reset_index()
)

In [29]:
# Peek at the five most recent days of the wide table.
daily_counts_pivot.tail(n=5)

observation,date_clean,Burst,Explosion,Illumination flare,Muzzle flash,Outgoing,Projectile,Shot,Shots and bursts
19,2022-02-18,38.0,1107.0,3.0,0.0,0.0,0.0,32.0,0.0
20,2022-02-19,525.0,1317.0,9.0,13.0,0.0,64.0,552.0,0.0
21,2022-02-20,4.0,1247.0,9.0,6.0,0.0,140.0,3.0,0.0
22,2022-02-21,119.0,923.0,6.0,18.0,0.0,379.0,0.0,0.0
23,2022-02-22,66.0,1420.0,0.0,20.0,1.0,61.0,7.0,0.0


#### Chart

In [30]:
# Small multiples: one panel per observation type, showing the daily totals
# for dates after 31 January. The title describes all observation types,
# because the data is not filtered down to explosions.
alt.Chart(
    daily_counts_types[daily_counts_types["date_clean"] > "2022-01-31"]
).mark_bar(width=4).encode(
    x="date_clean:T",
    y="number:Q",
    color="observation",
    facet=alt.Facet("observation", columns=4),
).properties(
    width=200,
    height=200,
    title="Daily number of ceasefire violations in February, by observation type",
)

---

#### Export

In [31]:
# Write the daily totals to CSV without the positional index.
daily_counts.to_csv(path_or_buf="data/processed/daily_counts.csv", index=False)

In [32]:
# Write the wide per-day table to CSV without the positional index.
daily_counts_pivot.to_csv(
    path_or_buf="data/processed/daily_counts_pivot.csv", index=False
)

In [33]:
# Write the per-type totals to CSV without the positional index.
observations.to_csv(
    path_or_buf="data/processed/observations_totals_jan-feb.csv", index=False
)