In [2]:
!pip install -r requirements.txt

Collecting pandas
  Downloading pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl (10.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.9/10.9 MB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
Collecting wordcloud
  Using cached wordcloud-1.9.2.tar.gz (222 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting matplotlib
  Downloading matplotlib-3.7.2-cp39-cp39-macosx_11_0_arm64.whl (7.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.3/7.3 MB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting folium
  Using cached folium-0.14.0-py2.py3-none-any.whl (102 kB)
Collecting sklearn
  Using cached sklearn-0.0.post5.tar.gz (3.7 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting pyLDAvis
  Using cached pyLDAvis-3.4.1-py3-none-any.whl (2.6 MB)
Collecting ipywidgets
  Downloading ipywidgets-8.0.7-py3-none-any.whl (138 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13

In [3]:
# Import necessary libraries
import pandas as pd
import plotly.express as px
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from collections import Counter
import re
import plotly.io as pio
import plotly.graph_objects as go
import folium
import json
import base64

from folium.plugins import TimestampedGeoJson
from folium.plugins import HeatMapWithTime


In [4]:
# Directory prefix that is prepended to input file names when loading data.
data_folder = './data/'

In [5]:
# Load the incident table from the CSV export inside the data folder.
df = pd.read_csv(data_folder + 'ukr-civharm-2023-05-25.csv')

# Preview the first rows. A bare last expression is rendered as a rich table,
# whereas print() produces a wrapped plain-text dump that splits wide frames
# across several column groups.
df.head()

        id        date   latitude  longitude                   location  \
0  CIV0001  02/24/2022  49.850050  36.659031  Chuhuiv, south of Kharkiv   
1  CIV0002  02/24/2022  48.748564  30.218515                       Uman   
2  CIV0003  02/24/2022  50.470055  30.527381                       Kyiv   
3  CIV0004  02/24/2022  47.775537  37.239601                   Vuhledar   
4  CIV0007  02/24/2022  46.227890  34.642830        Semihatka/Henichesk   

                                         description  \
0  Apartment block hit. Crater is very large, pos...   
1  Civilians hit by what appears to have been art...   
2    Explosion in central Kyiv, nothing further yet.   
3    Explosion in central Kyiv, nothing further yet.   
4  [Graphic: Image shows a dead child wrapped in ...   

                                             sources  \
0  https://twitter.com/Michael1Sheldon/status/149...   
1  https://twitter.com/ragipsoylu/status/14967372...   
2  https://twitter.com/FannyFacsar/status/14

In [None]:
# Parse the date column into datetime values (written back onto df).
df['date'] = pd.to_datetime(df['date'])

# Chronologically ordered copy carrying a running 1..N event counter.
df_sorted = df.sort_values('date')
df_sorted = df_sorted.assign(cumulative_count=range(1, len(df_sorted) + 1))

# Line chart of the running total against the date.
fig = px.line(
    df_sorted,
    x='date',
    y='cumulative_count',
    title='Cumulative Number of Incidents Over Time',
)

# One x-axis tick per month, labelled "abbreviated month / year" on two lines.
fig.update_xaxes(dtick="M1", tickformat="%b\n%Y")

fig.show()

# Export the chart as an HTML file; auto_open=True also opens it afterwards.
pio.write_html(fig, file="line_chart1.html", auto_open=True)

In [None]:
# Geographic scatter plot of all incidents; the hover label is the location name.
fig = px.scatter_geo(
    df,
    lat='latitude',
    lon='longitude',
    hover_name='location',
    title='Location of Incidents',
)
fig.show()

# Export the map as an HTML file; auto_open=True also opens it afterwards.
pio.write_html(fig, file="map1.html", auto_open=True)

In [None]:
# One marker per incident, with the location name attached as marker text.
incident_markers = go.Scattergeo(
    lat=df['latitude'],
    lon=df['longitude'],
    mode='markers',
    text=df['location'],
)
fig = go.Figure(data=incident_markers)

# Base-map styling and viewport, collected in one place before being applied.
geo_settings = dict(
    resolution=50,
    showland=True,
    landcolor="rgb(204, 204, 204)",
    countrycolor="rgb(204, 204, 204)",
    lakecolor="rgb(255, 255, 255)",
    projection_type="equirectangular",
    center=dict(lat=48.3794, lon=31.1656),  # center on Ukraine
    lonaxis_range=[22, 40],  # longitude range for Ukraine
    lataxis_range=[44, 52],  # latitude range for Ukraine
    showcountries=True,  # draw country borders
)
fig.update_geos(**geo_settings)

fig.update_layout(title_text='Location of Incidents')

fig.show()

# Export the map as an HTML file; auto_open=True also opens it afterwards.
pio.write_html(fig, file="map2.html", auto_open=True)


In [None]:
## Map with Folium

# Base map focused on Ukraine.
m = folium.Map(location=[48.3794, 31.1656], zoom_start=6)

# One marker per incident; the popup text is the location name.
for lat, lon, place in zip(df['latitude'], df['longitude'], df['location']):
    folium.Marker([lat, lon], popup=place).add_to(m)

# Leave the map as the last expression so it is rendered as the cell output.
m

In [None]:
# Map with GeoJSON outline.
# Read the boundary GeoJSON from the data folder. The encoding is stated
# explicitly because open() otherwise falls back to the platform default,
# while JSON files are UTF-8.
with open(data_folder + 'geoBoundaries-UKR-ADM1_simplified.geojson', encoding='utf-8') as f:
    ukraine_geojson = json.load(f)

# Build the figure: one marker per incident, location name as marker text.
fig = go.Figure(data=go.Scattergeo(
    lat = df['latitude'],
    lon = df['longitude'],
    mode = 'markers',
    text = df['location'],
))

# Base-map styling.
# NOTE(review): showcoastlines and projection_type are set again in the
# update_layout(geo=...) call further down, which appears to override the
# values given here - confirm which ones are intended.
fig.update_geos(
    visible=False,
    resolution=50,
    showcountries=True,
    countrycolor="Black",
    showcoastlines=True,
    coastlinecolor="RebeccaPurple",
    showland=True,
    landcolor="LightGreen",
    showocean=True,
    oceancolor="Azure",
    lakecolor="Azure",
    projection_type="natural earth"
)

# Transparent choropleth used only to draw a red outline from the GeoJSON.
# NOTE(review): `locations` is matched against each feature's id unless
# `featureidkey` is given; verify that the features in this file carry the id
# 'UKR', otherwise nothing is outlined.
fig.add_trace(
    go.Choropleth(
        geojson = ukraine_geojson,
        locations = ['UKR'], # ISO 3166-1 alpha-3 code
        z = [1],  # single dummy value; only needed to give the trace a colour
        colorscale = [[0, 'rgba(0, 0, 0, 0)'], [1, 'rgba(0, 0, 0, 0)']],  # fully transparent fill
        showscale = False,  # no colour bar
        hoverinfo='skip',  # no hover info
        marker_line_width = 2,  # border line width
        marker_line_color = 'red'  # border line colour
    )
)

fig.update_layout(
    title_text = 'Location of Incidents',
    geo=dict(showframe=False, showcoastlines=False, projection_type='equirectangular'),
)

fig.show()

# Export the map as an HTML file; auto_open=True also opens it afterwards.
pio.write_html(fig, file="map3.html", auto_open=True)

In [None]:
# Read the boundary GeoJSON from the data folder. The encoding is stated
# explicitly because open() otherwise falls back to the platform default,
# while JSON files are UTF-8.
with open(data_folder + 'geoBoundaries-UKR-ADM1_simplified.geojson', encoding='utf-8') as f:
    ukraine_geojson = json.load(f)

# Center the map on the mean coordinates of all incidents.
average_lat = df['latitude'].mean()
average_lon = df['longitude'].mean()

m = folium.Map(location=[average_lat, average_lon], zoom_start=6)

# One marker per incident; the popup text is the location name.
for lat, lon, place in zip(df['latitude'], df['longitude'], df['location']):
    folium.Marker(location=[lat, lon], popup=place).add_to(m)

# Overlay the boundaries from the GeoJSON as a named layer.
folium.GeoJson(
    ukraine_geojson,
    name='ukraine'
).add_to(m)

# Save a standalone HTML copy, then render the map as the cell output.
m.save('folium_map1.html')
m


In [None]:
# Make sure the date column holds datetime values (written back onto df).
df['date'] = pd.to_datetime(df['date'])

# Animated geographic scatter plot with one animation frame per date value.
fig = px.scatter_geo(
    df,
    lat='latitude',
    lon='longitude',
    animation_frame='date',
    title='Ereignisse im Laufe der Zeit',
)

# Draw country borders in black and sub-unit borders in blue.
fig.update_geos(
    showcountries=True,
    countrycolor="Black",
    showsubunits=True,
    subunitcolor="Blue",
)

fig.show()

# Export the map as an HTML file; auto_open=True also opens it afterwards.
pio.write_html(fig, file="map4.html", auto_open=True)

In [None]:
# Make sure the date column holds datetime values and the frame is in
# chronological order (rebinding df instead of sorting in place).
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values('date')

# Read the boundary GeoJSON from the data folder. The encoding is stated
# explicitly because open() otherwise falls back to the platform default,
# while JSON files are UTF-8.
with open(data_folder + 'geoBoundaries-UKR-ADM1_simplified.geojson', encoding='utf-8') as f:
    ukraine_geojson = json.load(f)

# Build one heat-map frame per calendar day between the first and last date.
# Each frame lists every distinct coordinate seen up to and including that day
# as [latitude, longitude, number of rows at that coordinate].
# NOTE(review): folium documents the optional third value as a weight in
# (0, 1]; the counts here can exceed 1 - confirm whether they should be
# normalised.
index_range = pd.date_range(start=df['date'].min(), end=df['date'].max(), freq='D')
data = []
for date in index_range:
    seen_so_far = df.loc[df['date'] <= date, ['latitude', 'longitude']]
    counts = seen_so_far.groupby(['latitude', 'longitude']).size().reset_index()
    data.append(counts.values.tolist())

# Base map focused on Ukraine.
m = folium.Map(location=[48.3794, 31.1656], zoom_start=6)

# Parsed GeoJSON object (the loaded content, not a file path).
ukraine_geo = ukraine_geojson

folium.GeoJson(
    ukraine_geo,
    name='ukraine'
).add_to(m)

# Add the time-animated heat map built from the daily frames.
HeatMapWithTime(data, auto_play=True, max_opacity=0.8, speed_step=1/5, min_speed=30).add_to(m)

# Save a standalone HTML copy, then render the map as the cell output.
m.save('folium_map2.html')
m



In [None]:
# Derive a working copy of the incident table with parsed dates and an
# attack-type column. The source is `df`: the name `data` refers to the nested
# list of heat-map frames at this point, so indexing it by column name fails.
# The attack type is whatever follows the last 'Weapon System=' in the
# 'associations' string.
# NOTE(review): if other key=value pairs can follow 'Weapon System=', they end
# up inside the attack type - verify against the data.
incidents = df.assign(
    date=pd.to_datetime(df['date']),
    attack_type=df['associations'].str.split('Weapon System=').str[-1],
)

# Base map.
m = folium.Map(location=[48.3794, 31.1656], zoom_start=5)

# One toggleable FeatureGroup per attack type. groupby skips rows whose attack
# type is missing; sort=False keeps the types in order of first appearance.
for attack_type, attack_rows in incidents.groupby('attack_type', sort=False):
    fg = folium.FeatureGroup(name=attack_type)

    for lat, lon in zip(attack_rows['latitude'], attack_rows['longitude']):
        folium.CircleMarker(
            location=[lat, lon],
            radius=5,
            color="red",
            fill=True,
            fill_opacity=0.6
        ).add_to(fg)

    fg.add_to(m)

# Layer switcher for the per-type groups.
folium.LayerControl().add_to(m)

# Render the map as the cell output, like the other folium cells.
m

In [6]:
# Prepare the description text for the word cloud: every character that is not
# an ASCII letter becomes a space and runs of whitespace collapse to one space.
# The cleaned strings are written back to df['description'].
non_letters = re.compile(r'[^a-zA-Z]')
df['description'] = df['description'].map(
    lambda raw: ' '.join(non_letters.sub(' ', raw).split())
)
text = ' '.join(df['description'])

# Build the word cloud from the combined text.
wordcloud = WordCloud(width=1000, height=500).generate(text)

# Draw the cloud on a single axes with the axis decorations hidden.
fig, ax = plt.subplots(figsize=(15, 8))
ax.imshow(wordcloud)
ax.axis('off')

# Save as PNG, then close the figure.
fig.savefig("wordcloud.png", dpi=1800, bbox_inches='tight')
plt.close(fig)


In [7]:
# Count how often each whitespace-separated word occurs in the combined text.
# (Counter and json come from the notebook's import cell.)
word_counts = Counter(text.split())
words = list(word_counts.elements()) if False else text.split()

# Convert to a list of {'text': word, 'size': count} dictionaries.
word_frequencies = []
for word, freq in word_counts.items():
    word_frequencies.append({'text': word, 'size': freq})

# Save the list as JSON.
with open('word_frequencies.json', 'w') as f:
    json.dump(word_frequencies, f)

In [None]:
png_file = './wordcloud.png'

# Load the PNG bytes and turn them into a Base64 text string.
with open(png_file, "rb") as img_file:
    png_bytes = img_file.read()
b64_string = base64.b64encode(png_bytes).decode()

# HTML page that embeds the image as a Base64 data URI.
html_string = f"""
<!DOCTYPE html>
<html>
<body>

<h1>My WordCloud</h1>

<img src="data:image/png;base64,{b64_string}" alt="Wordcloud" style="width: 100%;">

</body>
</html>
"""

# Write the page to disk.
with open("wordcloud1.html", "w") as html_file:
    html_file.write(html_string)

In [None]:
# Prepare the associations text for the word cloud: every character that is
# not an ASCII letter becomes a space and runs of whitespace collapse to one.
# The cleaned strings are kept in a separate Series instead of being written
# back to df['associations']: the attack-type cell splits that column on the
# literal 'Weapon System=', and stripping the '=' from the stored values would
# silently break that split on a re-run.
# Rows without an associations value are skipped; re.sub cannot process them.
associations_clean = df['associations'].dropna().apply(
    lambda x: ' '.join(re.sub(r'[^a-zA-Z]', ' ', x).split())
)
associations_text = ' '.join(associations_clean)

In [None]:
# Build the word cloud from the combined associations text.
wordcloud = WordCloud(width=1000, height=500).generate(associations_text)

# Draw the cloud on a single axes with the axis decorations hidden, and show it.
fig, ax = plt.subplots(figsize=(15, 8))
ax.imshow(wordcloud)
ax.axis('off')
plt.show()

# Save as PNG, then close the figure.
fig.savefig("wordcloud2.png", dpi=1800, bbox_inches='tight')
plt.close(fig)


In [None]:
png_file = './wordcloud2.png'

# Load the PNG bytes and turn them into a Base64 text string.
with open(png_file, "rb") as img_file:
    png_bytes = img_file.read()
b64_string = base64.b64encode(png_bytes).decode()

# HTML page that embeds the image as a Base64 data URI.
html_string = f"""
<!DOCTYPE html>
<html>
<body>

<h1>My WordCloud</h1>

<img src="data:image/png;base64,{b64_string}" alt="Wordcloud" style="width: 100%;">

</body>
</html>
"""

# Write the page to disk.
with open("wordcloud2.html", "w") as html_file:
    html_file.write(html_string)