In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from pyproj import Proj, transform
from tqdm import tqdm
import fiona



# Read the dataset with longitude and latitude of events
data = pd.read_csv('..\\1.Initilization\\ACLED.csv',sep="\t")  # Replace 'events.csv' with the path to your dataset

# Read the areas GeoJSON file
areas_gdf = gpd.read_file('..\\1.Initilization\\output.geojson')

data.describe()

# extract month and year from the event_date column
data['year'] = pd.DatetimeIndex(data['event_date']).year
data['month'] = pd.DatetimeIndex(data['event_date']).month

# create a new column combine year and month
data['year_month'] = data['year'].astype(str) + '-' + data['month'].astype(str)

# convert the year_month column to datetime
data['year_month'] = pd.to_datetime(data['year_month'])

# convert areas_gdf from column to datetime, Apr-17 to 2017-04-01
areas_gdf['from'] = pd.to_datetime(areas_gdf['from'])
# Convert the DataFrame into a GeoDataFrame with Points
geometry = [Point(xy) for xy in zip(data['longitude'], data['latitude'])]
data_gdf = gpd.GeoDataFrame(data, geometry=geometry)

# transform the coordinate system
inProj = Proj(init='epsg:4326')
outProj = Proj(init='epsg:3857')
x1,y1 = data_gdf['geometry'].x, data_gdf['geometry'].y
x2,y2 = transform(inProj,outProj,x1,y1)
data_gdf['geometry'] = [Point(xy) for xy in zip(x2, y2)]

# unique date
date = areas_gdf['from'].unique()

  areas_gdf['from'] = pd.to_datetime(areas_gdf['from'])
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x2,y2 = transform(inProj,outProj,x1,y1)


In [2]:


results = []

# loop through the years
for dat in date:
    # Filter events and areas by the current date
    areas_current_year = areas_gdf[areas_gdf['from'] == dat]  # Assuming the areas are the same for all years

    # filter events within three months 
    events_current_year = data_gdf[(data_gdf['year_month'] >= dat - pd.DateOffset(months=3)) & (data_gdf['year_month'] <= dat)]

    # Perform the spatial join
    data_gdf_with_areas = gpd.sjoin(events_current_year, areas_current_year, how='right', op='within')

    # Group by area and type of events, and count the number of events
    data_gdf_with_areas_event_counts = data_gdf_with_areas.groupby(['title','event_type']).size().reset_index(name='counts')

    # Group by area, count the number of fatalities
    data_gdf_with_areas_fatalities = data_gdf_with_areas.groupby(['title'])['fatalities'].sum().reset_index(name='fatalities')

    # Merge the two dataframes
    data_gdf_with_areas_event_counts = data_gdf_with_areas_event_counts.merge(data_gdf_with_areas_fatalities, on='title')

    # reshape the data
    data_gdf_with_areas_event_counts = data_gdf_with_areas_event_counts.pivot(index='title', columns='event_type', values=['counts','fatalities']).reset_index()

    # combine the headers
    data_gdf_with_areas_event_counts.columns = ['_'.join(col).strip() for col in data_gdf_with_areas_event_counts.columns.values]

    # rename the columns
    data_gdf_with_areas_event_counts.rename(columns={'title_':'title','counts_Battles':'battles','counts_Explosions/Remote violence':'explosions','counts_Violence against civilians':'violence','fatalities_Battles':'fatalities_battles','fatalities_Explosions/Remote violence':'fatalities_explosions','fatalities_Violence against civilians':'fatalities_violence'}, inplace=True)

    # fill the missing values with 0
    data_gdf_with_areas_event_counts.fillna(0, inplace=True)

    # merge with the areas
    areas_gdf_with_events = areas_gdf.merge(data_gdf_with_areas_event_counts, on='title', how='left')

    # append the results

    results.append(areas_gdf_with_events)

# concatenate the results
results = pd.concat(results)


# drop the columns
results = results.drop(['period','to','color','phase3_worse_population','phase3_worse_percentage','phase1_color','phase2_color','phase3_color','phase4_color','phase5_color'], axis=1)


# rename 'from' to 'date'
results.rename(columns={'from':'date'}, inplace=True)

# fill the missing values with 0
results.fillna(0, inplace=True)
results['geometry'] = results['geometry'].buffer(0)

  if await self.run_code(code, result, async_=asy):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:3857

  data_gdf_with_areas = gpd.sjoin(events_current_year, areas_current_year, how='right', op='within')
  if await self.run_code(code, result, async_=asy):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:3857

  data_gdf_with_areas = gpd.sjoin(events_current_year, areas_current_year, how='right', op='within')
  if await self.run_code(code, result, async_=asy):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:3857

  data_gdf_with_areas = gpd.sjoin(events_current_year, areas_current_year, how='right', op='within')
  if await self.run_code(code, result, async_=asy):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: 

In [3]:

# Create a new column that combines 'area' and 'date' as a tuple
results['agg_column'] = list(zip(results['title'], results['date']))

# Use the dissolve method to aggregate the GeoDataFrame
aggregated_gdf = results.dissolve(by='agg_column', aggfunc={'battles':'sum', 'explosions':'sum', 'violence':'sum', 'fatalities_battles':'sum', 'fatalities_explosions':'sum', 'fatalities_violence':'sum', 'id':'first', 'estimated_population':'first', 'anl_id':'first','overall_phase':'first', 'country':'first', 'year':'first', 'condition':'first', 'phase1_population':'first',
'phase1_percent':'first', 'phase2_population':'first', 'phase2_percent':'first','phase3_population':'first', 'phase3_percent':'first', 'phase4_population':'first','phase4_percent':'first', 'phase5_population':'first', 'phase5_percent':'first','phase3_worse_percentage_manual':'first', 'phase3_plus_phase4':'first',
'phase2_worse_percentage_manual':'first', 'phase2_plus_phase3':'first','phase2_plus_phase3_plus_phase4':'first'})

# Reset the index to have a clean GeoDataFrame
aggregated_gdf.reset_index(inplace=True)

# Separate the 'area' and 'date' columns from the tuple in 'agg_column'
aggregated_gdf[['title', 'date']] = gpd.GeoDataFrame(aggregated_gdf['agg_column'].tolist(), index=aggregated_gdf.index)

# Drop the 'agg_column' as it is not needed anymore
aggregated_gdf.drop(columns='agg_column', inplace=True)

aggregated_gdf.head()


Unnamed: 0,geometry,battles,explosions,violence,fatalities_battles,fatalities_explosions,fatalities_violence,id,estimated_population,anl_id,...,phase4_percent,phase5_population,phase5_percent,phase3_worse_percentage_manual,phase3_plus_phase4,phase2_worse_percentage_manual,phase2_plus_phase3,phase2_plus_phase3_plus_phase4,title,date
0,"POLYGON ((1810577.311 -2977417.202, 1818626.29...",0.0,0.0,0.0,0.0,0.0,0.0,26893518,96015,26893489,...,0.0,0.0,0.0,0.15,0.15,0.45,0.45,0.45,//Kharas,2022-09-01
1,"MULTIPOLYGON (((5202284.611 -2893069.833, 5201...",0.0,0.0,0.0,0.0,0.0,0.0,12151688,37182,12151682,...,0.09,0.0,0.0,0.0,0.44,0.0,0.75,0.84,4 commune de taolanaro,2017-03-01
2,"MULTIPOLYGON (((5202284.611 -2893069.833, 5201...",0.0,0.0,0.0,0.0,0.0,0.0,12151845,37182,12151843,...,0.18,0.0,0.0,0.65,0.65,0.9,0.72,0.9,4 commune de taolanaro,2017-08-01
3,"MULTIPOLYGON (((5202284.611 -2893069.833, 5201...",0.0,0.0,0.0,0.0,0.0,0.0,12632339,38223,12632331,...,0.06,0.0,0.0,0.18,0.18,0.68,0.62,0.68,5 communes de Taolagnaro,2018-03-01
4,"MULTIPOLYGON (((5202284.611 -2893069.833, 5201...",0.0,0.0,0.0,0.0,0.0,0.0,12940714,38223,12940680,...,0.06,0.0,0.0,0.19,0.19,0.7,0.64,0.7,5 communes de Taolagnaro,2018-08-01


In [4]:
aggregated_gdf.describe()

Unnamed: 0,battles,explosions,violence,fatalities_battles,fatalities_explosions,fatalities_violence,estimated_population,overall_phase,year,phase1_population,...,phase4_population,phase4_percent,phase5_population,phase5_percent,phase3_worse_percentage_manual,phase3_plus_phase4,phase2_worse_percentage_manual,phase2_plus_phase3,phase2_plus_phase3_plus_phase4,date
count,9381.0,9381.0,9381.0,9381.0,9381.0,9381.0,9381.0,9381.0,9381.0,9381.0,...,9381.0,9381.0,9381.0,9381.0,9381.0,9381.0,9381.0,9381.0,9381.0,9381
mean,29.305831,13.133781,9.265963,145.893828,130.683722,136.177167,302427.9,2.607398,2020.085066,120959.1,...,17238.95,0.065538,151.086025,0.001009,0.267462,0.267481,0.568008,0.507786,0.569266,2020-07-10 17:06:00.345378816
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-01-01 00:00:00
25%,0.0,0.0,0.0,0.0,0.0,0.0,57254.0,2.0,2019.0,14281.0,...,0.0,0.0,0.0,0.0,0.11,0.12,0.4,0.4,0.4,2019-05-01 00:00:00
50%,0.0,0.0,0.0,0.0,0.0,0.0,140803.0,3.0,2020.0,46735.0,...,4000.0,0.05,0.0,0.0,0.22,0.22,0.6,0.55,0.6,2020-08-01 00:00:00
75%,7.0,1.0,6.0,24.0,0.0,22.0,340952.0,3.0,2022.0,126338.0,...,16552.0,0.1,0.0,0.0,0.4,0.4,0.75,0.65,0.75,2022-03-01 00:00:00
max,1450.0,828.0,401.0,10802.0,10802.0,10802.0,31707000.0,9.0,2023.0,7501845.0,...,5619500.0,0.62,143804.0,0.3,0.95,0.9,1.02,1.0,1.02,2023-02-01 00:00:00
std,126.203916,64.340182,30.665635,708.477537,706.762327,699.43291,595425.7,1.029162,1.678604,271701.9,...,70121.67,0.076395,2581.749326,0.009352,0.196733,0.193915,0.257365,0.209447,0.253785,


In [5]:
# save the results
aggregated_gdf.to_file('output.geojson', driver='GeoJSON')