***SUDAN CIVIL WAR 2023***

## Importing Python Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import seaborn as sns
import sklearn
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, GridSearchCV, validation_curve
from sklearn import preprocessing # Import label encoder 
from sklearn.preprocessing import StandardScaler

import statsmodels.api as sm

# Display up to 300 rows before pandas truncates a printed frame or series.
pd.options.display.max_rows = 300
# Format floats with thousands separators and 4 decimal places, padded to a 40-character field.
pd.set_option('display.float_format', '{:40,.4f}'.format)
plt.style.use('ggplot') # specifies that graphs should use ggplot styling
# IPython line magic selecting the inline matplotlib backend, so figures render in the cell output.
%matplotlib inline

In [2]:
import warnings
warnings.filterwarnings("ignore")

## DATA LOADING AND EXPLORATION

In [4]:
# Read the ACLED Sudan event export from the project's data/ folder.
# NOTE(review): latin1 accepts any byte sequence, so a UTF-8 file would load without
# error but with garbled non-ASCII text — confirm the export's real encoding.
# Leftover path note: work/Documents/CASA/Final_Dissertation_work/Final_Dissertation/data/ACLED_2023-03-01-2024-06-16-Sudan.csv
acled_data = pd.read_csv(
    filepath_or_buffer="data/ACLED_2023-03-01-2024-06-16-Sudan.csv",
    encoding="latin1",
)

In [5]:
# Preview the first five ACLED records.
acled_data.head(n=5)

Unnamed: 0,event_id_cnty,event_date,year,time_precision,disorder_type,event_type,sub_event_type,actor1,assoc_actor_1,inter1,...,latitude,longitude,geo_precision,source,source_scale,notes,fatalities,tags,timestamp,population_best
0,SUD27933,24 May 2024,2024,1,Strategic developments,Strategic developments,Disrupted weapons use,Police Forces of Sudan (2019-),,1,...,13.5295,34.782,1,Sudan News Agency,National,"Weapons seizure: On 24 May 2024, police forces...",0,,1716850558,
1,SUD27933,24 May 2024,2024,1,Strategic developments,Strategic developments,Disrupted weapons use,Unidentified Armed Group (Sudan),,3,...,13.5295,34.782,1,Sudan News Agency,National,"Weapons seizure: On 24 May 2024, police forces...",0,,1716850558,
2,SUD27943,24 May 2024,2024,1,Political violence,Explosions/Remote violence,Shelling/artillery/missile attack,Rapid Support Forces,,2,...,15.6921,32.4854,1,Al Rakoba; Twitter,New media-National,"On 24 May 2024, RSF shelled artillery targetin...",7,,1716850558,155108.0
3,SUD27943,24 May 2024,2024,1,Political violence,Explosions/Remote violence,Shelling/artillery/missile attack,Civilians (Sudan),,7,...,15.6921,32.4854,1,Al Rakoba; Twitter,New media-National,"On 24 May 2024, RSF shelled artillery targetin...",7,,1716850558,155108.0
4,SUD27977,24 May 2024,2024,1,Strategic developments,Strategic developments,Other,Rapid Support Forces,,2,...,13.5938,30.9284,1,Radio Dabanga,National,"Non-violent activity: On 24 May 2024, RSF orde...",0,,1716850558,


In [None]:
# Print the (rows, columns) tuple of the loaded ACLED table.
print(acled_data.shape)

The ACLED dataset has 15498 rows and 29 columns.

## DATA CLEANING AND ANALYSIS

In [None]:
# Print the DataFrame summary; verbose=True asks for the full per-column listing.
acled_data.info(verbose=True)

Counting all NaNs and dropping the columns that are not required in the analysis

**COUNTING NULLS BY COLUMN**

In [None]:
# Missing-value count per column, largest first; show the top 12 columns.
(
    acled_data
    .isnull()
    .sum(axis="index")
    .sort_values(ascending=False)
    .head(12)
)

Dropping the `admin3` and `tags` columns, as they have the highest number of NULL values

In [None]:
# Remove the 'admin3' and 'tags' columns (described above as the columns with the most nulls).
# Reassigning instead of inplace=True, together with errors='ignore', keeps this cell
# idempotent: re-running it once the columns are gone is a no-op rather than a KeyError.
acled_data = acled_data.drop(columns=['admin3', 'tags'], errors='ignore')

**COUNTING NULLS BY ROWS**

In [None]:
# Missing-value count per row, largest first; show the 20 rows with the most nulls.
(
    acled_data
    .isnull()
    .sum(axis="columns")
    .sort_values(ascending=False)
    .head(20)
)

In [None]:
# Display the column labels currently present in acled_data.
acled_data.columns

In [None]:
# NOTE(review): every line of this cell is commented out, so running it does nothing.
# It looks like a disabled folium marker-map experiment — delete it or restore it deliberately.
#import folium
#from folium.plugins import MarkerCluster


# Filter necessary columns and drop rows with missing values
#data_filtered = acled_data[['latitude', 'longitude', 'fatalities', 'event_date']]
#data_filtered.dropna(subset=['latitude', 'longitude'], inplace=True)

# Create a base map centered on Sudan
#sudan_map = folium.Map(location=[15.5, 32.5], zoom_start=6)

# Create a marker cluster
#marker_cluster = MarkerCluster().add_to(sudan_map)

# Add points to the map
#for idx, row in data_filtered.iterrows():
   # folium.Marker(
       # location=[row['latitude'], row['longitude']],
        #popup=f"Date: {row['event_date']}<br>Fatalities: {row['fatalities']}",
        #icon=folium.Icon(color='red' if row['fatalities'] > 0 else 'blue')
   # ).add_to(marker_cluster)

# Save the map to an HTML file
#sudan_map.save('/mnt/data/sudan_map.html')


In [None]:
# NOTE(review): this cell is entirely commented out (a disabled static map plot) and executes nothing.
# If it is re-enabled: gpd.datasets.get_path('naturalearth_lowres') is no longer available in
# GeoPandas 1.0+ (verify against the installed version), and the save path is an absolute /mnt/data path.
#from shapely.geometry import Point


# Filter necessary columns and drop rows with missing values
#data_filtered = acled_data[['latitude', 'longitude', 'fatalities', 'event_date']]
#data_filtered.dropna(subset=['latitude', 'longitude'], inplace=True)

# Convert the event_date column to datetime
#data_filtered['event_date'] = pd.to_datetime(data_filtered['event_date'])

# Create a GeoDataFrame
#geometry = [Point(xy) for xy in zip(data_filtered['longitude'], data_filtered['latitude'])]
#gdf = gpd.GeoDataFrame(data_filtered, geometry=geometry)

# Load the shapefile for Sudan (assuming you have it, or we can use a world shapefile and filter Sudan)
#world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
#sudan = world[world.name == "Sudan"]

# Plotting
#fig, ax = plt.subplots(1, 1, figsize=(15, 15))
#sudan.plot(ax=ax, color='white', edgecolor='black')

# Plotting points
#gdf.plot(ax=ax, marker='o', color=gdf['event_date'].apply(lambda x: plt.cm.viridis((x - gdf['event_date'].min()).days / (gdf['event_date'].max() - gdf['event_date'].min()).days)), markersize=gdf['fatalities']*10, alpha=0.6)

# Adding a title
#plt.title('Map of Attacks in Sudan (Size: Fatalities, Color: Date)', fontsize=15)

# Saving the plot as a JPEG file
#jpeg_path = '/mnt/data/sudan_attack_map.jpeg'
#plt.savefig(jpeg_path, format='jpeg')

# Provide the file path of the generated JPEG
#jpeg_path


**Loading multiple CSV files of FIRMS data exported from Google Earth Engine**

In [6]:
# Read the FIRMS fire-count grid export for 2004–2006 (decoded as Latin-1).
firms_data1 = pd.read_csv(
    filepath_or_buffer="data/Sudan_Fire_Counts_Grid_Year_2004_2006.csv",
    encoding="latin1",
)

In [7]:
# Preview the first five rows of the 2004–2006 extract.
firms_data1.head(n=5)

Unnamed: 0,system:index,fireCount,grid_id,year,.geo
0,0,1,5219,2004.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."
1,1,2,5219,2005.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."
2,2,1,5219,2006.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."
3,3,1,5319,2004.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."
4,4,1,5319,2005.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."


In [None]:
#work/Documents/CASA/Final_Dissertation_work/Final_Dissertation/data/ACLED_2023-03-01-2024-06-16-Sudan.csv

In [17]:
# Print column dtypes and non-null counts for the 2004–2006 extract.
firms_data1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2550 entries, 0 to 2549
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   system:index  2550 non-null   int64  
 1   fireCount     2550 non-null   int64  
 2   grid_id       2550 non-null   object 
 3   year          2550 non-null   float64
 4   .geo          2550 non-null   object 
dtypes: float64(1), int64(2), object(2)
memory usage: 99.7+ KB


In [18]:
# (rows, columns) of the 2004–2006 extract.
firms_data1.shape

(2550, 5)

In [8]:
# Read the FIRMS fire-count grid export for 2007–2008 (decoded as Latin-1).
firms_data2 = pd.read_csv(
    filepath_or_buffer="data/Sudan_Fire_Counts_Grid_Year_2007_2008.csv",
    encoding="latin1",
)

In [9]:
# Preview the first five rows of the 2007–2008 extract.
firms_data2.head(n=5)

Unnamed: 0,system:index,fireCount,grid_id,year,.geo
0,0,1,5219,2007.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."
1,1,2,5219,2008.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."
2,2,1,5319,2007.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."
3,3,2,5319,2008.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."
4,4,2,5419,2007.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."


In [19]:
# (rows, columns) of the 2007–2008 extract.
firms_data2.shape

(1700, 5)

In [10]:
# Stack the 2004–2006 and 2007–2008 extracts row-wise and renumber the index from 0.
merged_data = pd.concat(
    objs=[firms_data1, firms_data2],
    axis=0,
    ignore_index=True,
)

In [11]:
# Preview the first ten rows of the stacked FIRMS table.
merged_data.head(n=10)

Unnamed: 0,system:index,fireCount,grid_id,year,.geo
0,0,1,5219,2004.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."
1,1,2,5219,2005.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."
2,2,1,5219,2006.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."
3,3,1,5319,2004.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."
4,4,1,5319,2005.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."
5,5,1,5319,2006.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."
6,6,1,5419,2004.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."
7,7,1,5419,2005.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."
8,8,3,5419,2006.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."
9,9,1,5220,2004.0,"{""geodesic"":false,""crs"":{""type"":""name"",""proper..."


In [20]:
# (rows, columns) of the stacked table; should equal the two extracts' row counts added together.
merged_data.shape

(4250, 5)

In [21]:
# Distinct values of the 'year' column, in order of first appearance.
unique_years = merged_data.loc[:, 'year'].unique()

In [22]:
# List the distinct years in ascending order.
# .tolist() converts the NumPy scalars to plain Python floats first; otherwise NumPy >= 2
# prints each element as np.float64(2004.0) instead of 2004.0.
print("Unique years in merged data:", sorted(unique_years.tolist()))

Unique years in merged data: [2004.0, 2005.0, 2006.0, 2007.0, 2008.0]


In [23]:
# Distinct values of the 'grid_id' column, in order of first appearance.
# NOTE(review): firms_data1.info() reports grid_id as object dtype, so these may be
# strings rather than integers — confirm before sorting or joining on them.
unique_grid_id = merged_data.loc[:, 'grid_id'].unique()

In [29]:
#print("Unique grid_id in merged data:", sorted(unique_grid_id))

In [33]:
#unique_grid_id

In [31]:
#merged_data1 = pd.merge(firms_data1, firms_data2, on=['grid_id', 'year'], how='outer')

In [32]:
#merged_data1.head(10)

In [15]:
# Write the stacked FIRMS table to a CSV (relative path), leaving out the row index.
merged_data.to_csv(path_or_buf='merged_firms_data.csv', index=False)

In [16]:
# Read the FIRMS fire-count grid export for 2009–2010 (decoded as Latin-1).
firms_data3 = pd.read_csv(
    filepath_or_buffer="data/Sudan_Fire_Counts_Grid_Year_2009_2010.csv",
    encoding="latin1",
)