## Carrie Little
## SIPRI Milex Data - Time Series

### Forecast annual expenditures for each country for the next 10 years

In [4]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Read the two source tables: the long-format SIPRI expenditure data
# (one row per country and year) and the country lookup table.
military_expenditure_df = pd.read_csv('Cleaned_Long_SIPRI_Military_Expenditure_Merged.csv')
country_regions_df = pd.read_csv('Country_Regions.csv')

# Left-join on 'Country' so that every expenditure row is kept; the lookup's
# columns are simply left missing for any country it does not contain.
# NOTE(review): assumes both files spell the 'Country' values identically - confirm.
merged_df = military_expenditure_df.merge(country_regions_df, on='Country', how='left')
merged_df

Unnamed: 0,Country,Year,Expenditure-Share_of_Govt_spending,Expenditure-Share_of_GDP,Expenditure_Per_Capita,Expenditure_Constant_2022,Region,Subregion
0,Afghanistan,1970,,0.016296,,62.800318,Asia & Oceania,South Asia
1,Afghanistan,1973,,0.018689,,72.822537,Asia & Oceania,South Asia
2,Afghanistan,1974,,0.016108,,70.782323,Asia & Oceania,South Asia
3,Afghanistan,1975,,0.017221,,74.303508,Asia & Oceania,South Asia
4,Afghanistan,1976,,0.020461,,94.766423,Asia & Oceania,South Asia
...,...,...,...,...,...,...,...,...
8444,Zimbabwe,2019,0.055046,0.007296,35.619603,197.050469,Africa,sub-Saharan Africa
8445,Zimbabwe,2020,0.068723,0.009979,15.454984,260.212407,Africa,sub-Saharan Africa
8446,Zimbabwe,2021,0.077220,0.014815,29.632692,491.183244,Africa,sub-Saharan Africa
8447,Zimbabwe,2022,0.048967,0.007805,20.385843,332.717337,Africa,sub-Saharan Africa


In [5]:
# Read the ACLED conflict index workbook (read_excel loads the first sheet
# unless told otherwise), then show it.
conflict_index_df = pd.read_excel(io='ACLED Conflict_Index_2024-7-25.xlsx')
conflict_index_df

Unnamed: 0,country,index_level,total_rank_from_avg_rank,avg_rank,Deadliness_rank,Diffusion_rank,Danger_rank,Fragmentation_rank,Deadliness_scaled_rank,Diffusion_scaled_rank,...,total_score_rank,Deadliness_raw,Diffusion_raw,Danger_raw,Fragmentation_raw,Deadliness_scaled,Diffusion_scaled,Danger_scaled,Fragmentation_scaled,total_score
0,Palestine,1,1,3.50,1,1,1,11,1,1,...,1,39929,0.691,6478,66,1.000000,1.000000,1.000000,0.043450,3.043
1,Myanmar,1,2,4.25,3,9,4,1,3,9,...,2,16176,0.040,3092,1519,0.405119,0.057887,0.477308,1.000000,1.940
2,Syria,1,3,6.00,11,5,5,3,11,5,...,5,6555,0.114,2766,146,0.164166,0.164978,0.426984,0.096116,0.852
3,Mexico,1,4,8.00,8,18,2,4,8,18,...,4,7473,0.009,5426,127,0.187157,0.013025,0.837604,0.083608,1.121
4,Nigeria,1,5,9.25,5,15,8,9,5,15,...,8,9445,0.010,2194,67,0.236545,0.014472,0.338685,0.044108,0.634
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,Kiribati,4,160,114.75,125,51,157,126,125,51,...,136,0,0.000,0,0,0.000000,0.000000,0.000000,0.000000,0.000
239,Northern Mariana Islands,4,160,114.75,125,51,157,126,125,51,...,136,0,0.000,0,0,0.000000,0.000000,0.000000,0.000000,0.000
240,Palau,4,160,114.75,125,51,157,126,125,51,...,136,0,0.000,0,0,0.000000,0.000000,0.000000,0.000000,0.000
241,Antarctica,4,160,114.75,125,51,157,126,125,51,...,136,0,0.000,0,0,0.000000,0.000000,0.000000,0.000000,0.000


In [6]:
# Attach the ACLED conflict-index columns to every expenditure row by country name.
# The join has no year key, so a country's index values are repeated on each of
# its yearly rows. Countries missing from conflict_index_df keep their rows with
# NaN in the index columns (left join).
final_merged_df = (
    merged_df
    .merge(conflict_index_df, left_on='Country', right_on='country', how='left')
    .drop(columns=['country'])  # right-hand key column, redundant with 'Country'
)

# A left join silently multiplies rows when a country appears more than once in
# conflict_index_df. Fail loudly here rather than exporting inflated data later.
assert len(final_merged_df) == len(merged_df), (
    "Merge changed the row count: conflict_index_df contains duplicate 'country' values"
)

# Display the final merged dataset
final_merged_df


Unnamed: 0,Country,Year,Expenditure-Share_of_Govt_spending,Expenditure-Share_of_GDP,Expenditure_Per_Capita,Expenditure_Constant_2022,Region,Subregion,index_level,total_rank_from_avg_rank,...,total_score_rank,Deadliness_raw,Diffusion_raw,Danger_raw,Fragmentation_raw,Deadliness_scaled,Diffusion_scaled,Danger_scaled,Fragmentation_scaled,total_score
0,Afghanistan,1970,,0.016296,,62.800318,Asia & Oceania,South Asia,3.0,31.0,...,31.0,993.0,0.002,385.0,15.0,0.024869,0.002894,0.059432,0.009875,0.097
1,Afghanistan,1973,,0.018689,,72.822537,Asia & Oceania,South Asia,3.0,31.0,...,31.0,993.0,0.002,385.0,15.0,0.024869,0.002894,0.059432,0.009875,0.097
2,Afghanistan,1974,,0.016108,,70.782323,Asia & Oceania,South Asia,3.0,31.0,...,31.0,993.0,0.002,385.0,15.0,0.024869,0.002894,0.059432,0.009875,0.097
3,Afghanistan,1975,,0.017221,,74.303508,Asia & Oceania,South Asia,3.0,31.0,...,31.0,993.0,0.002,385.0,15.0,0.024869,0.002894,0.059432,0.009875,0.097
4,Afghanistan,1976,,0.020461,,94.766423,Asia & Oceania,South Asia,3.0,31.0,...,31.0,993.0,0.002,385.0,15.0,0.024869,0.002894,0.059432,0.009875,0.097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8444,Zimbabwe,2019,0.055046,0.007296,35.619603,197.050469,Africa,sub-Saharan Africa,4.0,59.0,...,54.0,31.0,0.000,126.0,3.0,0.000776,0.000000,0.019450,0.001975,0.022
8445,Zimbabwe,2020,0.068723,0.009979,15.454984,260.212407,Africa,sub-Saharan Africa,4.0,59.0,...,54.0,31.0,0.000,126.0,3.0,0.000776,0.000000,0.019450,0.001975,0.022
8446,Zimbabwe,2021,0.077220,0.014815,29.632692,491.183244,Africa,sub-Saharan Africa,4.0,59.0,...,54.0,31.0,0.000,126.0,3.0,0.000776,0.000000,0.019450,0.001975,0.022
8447,Zimbabwe,2022,0.048967,0.007805,20.385843,332.717337,Africa,sub-Saharan Africa,4.0,59.0,...,54.0,31.0,0.000,126.0,3.0,0.000776,0.000000,0.019450,0.001975,0.022


In [7]:
# Persist the merged expenditure / region / conflict-index table as CSV.
# index=False keeps the positional row index out of the file.
output_path = "Cleaned_merged_SIPRI_Region_ACLED.csv"
final_merged_df.to_csv(path_or_buf=output_path, index=False)

# Echo the destination so the cell output records where the file was written.
output_path

'Cleaned_merged_SIPRI_Region_ACLED.csv'