In [None]:
import pandas as pd
import numpy as np

Importing dataset...

In [None]:
# Read the military-expenditure CSV from the Kaggle input directory.
# low_memory=False makes pandas parse the file in a single pass instead of chunks.
data = pd.read_csv(
    "../input/military-expenditure-of-countries-19602019/Military Expenditure.csv",
    low_memory=False,
)

Analysing how the dataset can be cleaned.
The column "Code" carries the same information as the field "Name", but we keep it because it is used later to merge with another dataset that adds continent information.

In [None]:
# Peek at the first two rows to see which columns the raw table has.
data.head(2)

In [None]:
# Count the distinct (non-missing) values stored in "Indicator Name".
data["Indicator Name"].nunique(dropna=True)

The field "Indicator Name" has only one value, so it is useless. Let's drop it.

In [None]:
# Remove the "Indicator Name" column from the working table.
data = data.drop(columns=["Indicator Name"])

Creating a "Total USD" (total spending) column for all countries from 1968 to 2018.

In [None]:
# Replace missing values with 0 so gaps do not turn the row totals into NaN.
data = data.fillna(0)

# Sum only the columns whose label is a year (all digits). The previous
# positional slice `iloc[:, 2:]` depended on column order and would include any
# non-year column sitting at position 2 or later (text fields, or a previously
# computed "Total USD" when the cell is re-run).
year_columns = [col for col in data.columns if str(col).isdigit()]
data["Total USD"] = data[year_columns].sum(axis=1)


Transforming the spending scale to billions, rounded to two decimal places

In [None]:
# Labels of every USD-valued column: one per year 1960..2018, plus the row total.
columns = [str(year) for year in range(1960, 2019)] + ["Total USD"]

# Convert all of them from USD to billions of USD in one vectorised step.
data[columns] = data[columns] / 1.e+9

# Round the numeric columns to two decimals and preview the result.
data = data.round(2)
data.head()

Cleaning categories/countries with insignificant spendings:

In [None]:
# Report the row count, remove every row whose total is below 1 (billion USD
# after the rescaling step), then report the row count again.
print("Number of rows before:", len(data))
insignificant_rows = data.index[data["Total USD"] < 1]
data.drop(index=insignificant_rows, inplace=True)
print("Number of rows after:", len(data))

In [None]:
# Total spending per (Type, Name) pair, to see what each category contains.
data.groupby(["Type", "Name"])["Total USD"].agg("sum")

The categories "Semi Autonomous Region","Regions Clubbed Geographically" and "Regions Clubbed Economically" are not interesting for my analysis, so we are going to use only the category "Country"

In [None]:
# Order rows by category and then by total spending, largest first.
# `DataFrame.sort_index(by=...)` was removed in pandas 1.0 and raises a
# TypeError there; `sort_values` is the supported way to sort by column values.
data = data.sort_values(by=["Type", "Total USD"], ascending=[False, False])

# Keep only the rows whose "Type" contains "Country". Later cells rely on the
# resulting order (biggest total first) when they take the first ten rows.
data = data[data["Type"].str.contains("Country")]
data

We are going to add continents to the dataset. This information is necessary to compare wars and spending by continent.
We are going to import the continents from [DataHub](https://datahub.io/JohnSnowLabs/country-and-continent-codes-list#resource-country-and-continent-codes-list-csv) and merge them on the field "Code".

In [None]:
# Load only the continent name and the three-letter country code, then rename
# the code column to "Code" so it matches the column name used in `data`.
contcodes = pd.read_csv(
    r"../input/continents-codes-and-number-of-wars/country-and-continent-codes-list-csv_csv.csv",
    sep=';',
    usecols=["Continent_Name", "Three_Letter_Country_Code"],
).rename(columns={"Three_Letter_Country_Code": "Code"})
contcodes.head()

Some countries are located in 2 continents, let's deduplicate:

In [None]:
# Show the ten codes that appear most often, i.e. those listed under more than
# one continent.
(
    contcodes.groupby('Code')
    .count()
    .sort_values(by=["Continent_Name"], ascending=False)
    .head(10)
)

In [None]:
# Keep a single row per code; when a code is listed several times the last
# occurrence in the file wins.
contcodes = contcodes.drop_duplicates(subset=['Code'], keep="last")

Merging the datasets:

In [None]:
# Attach the continent name to every country row. The join key is spelled out:
# without `on`, merge joins on every column name the two frames share, so any
# additional shared column would silently change the result.
# how='left' keeps countries that have no match (their continent becomes NaN).
data = pd.merge(data, contcodes, how='left', on="Code")
data.head()

### Some visualizations and exploration...

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

#### Biggest spenders of all years

In [None]:
# Name and total spending for every country, then the first ten rows of it.
# NOTE(review): "first ten" means "ten biggest" only while `data` is still
# ordered by "Total USD" descending from the earlier sorting cell.
df = data[["Name", "Total USD"]]
top = df.head(10)
x, y = top["Name"], top["Total USD"]

In [None]:
# Horizontal bar chart: one bar per country in `x`, bar length from `y`.
sns.set_color_codes("pastel")
ax = sns.barplot(x=y, y=x, data=df)
ax.set_xlabel("Total Spendings (billions)")
ax.set_ylabel("Countries")
ax.set_title('Total Millitary Spending from 1968 to 2018')

# Write each bar's value just to the right of the bar's end.
for bar in ax.patches:
    bar_length = bar.get_width()
    label_y = bar.get_y() + 0.55 * bar.get_height()
    ax.text(5 + bar_length, label_y,
            '{:1.2f}'.format(bar_length),
            ha='left', va='center')

#### Timeline of Spendings of top 10 countries

In [None]:
# Build a year-by-country table for the first ten rows of `data`:
# rows are the year columns, columns are the country names.
#
# Compared with transposing first and slicing afterwards, this
#  * picks the ten rows by position (`head`), so it does not depend on the
#    index being 0..N-1, and
#  * moves "Name" into the index before transposing, so the values stay numeric
#    instead of being upcast to `object` by the text column.
time = (
    data.drop(columns=["Type", "Code", "Continent_Name", "Total USD"])
    .head(10)
    .set_index("Name")
    .T
)
time.head()

In [None]:
# Draw one line per country on explicitly created axes. A bare `plt.figure()`
# followed by `time.plot(...)` leaves an empty extra figure behind, because
# DataFrame.plot opens its own figure when no `ax` is passed.
timeline_fig, timeline_ax = plt.subplots()
time.plot(ax=timeline_ax, linestyle='-', marker='*', legend=True)
timeline_ax.set_title('Timeline of Millitary Spending of the top 10 countries (billions)')

#### Biggest spenders of 2018

In [None]:
# Rank countries by their 2018 value (largest first). reset_index() keeps the
# previous row labels as an extra "index" column.
df2 = (
    data[["Name", "2018"]]
    .sort_values(by=['2018'], ascending=False)
    .reset_index()
)
top18 = df2.head(10)
a, b = top18["Name"], top18["2018"]

In [None]:
# Horizontal bar chart of the ten largest 2018 values.
sns.set_color_codes("pastel")
ax1 = sns.barplot(x=b, y=a, data=top18)
ax1.set_xlabel("Total Spendings (billions)")
ax1.set_ylabel("Countries")
ax1.set_title('Total Millitary Spending in 2018')

# Write each bar's value just to the right of the bar's end.
for bar in ax1.patches:
    bar_length = bar.get_width()
    label_y = bar.get_y() + 0.55 * bar.get_height()
    ax1.text(5 + bar_length, label_y,
             '{:1.2f}'.format(bar_length),
             ha='left', va='center')

#### Top 5 biggest spenders by continent

In [None]:
df4 = data[["Continent_Name", "Name", "Total USD"]]


def _five_largest(group):
    """Return the five rows of `group` with the highest "Total USD"."""
    return group.sort_values(["Total USD"], ascending=False).head(5)


# Apply the helper to each continent and flatten the result into one table.
df4.groupby(["Continent_Name"]).apply(_five_largest).reset_index(drop=True)

### Adding information about conflicts from 1968 to 2018

The number of wars from 1968 to 2018 by continent was obtained from Wikipedia. Each row of the dataset is a conflict.
List of wars:
- [1945–1989](https://en.wikipedia.org/wiki/List_of_wars:_1945%E2%80%931989)
- [1990–2002](https://en.wikipedia.org/wiki/List_of_wars:_1990%E2%80%932002)
- [2003–present](https://en.wikipedia.org/wiki/List_of_wars:_2003%E2%80%93present)

In [None]:
# Load the wars table and keep only the continent and the year of each row.
warscontinent = pd.read_csv(
    r"../input/continents-codes-and-number-of-wars/wars.csv",
    sep=';',
    low_memory=False,
)
warscontinent = warscontinent[["Continent_Name", "Year"]]
warscontinent.head()

#### Number of Wars by Continent (conflicts that happened in a continent)

In [None]:
# Count rows per continent (the count lands in the "Year" column) and order the
# continents from most to fewest.
# NOTE: this overwrites `warscontinent`, so the cell is meant to run once after
# the loading cell.
warscontinent = (
    warscontinent.groupby("Continent_Name")
    .count()
    .sort_values(by="Year", ascending=False)
    .reset_index()
)
warscontinent

In [None]:
# Vertical bar chart: one bar per continent, bar height = number of rows counted.
bars = warscontinent['Continent_Name']
height = warscontinent['Year']
y_pos = np.arange(len(bars))

wars_fig, wars_ax = plt.subplots()
wars_ax.bar(y_pos, height)
wars_ax.set_xticks(y_pos)
wars_ax.set_xticklabels(bars, rotation=60)
wars_ax.set_title('Number of Wars by Continent 1968-2018')
plt.show()

#### Military Spendings by continent

In [None]:
# Sum "Total USD" per continent and order the continents from largest to smallest.
USDcontinent = (
    data[["Continent_Name", "Total USD"]]
    .groupby("Continent_Name")
    .sum()
    .sort_values(by="Total USD", ascending=False)
    .reset_index()
)
USDcontinent

In [None]:
# Vertical bar chart: one bar per continent, bar height = summed "Total USD".
bars = USDcontinent['Continent_Name']
height = USDcontinent['Total USD']
y_pos = np.arange(len(bars))

spend_fig, spend_ax = plt.subplots()
spend_ax.bar(y_pos, height)
spend_ax.set_xticks(y_pos)
spend_ax.set_xticklabels(bars, rotation=60)
spend_ax.set_title('Spendings in Military by Continent (1968-2018)')
plt.show()

Merging the spendings and number of wars in one dataset:

In [None]:
# Combine spending and war counts per continent. The join key is explicit so
# the merge cannot silently pick up other columns the two frames might share.
# how='left' keeps every continent present in USDcontinent.
final = pd.merge(USDcontinent, warscontinent, how='left', on="Continent_Name")
final = final.rename(columns={
    'Year': 'Number of Wars',
    'Total USD': 'Total Amount Spent (USD billions)',
})

# Display only: the re-indexed view is not assigned, so `final` keeps
# "Continent_Name" as a regular column for the cells that follow.
final.set_index('Continent_Name')

### Plotting the comparison of number of wars and total spendings in military by continent:

In [None]:
x = final['Continent_Name']
y1 = final['Total Amount Spent (USD billions)']
y2 = final['Number of Wars']

# Plot exactly the two measures being compared. `final.plot` on the whole frame
# draws every numeric column, so re-running this cell after the 'lat'/'long'
# columns have been added to `final` would plot them as extra bars.
ax = final[['Total Amount Spent (USD billions)', 'Number of Wars']].plot(
    kind="bar",
    secondary_y="Number of Wars",
)
ax.set_xlabel('Continents')
ax.set_ylabel('Total Amount Spent')

# The war counts use the right-hand axis; colour it red to tell the scales apart.
ax.right_ax.set_ylabel('Number of Wars', color='red')
ax.right_ax.tick_params(axis='y', labelcolor='red')

ax.set_xticklabels(x, rotation=25)
plt.title('Number of Wars & Military Spendings by Continent (1968-2018)')

plt.show()

Adding latitude and longitude to the dataset to plot a geospatial data visualization

In [None]:
# World-map picture used as the background of the scatter plot.
ruh_m = plt.imread('../input/continents-codes-and-number-of-wars/world.kpeg.jpg')

# Area covered by the picture in plot coordinates; the plotting cell uses it as
# (x_min, x_max, y_min, y_max).
BBox = (0, 20, 0, 10)
print(BBox)

# Hand-placed marker positions, one per row of `final` in its current row order.
# 'lat' is used as the X coordinate and 'long' as the Y coordinate.
# NOTE(review): the lists assume `final` has exactly six rows — confirm that the
# order still matches the continents if the inputs change.
final = final.assign(
    lat=[3, 14, 10.5, 5.0, 10, 17.5],   # X
    long=[8, 8, 8, 3.0, 5, 2.2],        # Y
)
final

In [None]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots()

# Marker areas are scaled from the two measures so both fit on the same map.
wars_marker_size = final["Number of Wars"] * 1.5
spend_marker_size = final["Total Amount Spent (USD billions)"] / 100

# Orange pentagons: wars. Blue circles (shifted right by 0.7): spending.
ax.scatter(final["lat"], final["long"], color="orange", marker="p", s=wars_marker_size)
ax.scatter(final["lat"] + 0.7, final["long"], color="blue", alpha=0.5, s=spend_marker_size)
ax.set_title('Number of Wars (orange) and Military Spendings (blue) by continent (1968-2018)')

# Fit the axes to the picture's bounding box and draw the picture underneath.
x_min, x_max, y_min, y_max = BBox
ax.set_xlim(x_min, x_max)
ax.set_ylim(y_min, y_max)
ax.imshow(ruh_m, zorder=0, extent=BBox, aspect='equal')

## Conclusions

1. US is the biggest spender in military from 1968 to 2018, almost 8 times more than the second country, China.
2. In 2018, the US is still the biggest spender and China the second, but the gap is smaller: the US now spends about 3 times more than China.
3. The US increased its investment in military forces in the 1980s, with a peak in the 2010s.
4. We had more wars in Asia and Africa, but the biggest military spender is North America. This is a useful insight, as it points to the existence of:
    * Proxy war: an armed conflict between two states or non-state actors which act on the instigation or on behalf of other parties that are not directly involved. ([Wiki](https://en.wikipedia.org/wiki/Proxy_war))
    * Power projection (or force projection): a term used in the military to refer to the capacity of a state to deploy and sustain forces outside its territory. ([Wiki](https://en.wikipedia.org/wiki/Power_projection))
    * Investments in military forces are made to "show power" and to increase influence in many areas of the world.