In [1]:
# Import necessary packages. 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os 
import math

In [2]:
# Get all of the plots to render in seaparate windows. 
%matplotlib qt

In [3]:
# Read in the Military spending data. 
csv1 = pd.read_csv('API_MS.MIL.XPND.CD_DS2_en_csv_v2_10137926.csv',header=2)

print(csv1.columns)
# Keep only the columns we need for our analysis. We will eventually melt this dataset, so we only want to keep a column for the Country and each of the years.
csv1dr = csv1.drop(['Country Code','Indicator Name','Indicator Code','Unnamed: 62'],axis=1)
print(csv1dr.columns)

Index(['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code',
       '1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968',
       '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977',
       '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986',
       '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995',
       '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
       '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
       '2014', '2015', '2016', '2017', 'Unnamed: 62'],
      dtype='object')
Index(['Country Name', '1960', '1961', '1962', '1963', '1964', '1965', '1966',
       '1967', '1968', '1969', '1970', '1971', '1972', '1973', '1974', '1975',
       '1976', '1977', '1978', '1979', '1980', '1981', '1982', '1983', '1984',
       '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993',
       '1994', '1995', '1996', '1997', '1998', '1999', '20

In [4]:
# Select only our countries of interest via conditional dataframe slicing with the .loc function
df1 = csv1dr.loc[csv1dr['Country Name'].isin(['China','United Kingdom','Russian Federation','Germany','France','Italy','Iran, Islamic Rep.','Saudi Arabia','Korea, Rep.','Israel'])]

df1.index = range(0,10)

# Our data is not in the form we want, each observation should contain the country of interest, the year of interest, and our measure of interest, in this case, Military Spending in 2017 USD
# We can achieve this with the melt function. 
milex = pd.melt(df1,id_vars='Country Name', value_vars= df1.columns[1:])


In [5]:
# Edit our Column names and check that our result is what we wanted. 
milex.columns =  ["Country","Year","Military_Expenditure"]
milex.head(20)

Unnamed: 0,Country,Year,Military_Expenditure
0,China,1960,
1,Germany,1960,2884518000.0
2,France,1960,3881220000.0
3,United Kingdom,1960,4587798000.0
4,"Iran, Islamic Rep.",1960,98679870.0
5,Israel,1960,190277800.0
6,Italy,1960,1137890000.0
7,"Korea, Rep.",1960,275643600.0
8,Russian Federation,1960,
9,Saudi Arabia,1960,140888900.0


In [6]:
# Read in the GDP data via the read_csv function.
csv2 = pd.read_csv('API_NY.GDP.MKTP.CD_DS2_en_csv_v2_10134290.csv',header=2)

print(csv2.columns)
# Keep only the columns we need for our analysis. We will eventually melt this dataset, so we only want to keep a column for the Country and each of the years.
csv2dr = csv2.drop(['Country Code','Indicator Name','Indicator Code',"Unnamed: 62"],axis=1)
print(csv2dr.columns)
# Select only our countries of interest via conditional dataframe slicing with the .loc function
gdp = csv2dr.loc[csv2dr['Country Name'].isin(['China','United Kingdom','Russian Federation','Germany','France','Italy','Iran, Islamic Rep.','Saudi Arabia','Korea, Rep.','Israel'])]

# Reset the index for easier merging of the dataframes
gdp.index = range(0,10)
# Our data is not in the form we want, each observation should contain the country of interest, the year of interest, and our measure of interest, in this case, GDP in 2017 USD
# We can achieve this with the melt function
GDP = pd.melt(gdp,id_vars='Country Name', value_vars= gdp.columns[1:])


Index(['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code',
       '1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968',
       '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977',
       '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986',
       '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995',
       '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
       '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
       '2014', '2015', '2016', '2017', 'Unnamed: 62'],
      dtype='object')
Index(['Country Name', '1960', '1961', '1962', '1963', '1964', '1965', '1966',
       '1967', '1968', '1969', '1970', '1971', '1972', '1973', '1974', '1975',
       '1976', '1977', '1978', '1979', '1980', '1981', '1982', '1983', '1984',
       '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993',
       '1994', '1995', '1996', '1997', '1998', '1999', '20

In [7]:
# Edit our Column names and check that our result is what we wanted. 
GDP.columns = ["Country","Year","GDP"]
GDP.head(20)

Unnamed: 0,Country,Year,GDP
0,China,1960,59716470000.0
1,Germany,1960,
2,France,1960,62651470000.0
3,United Kingdom,1960,72328050000.0
4,"Iran, Islamic Rep.",1960,4199134000.0
5,Israel,1960,2598500000.0
6,Italy,1960,40385290000.0
7,"Korea, Rep.",1960,3957874000.0
8,Russian Federation,1960,
9,Saudi Arabia,1960,


In [8]:
# Load in the population data. 
csv3 = pd.read_csv('API_SP.POP.TOTL_DS2_en_csv_v2_10134466.csv',header=2)

print(csv3.columns)
# Keep only the columns we need for our analysis. We will eventually melt this dataset, so we only want to keep a column for the Country and each of the years.
csv3dr = csv3.drop(['Country Code','Indicator Name','Indicator Code','Unnamed: 62'],axis=1)
print(csv3dr.columns)

# Select only our countries of interest via conditional dataframe slicing with the .loc function
popd = csv3dr.loc[csv3dr['Country Name'].isin(['China','United Kingdom','Russian Federation','Germany','France','Italy','Iran, Islamic Rep.','Saudi Arabia','Korea, Rep.','Israel'])]
# Reset the index for easier merging of the dataframes. 
popd.index = range(0,10)

# Our data is not in the form we want, each observation should contain the country of interest, the year of interest, and our measure of interest, in this case, population
# We can achieve this with the melt function
popd = pd.melt(popd,id_vars='Country Name', value_vars= popd.columns[1:])

# Edit our Column names and check that our result is what we wanted. 
popd.columns = ["Country","Year","Population"]
popd.head(20)

Index(['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code',
       '1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968',
       '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977',
       '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986',
       '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995',
       '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
       '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
       '2014', '2015', '2016', '2017', 'Unnamed: 62'],
      dtype='object')
Index(['Country Name', '1960', '1961', '1962', '1963', '1964', '1965', '1966',
       '1967', '1968', '1969', '1970', '1971', '1972', '1973', '1974', '1975',
       '1976', '1977', '1978', '1979', '1980', '1981', '1982', '1983', '1984',
       '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993',
       '1994', '1995', '1996', '1997', '1998', '1999', '20

Unnamed: 0,Country,Year,Population
0,China,1960,667070000.0
1,Germany,1960,72814900.0
2,France,1960,46814237.0
3,United Kingdom,1960,52400000.0
4,"Iran, Islamic Rep.",1960,21906903.0
5,Israel,1960,2114020.0
6,Italy,1960,50199700.0
7,"Korea, Rep.",1960,25012374.0
8,Russian Federation,1960,119897000.0
9,Saudi Arabia,1960,4086539.0


In [9]:
# Dataframe merging
# Now that our data is ordered the same way, we just need to double-check they are indexed the same way before merging. 
(milex.index==GDP.index).all() & (milex.index==popd.index).all()
# We use the .all statement to return one T/F value that lets us know if the dataframes indexes match instead of having to manually check a list of statements for each row in the datasets.
#We do this first for our military spending data and our GDP data, and then we need only to check our population data against one of these if this is true. 

True

In [10]:
# because of our earlier result, we can just use the .join function here because our dataframes are ordered the same way, and have the same indexes.
mgdp = milex.join(GDP['GDP']).join(popd['Population'])
mgdp.head()
# I viewed the first 5 rows of the dataframe here and in the other three blocks of code where I finished pre-processing each dataset 
# so that I could triple-check if I made an error when loading the data. 

Unnamed: 0,Country,Year,Military_Expenditure,GDP,Population
0,China,1960,,59716470000.0,667070000.0
1,Germany,1960,2884518000.0,,72814900.0
2,France,1960,3881220000.0,62651470000.0,46814237.0
3,United Kingdom,1960,4587798000.0,72328050000.0,52400000.0
4,"Iran, Islamic Rep.",1960,98679870.0,4199134000.0,21906903.0


In [11]:
# Since we are only interested in data past 2010, let's locate first observation for the year 2010 in our dataset
mgdp.shape
# We know our data starts in the year 1960 and includes 2017, spanning 58 years. @e only want information for the 8 most recent 
# years in our dataset, which means we only want the last 80 observations. 


(580, 5)

In [12]:
# Grab the correct years for the data.
cleandat = mgdp[500:580]
cleandat

Unnamed: 0,Country,Year,Military_Expenditure,GDP,Population
500,China,2010,1.157118e+11,6.100620e+12,1.337705e+09
501,Germany,2010,4.625552e+10,3.417095e+12,8.177693e+07
502,France,2010,6.178175e+10,2.642610e+12,6.502751e+07
503,United Kingdom,2010,5.808285e+10,2.441173e+12,6.276636e+07
504,"Iran, Islamic Rep.",2010,1.356127e+10,4.870696e+11,7.456751e+07
505,Israel,2010,1.457319e+10,2.336095e+11,7.623600e+06
506,Italy,2010,3.603229e+10,2.125058e+12,5.927742e+07
507,"Korea, Rep.",2010,2.817518e+10,1.094499e+12,4.955411e+07
508,Russian Federation,2010,5.872023e+10,1.524916e+12,1.428494e+08
509,Saudi Arabia,2010,4.524453e+10,5.282072e+11,2.742568e+07


In [13]:
# We will end up using this value, military spending as a percentage of a country's GDP later on to better compare GDP and Military Spending. 
cleandat['milex_as_pct_of_gdp'] = cleandat['Military_Expenditure']/cleandat['GDP']
cleandat.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,Country,Year,Military_Expenditure,GDP,Population,milex_as_pct_of_gdp
500,China,2010,115711800000.0,6100620000000.0,1337705000.0,0.018967
501,Germany,2010,46255520000.0,3417095000000.0,81776930.0,0.013537
502,France,2010,61781750000.0,2642610000000.0,65027510.0,0.023379
503,United Kingdom,2010,58082850000.0,2441173000000.0,62766360.0,0.023793
504,"Iran, Islamic Rep.",2010,13561270000.0,487069600000.0,74567510.0,0.027843


In [14]:
# Pre-Plotting modifications. 

# I used groupby function to re-format my data to create some of the plots in my presentaiton. 
# Note, although I chain it with the sum function, since I am grouping by both Year and Country, there is only ONE 
# observation for each year/country column in the way I set up my data, so this is equivalent. 
# I would have merely used groupby but it doesn't stack the dataframe, and when I tried to stack the data, I couldn't seem
# to stack it the way I wanted to.

#Unfortunately, grouping by year AND country sorts the countries in alphabetical order (emphasized with the first two print statements), so I can't just check if the values 
# are equivalent for the GDP, Military Spending, Population and Military spending as a percentage of GDP with a simple test_clean.loc[:,'variable_name'].values == cleandat.loc[:,'variable_name'].values statement, for example

# We do know... however that China is the first country alphabetically and listed first in our dataset, so we will check the equivalence
# of the first item of our four numerical columns.

test_clean = cleandat.groupby(['Year','Country']).sum().unstack()
print(cleandat.loc[500:509,['Country','GDP']])
print(test_clean.loc['2010','GDP'])
(cleandat.loc[500:509,['Country','GDP']].iloc[0,1] == test_clean.loc['2010','GDP'][0]) & (cleandat.loc[500:509,['Country','Military_Expenditure']].iloc[0,1] == test_clean.loc['2010','Military_Expenditure'][0]) & (cleandat.loc[500:509,['Country','Population']].iloc[0,1] == test_clean.loc['2010','Population'][0])&(cleandat.loc[500:509,['Country','Military_Expenditure']].iloc[0,1] == test_clean.loc['2010','Military_Expenditure'][0]) & (cleandat.loc[500:509,['Country','milex_as_pct_of_gdp']].iloc[0,1] == test_clean.loc['2010','milex_as_pct_of_gdp'][0])

                Country           GDP
500               China  6.100620e+12
501             Germany  3.417095e+12
502              France  2.642610e+12
503      United Kingdom  2.441173e+12
504  Iran, Islamic Rep.  4.870696e+11
505              Israel  2.336095e+11
506               Italy  2.125058e+12
507         Korea, Rep.  1.094499e+12
508  Russian Federation  1.524916e+12
509        Saudi Arabia  5.282072e+11
Country
China                 6.100620e+12
France                2.642610e+12
Germany               3.417095e+12
Iran, Islamic Rep.    4.870696e+11
Israel                2.336095e+11
Italy                 2.125058e+12
Korea, Rep.           1.094499e+12
Russian Federation    1.524916e+12
Saudi Arabia          5.282072e+11
United Kingdom        2.441173e+12
Name: 2010, dtype: float64


True

In [15]:
# Find the ylim for your GDP to Military Spending plot.  
print(max(cleandat['Military_Expenditure']))
# Compare the military spending of our 10 countries
fig, ax = plt.subplots(figsize=(20,10))
# get the data we want with the correct labels
milsp =cleandat.groupby(['Year','Country']).sum()['Military_Expenditure'].unstack()
# make our y-ticks visible, we will make the y-axis USD in billions.
milsp = milsp/1000000000
milp = milsp.plot(kind='bar',ax=ax,title="Military Expenditures from 2010-17",yticks=list(range(0,240,10)))
milp.set_ylabel("2017 USD in billions")
plt.show()

228230672394.564


In [16]:

# find the ylim for your GDP to Military Spending Plot.  
max(cleandat['GDP'])

12237700479375.0

In [17]:
fig, ax = plt.subplots(figsize=(20,10))
# select the columns we want from our datframe
gdp_v_mil =cleandat.groupby(['Year','Country']).sum()[['Military_Expenditure','GDP']].unstack()
# do this so we can view the graph in trillions
gdp_v_mil = gdp_v_mil/1000000000000
#compare each country's GDP to their Military spending with a series of 8 side by side bar charts.
gdpvmp = gdp_v_mil.plot(kind='bar',ax=ax,title="Military Expenditures from 2010-17",yticks=list(np.linspace(0,13,27)))
gdpvmp.set_ylabel("2017 USD in Trillions")
plt.legend(fontsize=7,bbox_to_anchor=[.94,1.1])
plt.show()
# as we can see, GDP seems to dwarf military spending for all countries. 
# we may want to get a better visual of this, though. 
# let's try getting military spending as a percentage of GDP.

In [18]:
#cleandat.head()
fig, ax = plt.subplots(figsize=(20,10))
spendpct = cleandat.groupby(['Year','Country']).sum()['milex_as_pct_of_gdp'].unstack()
print(spendpct.head())
spendpct_plt = spendpct.plot(kind='bar',ax=ax,title="Military Expenditures as a percentage of GDP from 2010-17")
spendpct_plt.set_ylabel("Military Spending / GDP")
#plt.legend(fontsize=7,bbox_to_anchor=[.94,1.1])
plt.show()

Country     China    France   Germany  Iran, Islamic Rep.    Israel     Italy  \
Year                                                                            
2010     0.018967  0.023379  0.013537            0.027843  0.062383  0.016956   
2011     0.018219  0.022577  0.012811            0.024469  0.062373  0.016751   
2012     0.018386  0.022369  0.013113            0.027543  0.060419  0.016274   
2013     0.018723  0.022204  0.012240            0.025667  0.059155  0.015908   
2014     0.019153  0.022304  0.011850            0.022789  0.059922  0.014673   

Country  Korea, Rep.  Russian Federation  Saudi Arabia  United Kingdom  
Year                                                                    
2010        0.025743            0.038507      0.085657        0.023793  
2011        0.025774            0.034234      0.072301        0.023007  
2012        0.026130            0.036860      0.076766        0.021974  
2013        0.026146            0.038462      0.089761        0.020

In [19]:
#Create the variables to compare per person military spending to per person GDP 
cleandat['per_person_milex'] = cleandat['Military_Expenditure']/cleandat['Population']
cleandat['per_person_gdp'] = cleandat['GDP']/cleandat['Population']
# This should be = to the milex_as_pct_of_gdp column bc the ratios aren't changing..... just the scales of the 
#numbers we are dividing.
cleandat['ratio_test'] = cleandat['per_person_milex']/cleandat['per_person_gdp']
cleandat['pp_milex_as_pct_of_gdp'] = cleandat['milex_as_pct_of_gdp']/cleandat['Population']
# Although not everything matches up when I use my logic statement, I believe that it's because of some sort of rounding error 
# For example, index 501 and 504 read false in the third logic statement, but the values of both columns (ratio_test and milex_as_pct of gdp)
# appear to be identical, so I am going to assume that the columns are still equal. 
print((cleandat['ratio_test']==cleandat['milex_as_pct_of_gdp']).all())
print((cleandat['ratio_test']==cleandat['milex_as_pct_of_gdp']).any())
print((cleandat['ratio_test']==cleandat['milex_as_pct_of_gdp']))
cleandat[['ratio_test','milex_as_pct_of_gdp']].head()



False
True
500     True
501    False
502     True
503     True
504    False
505    False
506     True
507    False
508     True
509     True
510     True
511    False
512     True
513     True
514     True
515     True
516     True
517     True
518     True
519    False
520     True
521     True
522     True
523     True
524     True
525     True
526     True
527     True
528     True
529     True
       ...  
550    False
551     True
552     True
553     True
554    False
555    False
556     True
557     True
558     True
559    False
560     True
561     True
562     True
563    False
564    False
565     True
566     True
567     True
568    False
569     True
570     True
571     True
572     True
573     True
574    False
575     True
576    False
577     True
578     True
579     True
Length: 80, dtype: bool


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://panda

Unnamed: 0,ratio_test,milex_as_pct_of_gdp
500,0.018967,0.018967
501,0.013537,0.013537
502,0.023379,0.023379
503,0.023793,0.023793
504,0.027843,0.027843


In [20]:
fig, ax = plt.subplots(figsize=(20,10))
# select the columns we want from our datframe
pp_gdp_v_mil =cleandat.groupby(['Year','Country']).sum()[['per_person_milex','per_person_gdp']].unstack()
# do this so we can view the graph in trillions

#compare each country's GDP to their Military spending with a series of 8 side by side bar charts.
pp_gdpvmp = pp_gdp_v_mil.plot(kind='bar',ax=ax,title=" Per Person Military Expenditures and GDP from 2010-17")
pp_gdpvmp.set_ylabel("2017 USD")
plt.legend(fontsize=6.5,bbox_to_anchor=[.97,1.1])
plt.show()

In [21]:
fig, ax = plt.subplots(figsize=(20,10))
# select the columns we want from our datframe
pp_gdp_v_mil =cleandat.groupby(['Year','Country']).sum()['per_person_milex'].unstack()
#compare each country's GDP to their Military spending with a series of 8 side by side bar charts.
pp_gdpvmp = pp_gdp_v_mil.plot(kind='bar',ax=ax,title=" Per Person Military Expenditures from 2010-17")
pp_gdpvmp.set_ylabel("2017 USD")
plt.legend()
plt.show()

In [22]:
fig, ax = plt.subplots(figsize=(20,10))
# select the columns we want from our datframe
pp_gdp_v_mil =cleandat.groupby(['Year','Country']).sum()['per_person_gdp'].unstack()
# do this so we can view the graph in trillions
pp_gdp_v_mil = pp_gdp_v_mil
#compare each country's GDP to their Military spending with a series of 8 side by side bar charts.
pp_gdpvmp = pp_gdp_v_mil.plot(kind='bar',ax=ax,title=" Per Person GDP from 2010-17")
pp_gdpvmp.set_ylabel("2017 USD")
plt.legend(bbox_to_anchor=[.97,1.1])
plt.show()

In [23]:
fig, ax = plt.subplots(figsize=(20,10))
# select the columns we want from our datframe
pp_gdp_v_mil_pct =cleandat.groupby(['Year','Country']).sum()['pp_milex_as_pct_of_gdp'].unstack()
#compare each country's GDP to their Military spending with a series of 8 side by side bar charts.
pp_gdpvmp_pct = pp_gdp_v_mil_pct.plot(kind='bar',ax=ax,title=" Per Person Military Spending / GDP from 2010-17")
pp_gdpvmp_pct.set_ylabel(" Per Person Military Spending / GDP")
plt.legend(bbox_to_anchor=[.97,1.1])
plt.show()
# Wow, relative to both population size and GDP,Israel and Saudi Arabia are far and away spending the most on 
# Their militaries. 
# You can barely see China on the graph, which makes sense because china was ranked last pretty much across the board 
# for per person GDP and per person military spending, which is surprising, because they have such a large economy and 
# spend so much on their military, but as we can see, they also have a much larger population than the other 10 countries, relatively. 

# Germany consistently has the highest GDP per person (ever year except for 2016), which can be thought of the largest economy, population size being equal across all 10 countries, 
# but they spend the second least amount of money on their military relative to their population size and gross domestic product. 


In [24]:
# Find the fastest growing countries in terms of percentage and fixed dollar amount
# Since there are variations every year, I decided to compare these just by looking at the endpoints of our data, 2010 and 2017
# to obtain the overall change in military spending.

In [25]:
# Obtain the 2010 data via conditional slicing with the iloc function.
c2010 = cleandat.iloc[0:10,:]

c2010 = c2010.set_index('Country')
c2010

Unnamed: 0_level_0,Year,Military_Expenditure,GDP,Population,milex_as_pct_of_gdp,per_person_milex,per_person_gdp,ratio_test,pp_milex_as_pct_of_gdp
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
China,2010,115711800000.0,6100620000000.0,1337705000.0,0.018967,86.500223,4560.512586,0.018967,1.417892e-11
Germany,2010,46255520000.0,3417095000000.0,81776930.0,0.013537,565.630444,41785.556913,0.013537,1.655296e-10
France,2010,61781750000.0,2642610000000.0,65027510.0,0.023379,950.08637,40638.334004,0.023379,3.595258e-10
United Kingdom,2010,58082850000.0,2441173000000.0,62766360.0,0.023793,925.381752,38893.018494,0.023793,3.790725e-10
"Iran, Islamic Rep.",2010,13561270000.0,487069600000.0,74567510.0,0.027843,181.865698,6531.92743,0.027843,3.733875e-10
Israel,2010,14573190000.0,233609500000.0,7623600.0,0.062383,1911.588787,30642.940617,0.062383,8.182838e-09
Italy,2010,36032290000.0,2125058000000.0,59277420.0,0.016956,607.85863,35849.373198,0.016956,2.860433e-10
"Korea, Rep.",2010,28175180000.0,1094499000000.0,49554110.0,0.025743,568.574031,22086.952919,0.025743,5.194832e-10
Russian Federation,2010,58720230000.0,1524916000000.0,142849400.0,0.038507,411.063732,10674.987707,0.038507,2.695648e-10
Saudi Arabia,2010,45244530000.0,528207200000.0,27425680.0,0.085657,1649.714426,19259.587257,0.085657,3.123234e-09


In [26]:
# Obtain the 2010 data via conditional slicing with the iloc function.
c2017 = cleandat.iloc[70:80,:]
c2017 = c2017.set_index('Country')
# Make a new df that we will use to store the overall differences in military spending from 2010-2017
raw_chg = pd.DataFrame(c2017['Military_Expenditure'] - c2010['Military_Expenditure'])
# Make another df that we will use to store the percentage change in military spending from 2010-2017.
pct_chg = pd.DataFrame((c2017['Military_Expenditure'] - c2010['Military_Expenditure'])/ c2010['Military_Expenditure'])
# divide our overall differences in military spending by 1 billion to get the result in current USD in billions in the graph in the next block. 
raw_chg = raw_chg/1000000000
# multiply by 100 to get the result as percentage points for the graph two blocks down. 
pct_chg = pct_chg * 100

In [32]:
# Plot the overall differnces in military spending for the 10 countries from 2010-2017
fig, ax = plt.subplots(figsize=(20,10))
raw_chg.plot(kind='bar',rot=0,title='Overall Change in Military Spending from 2010-2017').set_ylabel("Current USD in Billions ")
# Some of these countries are spending less in 2017 than in 2010, so we want add a horizontal line to the plot where y = 0 
# so that it's easier to which countries are spending less now than they did in 2010. 
plt.axhline(y=0,linewidth=1, color='k')
plt.legend()
plt.show()
# China grew the most in terms of military spending as a fixed value by a wide margin. The next fastest growing countries were Saudi Arabia and South Korea.

Unnamed: 0_level_0,Military_Expenditure
Country,Unnamed: 1_level_1
China,112.518891
Germany,-1.92624
France,-4.011625
United Kingdom,-10.889752
"Iran, Islamic Rep.",0.986778
Israel,1.915842
Italy,-6.795895
"Korea, Rep.",10.977322
Russian Federation,7.61476
Saudi Arabia,24.1688


In [34]:
# Plot the percentage change in military spending for the 10 countries from 2010-2017
fig, ax = plt.subplots(figsize=(20,10))
pct_chg.plot(kind='bar',rot=0,title='Percent Change in Military Spending from 2010-2017').set_ylabel('Percentage Points')
# Some of these countries are spending less in 2017 than in 2010, so we want add a horizontal line to the plot where y = 0 
# so that it's easier to which countries are spending less now than they did in 2010. 
plt.axhline(y=0,linewidth=1, color='k')
plt.legend()
plt.show()
# China almost doubled its military spending over the 8 year period the next fastest growing countries are Saudi Arabia and South Korea (which appears to have grown 2x as fast as the next fastet growing country)



Unnamed: 0_level_0,Military_Expenditure
Country,Unnamed: 1_level_1
China,97.240653
Germany,-4.164345
France,-6.49322
United Kingdom,-18.748654
"Iran, Islamic Rep.",7.276439
Israel,13.146344
Italy,-18.86057
"Korea, Rep.",38.960962
Russian Federation,12.967866
Saudi Arabia,53.418166


In [29]:
# associations and correlations 
# GDP V. Military Spending
fig, ax = plt.subplots(figsize=(20,10))
cleandat.plot(kind='scatter',x = 'GDP', y = 'Military_Expenditure',title='Military Spending v. GDP')
plt.show()
# There definitely isn't a negative relationship, but I think those points on the right hand side of the graph all belong to china
# and skew the realtionship

In [30]:
fig, ax = plt.subplots(figsize=(20,10))
no_china = cleandat.loc[cleandat['Country']!='China']
no_china.plot(kind='scatter',x = 'GDP', y = 'Military_Expenditure',title="Military Spending v. GDP with China Excluded")
plt.show()
# I was right
# there isn't much of a relationship here, but there does appear to be a strong positive correlation between military spending and  gdp for China only. 

In [31]:
fig, ax = plt.subplots(figsize=(20,10))
cleandat.plot(kind='scatter',x = 'Population', y = 'Military_Expenditure',title='Military Spending v. Population')
plt.show()