In [1]:
import math
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn.preprocessing
from sklearn import metrics
from math import sqrt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy import stats
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

import warnings
warnings.filterwarnings("ignore")
import acquire
import prepare



# Top Military Strengths by Defense Budget

### Project Description and Goals
This is an analysis of the 25 countries with the highest defense budgets. It delivers insight into current and future military capabilities at a glance. My overall goal here is to show which features are related to defense budget and how they make a military a force to be reckoned with. In the future, with more available data, we can deliver critical insight into the current and future size, shape, and capabilities of the world's key military powers.

### Data Acquisition

In [2]:
# Call acquire_military() from the project-local `acquire` module and bind the
# result to `df`, which the preparation cell below takes as its input.
# NOTE(review): the data source and return type are not visible in this notebook —
# see acquire.py to confirm where the data is read from.
df = acquire.acquire_military()

### Data Preparation

In [4]:
# Pass the acquired data through prepare.prep_military and rebind `df` to the result.
# NOTE(review): `df` is both the input and the output here, so re-running this cell
# without first re-running the acquire cell feeds already-prepared data back into
# prep_military — confirm that is safe to re-apply, or keep the raw frame under a
# separate name.
df = prepare.prep_military(df)
# Last expression of the cell: display the first rows of the prepared frame.
df.head()

Unnamed: 0,country,country_code,active_personnel,air_carriers,armored_vehicles,arty,attack_aircraft,avail_manpower,corvettes,defense_budget,...,square_land_area,subs,tanker_fleet,tanks,total_pop,trainers,transports,total_air_strength,total_sea_strength,total_land_strength
132,United States,USA,1390000,20,45193,2837,1693,147399295,22,770000000000,...,9826675,68,627,6612,334998398,2661,982,25447,220,56008
26,China,CHN,2000000,3,35000,5854,652,754864769,70,250240000000,...,9596961,79,3,5250,1397897720,399,286,5651,430,49264
103,Russia,RUS,850000,1,30122,14145,1283,69737187,86,154000000000,...,17098242,70,20,12420,142320790,522,445,8118,291,60078
131,United Kingdom,UKD,194000,2,5015,215,47,30857260,0,68000000000,...,243610,10,9,227,67081000,247,40,1291,67,5501
44,Germany,GER,184000,0,9217,121,130,36755601,5,50300000000,...,357022,6,0,266,79903481,37,43,1155,35,9642


#### Data Preparation Overview:
Before data preparation there were over 40 different measurements of military power from 140 different countries.
- Steps taken to clean and prep the data:
    1. renamed columns (avoided spacing for easier coding)
    2. inspected the data frame and found zeros or outliers where they likely should not be; these were handled as follows:
        1. changed to mean of entire feature.
        2. replaced outliers with zero.
    3. created additional features by grouping existing features that fit into a single feature, such as:
        1. attack_aircraft = any attack type aircraft
        2. air_carriers = any air type carriers
        3. arty = any type artillery
        4. grouped air, land, and sea strengths into one total per category (total_air_strength, total_land_strength, total_sea_strength)
    4. Dropped columns that were not necessary for this quick analysis (please see the prepare.py file for feature drops)
    5. re-sorted the column titles.
    6. Only took the top 2