In [185]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

# Global Firepower 2021
Using data published by [Global Firepower](https://www.globalfirepower.com/), this notebook scrapes each country's indicators and combines them into a single dataset for analysis.

In [186]:
def gfp_indicator_request(indicator, url, timeout=30):
    """
    Scrapes one Global Firepower ranking page into a two-column dataframe.

    Downloads the page at `url`, finds every country record in it and reads
    the country name and the indicator value as stripped text. The CSS class
    names used to locate the elements come from the module-level `dic_scraper`
    dictionary, which must be defined before this function is called.

    # ARGUMENTS
    indicator: (str) The name of the indicator; used as the value column name
    url: (str) Address of the page to scrape
    timeout: (float) Seconds to wait for the server before giving up

    # RETURNS
    (pd.DataFrame) One row per country record found, with the columns
    ['Country', indicator]. Values are the raw page strings, not numbers.

    # RAISES
    requests.HTTPError: if the server answers with an error status code
    requests.Timeout: if the server does not answer within `timeout` seconds
    AttributeError: if a record lacks one of the expected <div>/<span> elements
    """
    # Request: fail loudly on a hung connection or an HTTP error instead of
    # parsing an error page into an empty dataframe
    gfp_request = requests.get(url, timeout=timeout)
    gfp_request.raise_for_status()
    soup = BeautifulSoup(gfp_request.text, 'lxml')
    # Every country record lives in its own container <div>
    countries = soup.find_all('div', class_=dic_scraper['main'])
    # Look the fields up by name so the result does not depend on the
    # insertion order of dic_scraper
    fields = ('country name', 'value')
    data = [
        [country.find('div', class_=dic_scraper[field]).span.text.strip()
         for field in fields]
        for country in countries
    ]
    countries_df = pd.DataFrame(data, columns=['Country', indicator])
    return countries_df

In [200]:
def indicators_dataframe_bygroup(dataframes, on_="Country"):
    """
    Merges a dictionary of per-indicator dataframes into a single dataframe.

    The dataframes are merged one after another, in the dictionary's order,
    with `pd.merge` (inner join) on the column named by `on_`.

    # ARGUMENTS
    dataframes: (dict) A dictionary of dataframes by indicators
    on_: (str) A string used for merging the dataframes by the name of a column

    # RETURNS
    (pd.DataFrame) The merged dataframe. When only one dataframe is given,
    a copy of it is returned.

    # RAISES
    ValueError: if `dataframes` is empty
    """
    frames = list(dataframes.values())
    if not frames:
        raise ValueError("'dataframes' must contain at least one dataframe")
    # Start from a copy so a single-entry input never hands the caller the
    # very object stored in the dictionary
    merged_df = frames[0].copy()
    for frame in frames[1:]:
        # Merges the (already) merged dataframe with the current dataframe
        merged_df = pd.merge(merged_df, frame, on=on_)
    # Return the merged dataframe
    return merged_df

In [222]:
# CSS class names used to locate elements on a Global Firepower ranking page.
# The insertion order is kept as: main, rank, country name, value.
dic_scraper = dict([
    ('main', 'picTrans recordsetContainer boxShadow'),
    ('rank', 'rankNumContainer'),
    ('country name', 'countryNameContainer'),
    ('value', 'valueContainer'),
])

# Global Firepower pages to scrape: {group: {indicator: url}}.
# Each group is listed as ordered (indicator, url) pairs and turned into a
# dictionary, so both the group order and the indicator order are preserved.
gfp_dic = {
    group: dict(pages)
    for group, pages in (
        ('Manpower', (
            ('Total population', 'https://www.globalfirepower.com/total-population-by-country.php'),
            ('Reaching Military Age', 'https://www.globalfirepower.com/manpower-reaching-military-age-annually.php'),
            ('Active Service', 'https://www.globalfirepower.com/active-military-manpower.php'),
            ('Active Reserves', 'https://www.globalfirepower.com/active-reserve-military-manpower.php'),
            ('Paramilitary', 'https://www.globalfirepower.com/manpower-paramilitary.php'),
        )),
        ('Equipment', (
            # Airpower
            ('Fighters/Interceptors', 'https://www.globalfirepower.com/aircraft-total-fighters.php'),
            ('Attack/Strike', 'https://www.globalfirepower.com/aircraft-total-attack-types.php'),
            ('Helicopter Fleets', 'https://www.globalfirepower.com/aircraft-helicopters-total.php'),
            # Landpower
            ('Armored Fighting Vehicles', 'https://www.globalfirepower.com/armor-apc-total.php'),
            ('Towed Artillery', 'https://www.globalfirepower.com/armor-towed-artillery-total.php'),
            ('Rocket Projectors', 'https://www.globalfirepower.com/armor-mlrs-total.php'),
            # Navy
            ('Submarines', 'https://www.globalfirepower.com/navy-submarines.php'),
            ('Frigates', 'https://www.globalfirepower.com/navy-frigates.php'),
            ('Corvettes', 'https://www.globalfirepower.com/navy-corvettes.php'),
        )),
        ('Finances', (
            # All in (USD)
            ('Defense Budgets', 'https://www.globalfirepower.com/defense-spending-budget.php'),
            ('External Debt', 'https://www.globalfirepower.com/external-debt-by-country.php'),
            ('Purchasing Power Parity', 'https://www.globalfirepower.com/purchasing-power-parity.php'),
            ('Reserves of Foreign Exchange & Gold', 'https://www.globalfirepower.com/reserves-of-foreign-exchange-and-gold.php'),
        )),
        ('Logistics', (
            ('Airports', 'https://www.globalfirepower.com/major-serviceable-airports-by-country.php'),
            ('Labor Force', 'https://www.globalfirepower.com/labor-force-by-country.php'),
            ('Merchant Marine Strength', 'https://www.globalfirepower.com/merchant-marine-strength-by-country.php'),
            # (km)
            ('Railway Coverage', 'https://www.globalfirepower.com/railway-coverage.php'),
            # (km)
            ('Roadway Coverage', 'https://www.globalfirepower.com/roadway-coverage.php'),
        )),
        ('Natural Resources and Geography', (
            # Barrels Per Day (bbl/day)
            ('Oil Production', 'https://www.globalfirepower.com/oil-production-by-country.php'),
            # Barrels Per Day (bbl/day)
            ('Oil Consumption', 'https://www.globalfirepower.com/oil-consumption-by-country.php'),
            # (km2)
            ('Square Land Area', 'https://www.globalfirepower.com/square-land-area.php'),
        )),
    )
}

In [276]:
# Scrape every indicator listed in gfp_dic into its own dataframe,
# keyed by indicator name
dataframes_bygroup = {}
for group, indicators in gfp_dic.items():
    for indicator, url in indicators.items():
        dataframes_bygroup[indicator] = gfp_indicator_request(indicator, url)
    # Report the group only after all of its indicators have been fetched
    print('{}: DONE'.format(group))
print('ALL DONE')
# Merge all the dataframes into one
gfp_df = indicators_dataframe_bygroup(dataframes_bygroup)
# Strip everything that is not part of the number from the value columns.
# The pattern is a character class: it removes whitespace, '$', ',',
# parentheses and the letters k, m, b, l (the letters of the 'km' and 'bbl'
# unit suffixes). The first column ('Country') is left untouched.
value_columns = gfp_df.columns[1:]
gfp_df[value_columns] = gfp_df[value_columns].replace(r'[\s()kmbl$,]', '', regex=True)
# Rename the columns with units; the keys must match the indicator names
# used in gfp_dic, otherwise the column is silently left as it is
columns_dict_byunits = {
    'Defense Budgets': 'Defense Budgets (USD)',
    'External Debt': 'External Debt (USD)',
    'Purchasing Power Parity': 'Purchasing Power Parity (USD)',
    'Reserves of Foreign Exchange & Gold': 'Reserves of Foreign Exchange & Gold (USD)',
    'Railway Coverage': 'Railway Coverage (km)',
    'Roadway Coverage': 'Roadway Coverage (km)',
    'Oil Production': 'Oil Production (bbl)',
    'Oil Consumption': 'Oil Consumption (bbl)',
    'Square Land Area': 'Square Land Area (km2)',
    }
gfp_df = gfp_df.rename(columns=columns_dict_byunits)
# Change the string-object type of variables to float
# (the column labels changed with the rename, so select them again)
value_columns = gfp_df.columns[1:]
gfp_df[value_columns] = gfp_df[value_columns].astype('float64')
gfp_df.to_csv('Global Fire Power.csv')
gfp_df

Manpower: DONE
Equipment: DONE
Finances: DONE
Logistics: DONE
Natural Resources and Geography: DONE
ALL DONE


Unnamed: 0,Country,Total population,Reaching Military Age,Active Service,Active Reserves,Paramilitary,Fighters/Interceptors,Attack/Strike,Helicopter Fleets,Armored Fighting Vehicles,...,Purchasing Power Parity (USD),Reserves of Foreign Exchange & Gold (USD),Airports,Labor Force,Merchant Marine Strength,Railway Coverage (km),Roadway Coverage (km),Oil Production (bbl),Oil Consumption (bbl),Square Land Area (km2)
0,China,1.394016e+09,19752649.0,2185000.0,510000.0,660000.0,1200.0,371.0,902.0,35000.0,...,2.540000e+13,3.236000e+12,507.0,774710000.0,5594.0,86000.0,3860800.0,3775000.0,13500000.0,9596961.0
1,India,1.326093e+09,23141786.0,1445000.0,1155000.0,2527000.0,542.0,130.0,775.0,10000.0,...,9.500000e+12,4.098000e+11,346.0,521900000.0,1731.0,63974.0,3320410.0,710000.0,5000000.0,3287263.0
2,United States,3.326391e+08,4251793.0,1400000.0,845500.0,0.0,1956.0,761.0,5436.0,40000.0,...,1.950000e+13,1.233000e+11,13513.0,146128000.0,3673.0,224792.0,6586610.0,11000000.0,20000000.0,9826675.0
3,Indonesia,2.670264e+08,4561184.0,400000.0,400000.0,280000.0,41.0,38.0,188.0,1430.0,...,3.402500e+12,1.302000e+11,673.0,129366000.0,9879.0,5042.0,437759.0,775000.0,1630000.0,1904569.0
4,Pakistan,2.235006e+08,4327620.0,654000.0,550000.0,500000.0,357.0,90.0,331.0,9635.0,...,1.110500e+12,1.846000e+10,151.0,61710000.0,54.0,7791.0,260760.0,90000.0,575000.0,796095.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,Bahrain,1.505003e+06,17270.0,8200.0,0.0,11250.0,17.0,0.0,53.0,845.0,...,7.300000e+10,2.349000e+09,4.0,831600.0,261.0,0.0,4122.0,40000.0,55000.0,760.0
136,Estonia,1.228624e+06,14087.0,6500.0,12000.0,16000.0,0.0,0.0,0.0,180.0,...,4.200000e+10,3.450000e+08,18.0,648000.0,69.0,1196.0,58412.0,0.0,27500.0,45228.0
137,Bhutan,7.823180e+05,4210.0,7000.0,0.0,0.0,0.0,0.0,2.0,25.0,...,7.700000e+09,1.206000e+09,2.0,397900.0,0.0,0.0,10578.0,0.0,2000.0,38394.0
138,Montenegro,6.098590e+05,3368.0,2000.0,0.0,10000.0,0.0,0.0,10.0,100.0,...,1.150000e+10,1.077000e+09,5.0,167000.0,12.0,250.0,7762.0,0.0,4860.0,13812.0
