In [None]:
import requests
from bs4 import BeautifulSoup
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

url = "https://en.wikipedia.org/wiki/List_of_countries_by_level_of_military_equipment"
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of parsing an error page.
page = requests.get(url, timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

content = soup.find(id="mw-content-text")
if content is None:
    # find() returns None when nothing matches; fail with a readable message
    # rather than an AttributeError on the next line.
    raise RuntimeError("No element with id 'mw-content-text' found at {}".format(url))
tbody_elements = content.find_all('tbody')
country_data = []

# Walk every <tbody> and turn each usable row into [name, value, value, ...]
for tbody in tbody_elements:
    rows = tbody.find_all('tr')

    for row in rows:
        # The text of the first link in the row is used as the country name
        country_name_cell = row.find('a')
        if country_name_cell:
            country_name = country_name_cell.text.strip()

            # Collect the non-empty text of every <td> after the first one
            data_cells = row.find_all('td')[1:]  # Skip the first <td> (country name)
            row_data = [td.text.strip() for td in data_cells if td.text.strip() != ""]

            # Rows without any data cells (e.g. header rows) are skipped
            if row_data:
                country_data.append([country_name] + row_data)

# Drop the first 3 collected rows, which the original author identified as faulty
if len(country_data) > 3:
    country_data = country_data[3:]

# When the 3rd element contains a '.', merge it into the 2nd element.
# NOTE: entry[1] + entry[2] is string concatenation, and the result is parsed
# as a float -- presumably a value that was split across two cells (TODO confirm).
# float() raises ValueError if the concatenated text is not numeric.
for entry in country_data:
    if len(entry) > 2 and '.' in entry[2]:
        entry[1] = str(float(entry[1] + entry[2]))
        # The 3rd element has been folded into the 2nd, so remove it
        entry.pop(2)

# Keep only the first 171 rows; the original author states that later rows
# are not part of the per-country military capabilities data
country_data = country_data[:171]

# Remove the '[j]' citation marker where it appears as the 3rd element.
# The length check avoids an IndexError on rows with fewer than 3 elements.
for entry in country_data:
    if len(entry) > 2 and entry[2] == '[j]':
        entry.pop(2)

# Sanity check: show one parsed row
print(country_data[-8])

In [None]:
# Collect (country, carrier count) for every row whose 4th element is a
# positive integer; rows with zero or non-numeric values are left out
carrier_rows = [
    (record[0], int(record[3]))
    for record in country_data
    if len(record) > 3 and record[3].isdigit() and int(record[3]) > 0
]
countries = [name for name, _ in carrier_rows]
aircraft_carriers = [count for _, count in carrier_rows]

# Tabulate the two parallel lists
df = pd.DataFrame({'Country': countries, 'Aircraft Carriers': aircraft_carriers})

# Draw the bar chart with Seaborn
plt.figure(figsize=(10, 6))
sns.barplot(data=df, x='Country', y='Aircraft Carriers')
plt.xticks(rotation=90)  # vertical labels so the country names stay readable
plt.title('Number of Aircraft Carriers by Country')
plt.show()


In [None]:
submarine_data = []

# The 10th and 11th elements of a row are added together as the submarine total;
# rows where either value is missing or non-numeric are skipped
for record in country_data:
    if len(record) <= 10:
        continue
    first_count, second_count = record[9], record[10]
    if not (first_count.isdigit() and second_count.isdigit()):
        continue
    submarines = int(first_count) + int(second_count)
    if submarines > 0:
        submarine_data.append([record[0], submarines])

# Rank by total (largest first) and keep the ten biggest fleets
ranked_submarines = list(submarine_data)
ranked_submarines.sort(key=lambda pair: pair[1], reverse=True)
submarine_data_sorted = ranked_submarines[:10]

# Tabulate the top 10
df_submarines = pd.DataFrame(submarine_data_sorted, columns=['Country', 'Total Submarines'])

# Draw the bar chart with Seaborn
plt.figure(figsize=(10, 6))
sns.barplot(data=df_submarines, x='Country', y='Total Submarines')
plt.xticks(rotation=90)  # vertical labels so the country names stay readable
plt.title('Top 10 Countries by Total Number of Submarines')
plt.show()

In [None]:
# Keep [country, count] for every row whose 15th element is a positive integer
satellite_data = [
    [record[0], int(record[14])]
    for record in country_data
    if len(record) > 14 and record[14].isdigit() and int(record[14]) > 0
]

# Rank by count (largest first) and keep the ten highest
ranked_satellites = list(satellite_data)
ranked_satellites.sort(key=lambda pair: pair[1], reverse=True)
satellite_data_sorted = ranked_satellites[:10]

# Tabulate the top 10
df_satellites = pd.DataFrame(satellite_data_sorted, columns=['Country', 'Satellites'])

# Draw the bar chart with Seaborn
plt.figure(figsize=(10, 6))
sns.barplot(data=df_satellites, x='Country', y='Satellites')
plt.xticks(rotation=90)  # vertical labels so the country names stay readable
plt.title('Top 10 Countries by Number of Military Satellites')
plt.show()

In [None]:
# Helper that extracts the comparison stats for one country (called below for Ukraine and Russia)
def _parse_count(value):
    """Return ``value`` as an int, or 0 when it is not a plain whole number.

    Thousands separators (``"1,234"``) are removed before parsing.
    """
    cleaned = value.replace(",", "")
    # isdecimal() (unlike isdigit()) rejects characters such as superscript
    # digits that int() cannot parse, so the conversion below cannot raise.
    return int(cleaned) if cleaned.isdecimal() else 0


def _parse_budget(value):
    """Return ``value`` as a float, or 0.0 when it cannot be parsed as a number."""
    try:
        return float(value.replace(",", ""))
    except ValueError:
        return 0.0


def get_relevant_stats(country_name, country_data):
    """Extract the comparison stats for one country from the scraped rows.

    Parameters
    ----------
    country_name : str
        Name to match against the first element of each row.
    country_data : list[list[str]]
        Rows of strings whose first element is the country name.

    Returns
    -------
    list or None
        ``[military_budget, main_battle_tanks, naval_assets, combat_aircraft,
        attack_helicopters]`` for the first matching row, where the budget is
        a float and the other values are ints. Any field that is missing or
        not numeric is reported as 0. Returns ``None`` when no row matches
        ``country_name``.
    """
    for entry in country_data:
        # Skip empty rows as well as rows for other countries
        if not entry or entry[0] != country_name:
            continue

        def count_at(index):
            # Short rows simply lack the trailing fields; treat them as 0
            return _parse_count(entry[index]) if len(entry) > index else 0

        # Military budget (element 2); falls back to 0.0 like the other
        # fields instead of raising on non-numeric text
        military_budget = _parse_budget(entry[1]) if len(entry) > 1 else 0.0

        # Main battle tanks (element 3)
        main_battle_tanks = count_at(2)

        # Naval assets (sum of elements 5-11, i.e. indexes 4..10)
        naval_assets = sum(count_at(i) for i in range(4, 11))

        # Combat aircraft (element 12)
        combat_aircraft = count_at(11)

        # Attack helicopters (element 13)
        attack_helicopters = count_at(12)

        return [military_budget, main_battle_tanks, naval_assets, combat_aircraft, attack_helicopters]

    # No row for this country
    return None

ukraine_stats = get_relevant_stats("Ukraine", country_data)
russia_stats = get_relevant_stats("Russia", country_data)

# get_relevant_stats returns None when the country is not in country_data;
# fail here with a clear message rather than a TypeError in the loop below.
for checked_country, checked_stats in (("Ukraine", ukraine_stats), ("Russia", russia_stats)):
    if checked_stats is None:
        raise ValueError("No row for {!r} found in country_data".format(checked_country))

# Scale each stat by the larger of the two countries' values so every pair of
# bars fits on the same 0-1 axis regardless of the stat's unit
stats_labels = ["Military Budget (US$ bn)", "Main Battle Tanks", "Naval Assets", "Combat Aircraft", "Attack Helicopters"]
normalized_stats = []

for i in range(len(stats_labels)):
    max_value = max(ukraine_stats[i], russia_stats[i])
    if max_value > 0:
        normalized_ukraine = ukraine_stats[i] / max_value
        normalized_russia = russia_stats[i] / max_value
    else:
        # Both values are 0: avoid dividing by zero and plot empty bars
        normalized_ukraine = normalized_russia = 0
    normalized_stats.append([stats_labels[i], normalized_ukraine, normalized_russia])

# One row per stat, one column per country
df_normalized = pd.DataFrame(normalized_stats, columns=['Stats', 'Ukraine', 'Russia'])

# Reshape to long form (one row per stat/country pair) so Seaborn can use
# the country as the hue
df_normalized_melted = df_normalized.melt(id_vars='Stats', var_name='Country', value_name='Relative Value')

# Draw the grouped bar chart
plt.figure(figsize=(10,6))
sns.barplot(x='Stats', y='Relative Value', hue='Country', data=df_normalized_melted)
plt.xticks(rotation=15)
plt.title('Relative Comparison of Military Stats between Ukraine and Russia')
plt.show()