In [4]:
import pandas as pd
import re

# Load the CSV
df = pd.read_csv("battle_ready_countries.csv")

# Clean column names
df.columns = df.columns.str.strip()

# Print to confirm names
print("✅ Columns in file:", df.columns.tolist())

# Extractors
def extract_gdp(economy_str):
    match = re.search(r"\$([\d.,]+)\s*billion", str(economy_str))
    if match:
        return float(match.group(1).replace(",", "")) * 1e9
    return None

def extract_percentage(text):
    match = re.search(r"([\d.]+)\s*%", str(text))
    if match:
        return float(match.group(1))
    return None

def extract_birth_or_death(value):
    match = re.search(r"([\d.]+)", str(value))
    if match:
        return float(match.group(1))
    return None

def extract_literacy(value):
    match = re.search(r"([\d.]+)", str(value))
    if match:
        return float(match.group(1))
    return None

# Apply transformations
df['gdp'] = df['Economy: Real GDP (purchasing power parity)'].apply(extract_gdp)
df['military'] = df['Military and Security: Military expenditures'].apply(extract_percentage)
df['birth'] = df['People and Society: Birth rate'].apply(extract_birth_or_death)
df['death'] = df['People and Society: Death rate'].apply(extract_birth_or_death)
df['literacy'] = df['People and Society: Literacy - total population'].apply(extract_literacy)

# Keep only final columns
df = df[['Country', 'gdp', 'military', 'literacy', 'birth', 'death']]

# Check missing data
missing = df[df.isnull().any(axis=1)]
print("⚠️ Missing values in rows:")
print(missing['Country'])

# Save cleaned file
df.to_csv("battle_ready_countries_cleaned.csv", index=False)
print("✅ Saved: battle_ready_countries_cleaned.csv")


✅ Columns in file: ['Country', 'Economy: Real GDP (purchasing power parity)', 'Military and Security: Military expenditures', 'People and Society: Literacy - total population', 'People and Society: Birth rate', 'People and Society: Death rate']
⚠️ Missing values in rows:
3           American Samoa
4                  Andorra
6      Antigua and Barbuda
7                Argentina
9                    Aruba
              ...         
215                Vanuatu
217                Vietnam
218         Virgin Islands
219              West Bank
220                  Yemen
Name: Country, Length: 105, dtype: object
✅ Saved: battle_ready_countries_cleaned.csv
