### Extracting the Country Codes & Regions Dataset
This notebook fetches country information from the REST Countries API, simplifies it to the essential fields (country name, ISO codes, region), and saves the result as JSON for further analysis or use in data projects. A subset of the columns is then loaded into a SQL Server table.

In [1]:
# Requests: fetch data from APIs; JSON: parse and save data in JSON format
import requests
import json

# REST Countries endpoint, restricted to the four fields used below
url = "https://restcountries.com/v3.1/all?fields=name,cca2,cca3,region"
# Without a timeout requests waits indefinitely, so an unresponsive API
# would hang this cell forever.
response = requests.get(url, timeout=30)

# Abort on any non-200 status, showing the status code and response body
if response.status_code != 200:
    print("Error fetching data:", response.status_code, response.text)
    raise SystemExit

countries_data = response.json()

# Flatten each API record to the fields we keep.
# `name` is a nested object; a missing or empty `name` yields None.
simplified_data = [
    {
        "country": (record.get("name") or {}).get("common"),
        "cca2": record.get("cca2"),
        "cca3": record.get("cca3"),
        "region": record.get("region"),
        # Placeholder: the requested fields carry no postal-code data
        "postal_code_format": None,
    }
    for record in countries_data
]

# Save as JSON
with open("countries_info.json", "w", encoding="utf-8") as f:
    json.dump(simplified_data, f, indent=4)

print("countries_info.json created successfully!")


countries_info.json created successfully!


In [3]:
# Read the saved file back in to check what was written
with open("countries_info.json", "r", encoding="utf-8") as json_file:
    countries = json.load(json_file)

# Report the number of records
print(f"Total countries: {len(countries)}")

# Print a sample: the first 5 records, one per line
sample_size = 5
for position in range(min(sample_size, len(countries))):
    print(countries[position])


Total countries: 250
{'country': 'Andorra', 'cca2': 'AD', 'cca3': 'AND', 'region': 'Europe', 'postal_code_format': None}
{'country': 'Trinidad and Tobago', 'cca2': 'TT', 'cca3': 'TTO', 'region': 'Americas', 'postal_code_format': None}
{'country': 'Serbia', 'cca2': 'RS', 'cca3': 'SRB', 'region': 'Europe', 'postal_code_format': None}
{'country': 'United Kingdom', 'cca2': 'GB', 'cca3': 'GBR', 'region': 'Europe', 'postal_code_format': None}
{'country': 'Russia', 'cca2': 'RU', 'cca3': 'RUS', 'region': 'Europe', 'postal_code_format': None}


In [4]:
import pandas as pd

# Turn the list of country records into a table
df_countries = pd.DataFrame(countries)

# Collect the three summaries first, then print them in order
first_rows = df_countries.head()                        # preview of the first rows
column_labels = df_countries.columns                    # column index
region_counts = df_countries['region'].value_counts()   # countries per region

print(first_rows)
print(column_labels)
print(region_counts)


               country cca2 cca3    region postal_code_format
0              Andorra   AD  AND    Europe               None
1  Trinidad and Tobago   TT  TTO  Americas               None
2               Serbia   RS  SRB    Europe               None
3       United Kingdom   GB  GBR    Europe               None
4               Russia   RU  RUS    Europe               None
Index(['country', 'cca2', 'cca3', 'region', 'postal_code_format'], dtype='object')
Africa       59
Americas     56
Europe       53
Asia         50
Oceania      27
Antarctic     5
Name: region, dtype: int64


In [13]:
# Keep just the ISO alpha-2 code, the country name and the region (in that order)
selected_columns = ["cca2", "country", "region"]
Countries_df = df_countries.loc[:, selected_columns]

In [14]:
# To glance through the whole table.
# option_context lifts the row/column display limits only for this block,
# so later cells keep their normal truncation instead of inheriting
# unlimited output. display() renders immediately, i.e. while the
# options are still in effect.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(Countries_df)

Unnamed: 0,cca2,country,region
0,AD,Andorra,Europe
1,TT,Trinidad and Tobago,Americas
2,RS,Serbia,Europe
3,GB,United Kingdom,Europe
4,RU,Russia,Europe
5,HK,Hong Kong,Asia
6,EH,Western Sahara,Africa
7,UY,Uruguay,Americas
8,BB,Barbados,Americas
9,PS,Palestine,Asia


In [15]:
# Library to manage database connections and execute SQL queries
from sqlalchemy import create_engine

# SQL Server connection
server = "*******\\SQLEXPRESS"
database = "DWETL"

# pyodbc URL using ODBC Driver 17 with trusted_connection=yes (no user/password in the URL)
connection_string = f"mssql+pyodbc://@{server}/{database}?driver=ODBC+Driver+17+for+SQL+Server&trusted_connection=yes"
engine = create_engine(connection_string)

# Load Countries_df into SQL Server.
# if_exists='replace' replaces any existing dbo.CountryCodes table on each run.
try:
    Countries_df.to_sql(
        'CountryCodes',
        con=engine,
        schema='dbo',
        if_exists='replace',
        index=False
    )
finally:
    # Release the engine's pooled connections even if the load fails;
    # otherwise every re-run of this cell leaves another pool open.
    engine.dispose()

print("Countries_df with region loaded successfully into SQL Server!")


Countries_df with region loaded successfully into SQL Server!
