In [5]:
import requests
from bs4 import BeautifulSoup

url = "https://www.rbz.co.zw/index.php/research/markets/exchange-rates"

# A timeout keeps the cell from hanging forever on an unresponsive server, and
# raise_for_status() stops here on an HTTP error instead of silently parsing
# an error page as if it were the real content.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Example: print every paragraph whose text mentions "inflation" (case-insensitive)
for p in soup.find_all("p"):
    if "inflation" in p.text.lower():
        print(p.text.strip())


In [10]:
import requests
import pdfplumber
import pandas as pd
import os
from datetime import datetime, timedelta
from tqdm import tqdm

# Download the daily exchange-rate PDFs for a date range, extract the table on
# the first page of each one, and write all rows (tagged with their date) to a
# single CSV.

# Output CSV
output_csv = "rbz_exchange_rates.csv"

# Date range (both ends inclusive, as iterated by pd.date_range below)
start_date = datetime(2025, 10, 31)
end_date = datetime(2025, 10, 31)

# Folder to store downloaded PDFs
os.makedirs("rbz_pdfs", exist_ok=True)

# Base URL pattern
base_url = "https://www.rbz.co.zw/documents/Exchange_Rates/{year}/{month}/RATES_{day}_{month_upper}_{year}.pdf"

# Seconds to wait on a single download before giving up on that date
request_timeout = 30

# Store extracted data (one DataFrame per successfully parsed PDF)
records = []

# Dates that produced no rows, reported once after the loop
skipped_dates = []

for single_date in tqdm(pd.date_range(start_date, end_date)):
    year = single_date.strftime("%Y")
    month = single_date.strftime("%B")
    day = single_date.strftime("%d")
    month_upper = month.upper()
    date_label = single_date.strftime("%Y-%m-%d")

    url = base_url.format(year=year, month=month, month_upper=month_upper, day=day)
    pdf_path = f"rbz_pdfs/RATES_{day}_{month_upper}_{year}.pdf"

    # Download PDF. A network failure on one date must not abort the whole range.
    try:
        response = requests.get(url, timeout=request_timeout)
    except requests.RequestException as e:
        print(f"Error downloading {url}: {e}")
        skipped_dates.append(date_label)
        continue

    # The Content-Type header may be absent or carry parameters
    # (e.g. "application/pdf; charset=binary"), so compare only the media type,
    # case-insensitively, and never index the header directly.
    media_type = response.headers.get("Content-Type", "").split(";")[0].strip().lower()
    if response.status_code != 200 or media_type != "application/pdf":
        skipped_dates.append(date_label)
        continue

    with open(pdf_path, "wb") as f:
        f.write(response.content)

    # Extract data from PDF (first page only)
    try:
        with pdfplumber.open(pdf_path) as pdf:
            first_page = pdf.pages[0]
            table = first_page.extract_table()
        if not table:
            skipped_dates.append(date_label)
            continue
        # First extracted row is used as the header, the rest as data rows.
        df = pd.DataFrame(table[1:], columns=table[0])
        # Drop repeated header labels per table, before concatenation:
        # pd.concat cannot align frames whose column labels are not unique.
        df = df.loc[:, ~df.columns.duplicated()]
        df["Date"] = date_label
        records.append(df)
    except Exception as e:
        print(f"Error reading {pdf_path}: {e}")
        skipped_dates.append(date_label)

# Combine all extracted tables
if records:
    combined_df = pd.concat(records, ignore_index=True, sort=False)
    combined_df.to_csv(output_csv, index=False)
    print(f"✅ Done! Saved {len(combined_df)} rows to {output_csv}")
else:
    print("⚠️ No data extracted. Check PDF URLs or connection.")

# Surface the dates that were dropped so gaps in the CSV are not silent.
if skipped_dates:
    print(f"Skipped {len(skipped_dates)} date(s) with no usable PDF: {', '.join(skipped_dates)}")



100%|██████████| 1/1 [00:01<00:00,  1.37s/it]

✅ Done! Saved 44 rows to rbz_exchange_rates.csv





In [17]:
# Debug check: `table` is whatever the extraction loop last assigned from
# first_page.extract_table(); this cell only works with that leftover kernel state.
print(table)

None


In [None]:
# Scratch sum; the stray trailing "s" that made this cell a SyntaxError is removed.
# NOTE(review): operands are unlabeled — confirm what they measure.
22840469+17303565

40144034

In [14]:
# Scratch arithmetic. NOTE(review): operands are unlabeled — confirm what they measure.
# The first literal is a bare expression that is not the cell's last line,
# so only the quotient below is displayed.
47_813_701_094
15_272_017_573/6_111_936

2498.7201392488405

In [15]:
# Scratch arithmetic. NOTE(review): operands are unlabeled; the quotient rounds to
# the 2498.72 factor used in a later cell — presumably the same rate, confirm.
47_813_701_094/19_135_278

2498.719960797016

In [18]:
# Scratch division. NOTE(review): operands are unlabeled — confirm what this ratio represents.
749_469/85_973

8.717492701196887

In [1]:
# Scratch division. NOTE(review): operands are unlabeled — confirm what this ratio represents.
13969/541

25.820702402957487

In [2]:
# Scales 63865622 by 2498.72. NOTE(review): 2498.72 matches the rounded quotients
# computed in other cells — presumably a conversion at that rate, confirm.
63865622*2498.72

159582307003.84

In [None]:
# NOTE(review): looks like the integer part of 63865622*2498.72 (159582307003.84)
# entered by hand — confirm, and prefer deriving it from the computation.
159_582_307_003