In [None]:
!pip -q install requests beautifulsoup4 pandas sqlalchemy psycopg2-binary

Installed PostgreSQL on my local laptop, then ran the following command (PowerShell) to create the database: `& "D:\postgre\bin\psql.exe" -U postgres -h localhost -p 5432 -c "CREATE DATABASE samsung;"`

Connect to the database from the notebook:

In [3]:
import os

from sqlalchemy import create_engine, text

# The connection string is taken from the DATABASE_URL environment variable so
# real credentials never have to be saved in the notebook. The fallback is the
# local development database this notebook was originally written against.
DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql+psycopg2://postgres:postgres@localhost:5432/samsung",
)
engine = create_engine(DATABASE_URL, future=True)

# Smoke test: run a trivial query so connection problems surface here rather
# than in a later cell that depends on `engine`.
with engine.connect() as conn:
    print(conn.execute(text("SELECT 1")).fetchall())

[(1,)]


In [None]:
from sqlalchemy import text

# Schema for the scraped devices. The TEXT columns hold the raw strings taken
# from each page; the three INT columns at the end hold numbers parsed from
# those strings. IF NOT EXISTS makes the cell safe to re-run.
create_table_sql = """
CREATE TABLE IF NOT EXISTS phones (
    id SERIAL PRIMARY KEY,
    model_name TEXT UNIQUE NOT NULL,
    release_date TEXT,
    display TEXT,
    battery TEXT,
    camera TEXT,
    ram TEXT,
    storage TEXT,
    price TEXT,
    source_url TEXT,

    battery_mah INT,
    main_camera_mp INT,
    price_usd INT
);
"""

# engine.begin() wraps the statement in a transaction that is committed when
# the block exits without an error.
ddl_statement = text(create_table_sql)
with engine.begin() as connection:
    connection.execute(ddl_statement)

print("Table created: phones")

✅ Table created: phones


In [None]:
import re, time, requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from sqlalchemy import text

# Site root; relative page paths are resolved against it with urljoin.
BASE = "https://www.gsmarena.com/"
# Headers sent with every page request made by get_soup (browser-like User-Agent).
HEADERS = {"User-Agent": "Mozilla/5.0"}

def get_soup(url):
    """Download ``url`` and return its HTML parsed into a BeautifulSoup tree.

    The request carries the module-level HEADERS and uses a 30-second timeout.
    A 4xx/5xx response raises through ``raise_for_status`` instead of being
    parsed.
    """
    response = requests.get(url, timeout=30, headers=HEADERS)
    response.raise_for_status()
    html = response.text
    return BeautifulSoup(html, "html.parser")

def parse_battery_mah(txt):
    """Extract the battery capacity in mAh from a free-text spec string.

    Args:
        txt: Raw battery description (e.g. "Li-Ion 5000 mAh"); may be None.

    Returns:
        The capacity as an int, or None when no "<number> mAh" token is found.

    Accepts 2 to 6 digits so that small batteries (under 1000 mAh, e.g.
    wearables) are not dropped, and tolerates a thousands separator
    ("5,000 mAh").
    """
    if txt is None:
        return None
    # Drop a comma only when it sits between digits as a thousands separator,
    # so list-style commas elsewhere in the text cannot glue numbers together.
    cleaned = re.sub(r"(?<=\d),(?=\d{3}\b)", "", str(txt))
    m = re.search(r"(\d{2,6})\s*mAh", cleaned, re.I)
    return int(m.group(1)) if m else None

def parse_main_camera_mp(txt):
    """Extract the first megapixel figure from a camera description.

    Args:
        txt: Raw camera description (e.g. "200 MP, f/1.7, ..."); may be None.

    Returns:
        The megapixel count as an int, or None when no "<number> MP" token is
        present. Decimal values are truncated ("12.2 MP" -> 12) so the result
        still fits an integer column.
    """
    # The optional fractional part must be consumed by the pattern; otherwise
    # "12.2 MP" matches only the trailing "2".
    m = re.search(r"(\d+(?:\.\d+)?)\s*MP", str(txt), re.I)
    return int(float(m.group(1))) if m else None

def parse_price_usd(txt):
    """Extract a whole-dollar price from a free-text price string.

    Args:
        txt: Raw price text, possibly listing several currencies
            (e.g. "$ 1,299.99 / € 1,449.00"); may be None or empty.

    Returns:
        The integer part of the USD amount when one is marked with "$" or
        "USD". When no USD marker is present, falls back to the first 3-5
        digit number in the text (the original heuristic); that value may be
        in another currency. None when nothing numeric is found.
    """
    if not txt:
        return None
    cleaned = str(txt).replace(",", "")

    # Prefer an amount explicitly tagged as dollars, wherever it appears.
    # NOTE(review): "$" is assumed to mean US dollars here - confirm if the
    # source can list other dollar currencies.
    usd = re.search(r"\$\s*(\d+)", cleaned) or re.search(
        r"(\d+)(?:\.\d+)?\s*USD", cleaned, re.I
    )
    if usd:
        return int(usd.group(1))

    # Legacy fallback: first 3-5 digit run, currency unknown.
    m = re.search(r"(\d{3,5})", cleaned)
    return int(m.group(1)) if m else None

def collect_samsung_urls(limit=25):
    """Collect absolute device-page URLs from the Samsung listing page.

    Args:
        limit: Maximum number of URLs to return. ``None`` means no cap; zero
            or a negative value returns an empty list without fetching.

    Returns:
        A list of unique absolute URLs, in page order, for every link inside
        ``div.makers`` whose href ends in ".php".

    Raises:
        Whatever ``get_soup`` raises when the listing page cannot be fetched.
    """
    if limit is not None and limit <= 0:
        return []

    listing_url = urljoin(BASE, "samsung-phones-9.php")
    soup = get_soup(listing_url)

    urls = []
    seen = set()
    for a in soup.select("div.makers a"):
        href = a.get("href")
        if not href or not href.endswith(".php"):
            continue
        full_url = urljoin(BASE, href)
        # Skip repeats so the same device is never scraped twice.
        if full_url in seen:
            continue
        seen.add(full_url)
        urls.append(full_url)
        if limit is not None and len(urls) >= limit:
            break
    return urls

def _collect_specs(soup):
    """Read the spec rows into a flat lookup and a section-aware lookup.

    Returns ``(flat, sectioned)``: ``flat`` maps row label -> value (a later
    row with the same label overwrites an earlier one); ``sectioned`` maps
    ``(section, label)`` -> value, where section is the lower-cased text of
    the most recent ``<th>`` seen while walking the rows.
    """
    flat = {}
    sectioned = {}
    section = None
    for row in soup.select("#specs-list tr"):
        # NOTE(review): assumes the first row of each spec group carries a
        # <th> naming the group (Display, Battery, Main Camera, ...) - verify
        # against the live page markup.
        heading = row.select_one("th")
        if heading:
            section = heading.get_text(" ", strip=True).lower()
        k = row.select_one("td.ttl")
        v = row.select_one("td.nfo")
        if k and v:
            label = k.get_text(" ", strip=True)
            value = v.get_text(" ", strip=True)
            flat[label] = value
            if section is not None:
                # First row wins inside a section, so later rows that repeat
                # a label do not clobber the main value.
                sectioned.setdefault((section, label), value)
    return flat, sectioned


def _pick_spec(flat, sectioned, section, label):
    """Look a label up within ``section``; use the flat lookup if no sections were detected."""
    if sectioned:
        return sectioned.get((section, label))
    return flat.get(label)


def parse_phone_page(url):
    """Scrape one device page into a record matching the ``phones`` table.

    Args:
        url: Absolute URL of the device page.

    Returns:
        A dict with the raw text fields (model_name, release_date, display,
        battery, camera, ram, storage, price, source_url) plus the parsed
        numeric fields battery_mah, main_camera_mp and price_usd. Missing
        values are None (``display`` is an empty string when no part is found).

    Raises:
        Whatever ``get_soup`` raises when the page cannot be fetched.

    Row labels are not unique across the page: the same label (notably
    "Type") is read for both the display and the battery, so a single flat
    dict makes one overwrite the other. Labels that can repeat are therefore
    looked up per section.
    """
    soup = get_soup(url)

    name_tag = soup.select_one("h1.specs-phone-name-title")
    model_name = name_tag.get_text(strip=True) if name_tag else None

    specs, sectioned = _collect_specs(soup)

    release_date = specs.get("Announced") or specs.get("Status")

    display_parts = [
        _pick_spec(specs, sectioned, "display", label)
        for label in ("Type", "Size", "Resolution")
    ]
    display = " | ".join(filter(None, display_parts))

    battery = _pick_spec(specs, sectioned, "battery", "Type")

    # Take the camera description from the main-camera group only, so a row
    # with the same label in another group cannot be picked up instead.
    camera = next(
        (
            value
            for value in (
                _pick_spec(specs, sectioned, "main camera", label)
                for label in ("Single", "Dual", "Triple", "Quad")
            )
            if value
        ),
        None,
    ) or specs.get("Main Camera")

    internal = specs.get("Internal")
    ram = None
    storage = internal  # full "Internal" text, RAM part included

    if internal:
        m = re.search(r"(\d+)\s*GB\s*RAM", internal, re.I)
        ram = f"{m.group(1)}GB" if m else None

    price = specs.get("Price")

    return {
        "model_name": model_name,
        "release_date": release_date,
        "display": display,
        "battery": battery,
        "camera": camera,
        "ram": ram,
        "storage": storage,
        "price": price,
        "source_url": url,
        "battery_mah": parse_battery_mah(battery),
        "main_camera_mp": parse_main_camera_mp(camera),
        "price_usd": parse_price_usd(price),
    }

In [7]:
# Named bind parameters match the keys of the dicts returned by
# parse_phone_page. ON CONFLICT makes re-running the cell safe: a model that
# is already stored is skipped instead of raising a unique-constraint error.
insert_sql = """
INSERT INTO phones
(model_name, release_date, display, battery, camera, ram, storage, price, source_url,
 battery_mah, main_camera_mp, price_usd)
VALUES
(:model_name, :release_date, :display, :battery, :camera, :ram, :storage, :price, :source_url,
 :battery_mah, :main_camera_mp, :price_usd)
ON CONFLICT (model_name) DO NOTHING;
"""

urls = collect_samsung_urls(limit=25)
print("Collected URLs:", len(urls))

records = []
for i, u in enumerate(urls, start=1):
    try:
        rec = parse_phone_page(u)
        print(f"[{i}] {rec['model_name']}")
        # model_name is NOT NULL in the table, so pages without one are skipped.
        if rec["model_name"]:
            records.append(rec)
    except Exception as e:
        # Best effort: one bad page must not abort the whole scrape.
        print("Failed:", u, e)
    finally:
        # Pause after every request, including failed ones, so errors do not
        # turn the loop into a rapid retry burst against the site.
        time.sleep(1)  # polite

inserted = 0
if records:  # nothing scraped -> nothing to send to the database
    count_sql = text("SELECT COUNT(*) FROM phones")
    with engine.begin() as conn:
        # Row-count difference reports what was actually added; rows skipped
        # by ON CONFLICT are not counted.
        before = conn.execute(count_sql).scalar()
        conn.execute(text(insert_sql), records)
        inserted = conn.execute(count_sql).scalar() - before

print("Inserted records:", inserted)

Collected URLs: 25
[1] Samsung Galaxy S26 Ultra
[2] Samsung Galaxy S26+
[3] Samsung Galaxy F70e
[4] Samsung Galaxy S26
[5] Samsung Galaxy A07
[6] Samsung Galaxy Z TriFold
[7] Samsung Galaxy M17
[8] Samsung Galaxy F07
[9] Samsung Galaxy M07
[10] Samsung Galaxy A17 4G
[11] Samsung Galaxy Tab A11+
[12] Samsung Galaxy Tab A11
[13] Samsung Galaxy F17
[14] Samsung Galaxy S25 FE
[15] Samsung Galaxy Tab S11 Ultra
[16] Samsung Galaxy Tab S11
[17] Samsung Galaxy Tab S10 Lite
[18] Samsung Galaxy A07 4G
[19] Samsung Galaxy A17
[20] Samsung Galaxy F36
[21] Samsung Galaxy Z Fold7
[22] Samsung Galaxy Z Flip7
[23] Samsung Galaxy Z Flip7 FE
[24] Samsung Galaxy Watch8 Classic
[25] Samsung Galaxy Watch8
✅ Inserted records: 25


In [8]:
# Sanity check: how many rows does the phones table hold right now?
count_query = text("SELECT COUNT(*) FROM phones")
with engine.connect() as conn:
    result = conn.execute(count_query)
    total = result.scalar()
print("Total phones in DB:", total)

Total phones in DB: 25
