In [1]:
import time
import requests
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from colorthief import ColorThief
import webcolors

# === Fungsi helper: RGB ke nama warna ===
def closest_color(requested_color):
    min_colors = {}
    for key, name in webcolors.CSS3_HEX_TO_NAMES.items():
        r_c, g_c, b_c = webcolors.hex_to_rgb(key)
        rd = (r_c - requested_color[0]) ** 2
        gd = (g_c - requested_color[1]) ** 2
        bd = (b_c - requested_color[2]) ** 2
        min_colors[(rd + gd + bd)] = name
    return min_colors[min(min_colors.keys())]

def get_color_name(rgb_color):
    try:
        return closest_color(rgb_color)
    except Exception:
        return 'unknown'

# === Setup Chrome ===
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-notifications")
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)

# === Login manual ===
driver.get('https://www.instagram.com/')
print("🔑 Silakan login manual dulu...")
time.sleep(40)

# === Profil target ===
username_target = 'dominos_id'
profile_url = f'https://www.instagram.com/{username_target}/'
driver.get(profile_url)
time.sleep(5)

# === Ambil kategori akun dari profil ===
profile_soup = BeautifulSoup(driver.page_source, 'html.parser')
try:
    # Ganti class sesuai hasil inspect element di browser
    kategori_akun = profile_soup.find('div', {'class': '_aa_c'}).text.strip()
except:
    kategori_akun = ''
print(f"✅ Kategori akun: {kategori_akun}")

# === Scroll agar semua post muncul ===
scroll_times = 50
for _ in range(scroll_times):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)

# === Ambil semua link post & reels ===
post_links = []
posts = driver.find_elements(By.XPATH, '//a[contains(@href, "/p/") or contains(@href, "/reel/")]')
for post in posts:
    href = post.get_attribute('href')
    if href not in post_links:
        post_links.append(href)

print(f"✅ Total post ditemukan: {len(post_links)}")

# Batasi (misal ambil 100 post)
post_links = post_links[:100]

# === DATA OUTPUT ===
data = []

# === Loop setiap post ===
for idx, link in enumerate(post_links):
    driver.get(link)
    time.sleep(5)

    # Tentukan jenis konten
    if '/reel/' in link:
        media_type = 'reel'
    elif '/p/' in link:
        media_type = 'post'
    else:
        media_type = ''

    # Klik "Muat komentar lainnya"
    while True:
        try:
            load_more = driver.find_element(
                By.XPATH,
                "//button[.//svg[@aria-label='Muat komentar lainnya'] or .//svg[@aria-label='Load more comments']]"
            )
            load_more.click()
            time.sleep(2)
        except NoSuchElementException:
            break

    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # Ambil data
    brand = username_target

    try:
        caption = soup.find('h1', class_='_ap3a _aaco _aacu _aacx _aad7 _aade').text
    except:
        caption = ''

    try:
        likes_div = soup.find('a', class_='x1i10hfl xjbqb8w ...')  # Ganti class sesuai hasil inspect
        likes = likes_div.text if likes_div else ''
    except:
        likes = ''

    try:
        comments_divs = soup.find_all('div', class_='x9f619 xjbqb8w x78zum5 x15mokao x1ga7v0g x16uus16 xbiv7yw x1yztbdb x1uhb9sk x1plvlek xryxfnj x1c4vz4f x2lah0s xdt5ytf xqjyukv x1qjc9v5 x1oa3qoh x1nhvcw1')  # Ganti class sesuai hasil inspect
        comments_count = len(comments_divs)
    except:
        comments_count = 0

    try:
        media = soup.find('img')
        if media:
            media_url = media['src']
        else:
            video = soup.find('video')
            media_url = video['src'] if video else ''
    except:
        media_url = ''

    # Ambil dominant color
    dominant_color = ''
    color_name = ''
    if media_url and media_type == 'post':
        try:
            response = requests.get(media_url)
            img = BytesIO(response.content)
            ct = ColorThief(img)
            dominant_color = ct.get_color(quality=1)
            color_name = get_color_name(dominant_color)
        except:
            dominant_color = ''
            color_name = ''

    try:
        upload_time = soup.find('time')['datetime']
    except:
        upload_time = ''

    content_category = 'Dominos post'

    data.append((
        brand, link, caption, likes, comments_count, media_url, media_type,
        str(dominant_color), color_name, content_category, upload_time, kategori_akun
    ))

    print(f"[{idx+1}] ✅ {link} | {media_type} | Dominant: {dominant_color} | Name: {color_name}")
    time.sleep(2)

# === Save ke Excel ===
df = pd.DataFrame(data, columns=[
    'brand', 'url_post', 'caption', 'likes', 'comments_count',
    'media_url', 'media_type', 'dominant_color', 'color_name',
    'content_category', 'upload_time', 'profile_category'
])

df.to_excel('konten_ig_Dominos.xlsx', index=False)
print("\n✅ Selesai! Data disimpan ke 'konten_ig_Dominos.xlsx'")

driver.quit()


🔑 Silakan login manual dulu...
✅ Kategori akun: 
✅ Total post ditemukan: 41
[1] ✅ https://www.instagram.com/dominos_id/p/C3ZqBC6toSI/ | post | Dominant: (58, 161, 220) | Name: unknown
[2] ✅ https://www.instagram.com/dominos_id/p/C3ZK8FpteFj/ | post | Dominant: (210, 212, 194) | Name: unknown
[3] ✅ https://www.instagram.com/dominos_id/p/C3XH07svOTm/ | post | Dominant: (36, 97, 123) | Name: unknown
[4] ✅ https://www.instagram.com/dominos_id/p/C3R7pbZO4CW/ | post | Dominant: (229, 209, 209) | Name: unknown
[5] ✅ https://www.instagram.com/dominos_id/p/C3RZRTmIwgl/ | post | Dominant: (239, 205, 199) | Name: unknown
[6] ✅ https://www.instagram.com/dominos_id/p/C3Pkn_2stp3/ | post | Dominant: (244, 215, 201) | Name: unknown
[7] ✅ https://www.instagram.com/dominos_id/p/C3OmvTDsU3y/ | post | Dominant: (227, 173, 137) | Name: unknown
[8] ✅ https://www.instagram.com/dominos_id/reel/C3JdNz-S_gz/ | reel | Dominant:  | Name: 
[9] ✅ https://www.instagram.com/dominos_id/p/C3HXQMPtQxR/ | post | Dominan