In [None]:
import os
import time
from io import BytesIO

import pandas as pd
import requests
import webcolors
from bs4 import BeautifulSoup
from colorthief import ColorThief
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

# === Helper: RGB to color name ===
def closest_color(requested_color):
    """Return the CSS3 color name nearest to ``requested_color``.

    Nearness is the squared Euclidean distance between RGB components.

    Args:
        requested_color: Indexable with at least three numeric items (R, G, B).

    Returns:
        The name of the closest CSS3 color. When several colors are equally
        close, the last one in table order wins.

    Raises:
        ValueError: If the installed webcolors exposes no CSS3 colors.
    """
    # Older webcolors releases expose the hex->name table directly; newer ones
    # dropped that constant from the public API and provide names() instead.
    # Without this fallback the AttributeError would be swallowed by
    # get_color_name() and every color would be reported as 'unknown'.
    legacy_table = getattr(webcolors, 'CSS3_HEX_TO_NAMES', None)
    if legacy_table is not None:
        candidates = [
            (webcolors.hex_to_rgb(hex_code), name)
            for hex_code, name in legacy_table.items()
        ]
    else:
        candidates = [
            (webcolors.name_to_rgb(name, spec='css3'), name)
            for name in webcolors.names('css3')
        ]

    red, green, blue = requested_color[0], requested_color[1], requested_color[2]
    best_name = None
    best_distance = None
    for (r_c, g_c, b_c), name in candidates:
        distance = (r_c - red) ** 2 + (g_c - green) ** 2 + (b_c - blue) ** 2
        # `<=` keeps the last of several equally distant candidates.
        if best_distance is None or distance <= best_distance:
            best_distance = distance
            best_name = name

    if best_name is None:
        raise ValueError('no CSS3 colors available from webcolors')
    return best_name

def get_color_name(rgb_color):
    """Return the nearest color name for ``rgb_color``, or 'unknown' on any error.

    Delegates to ``closest_color``; any ``Exception`` it raises is converted
    into the 'unknown' placeholder so that callers never have to handle it.
    """
    try:
        resolved_name = closest_color(rgb_color)
    except Exception:
        resolved_name = 'unknown'
    return resolved_name

# === Chrome setup ===
chrome_options = Options()
for chrome_flag in ("--start-maximized", "--disable-notifications"):
    chrome_options.add_argument(chrome_flag)

# ChromeDriverManager().install() may return a path to a non-executable file
# from the downloaded driver archive instead of the chromedriver binary itself.
# Launching such a file is a likely cause of the
# "OSError: [WinError 193] %1 is not a valid Win32 application" recorded in this
# notebook's output (NOTE(review): confirm on the affected machine).
# If the returned path is not the binary, use the binary sitting next to it.
driver_path = ChromeDriverManager().install()
expected_binary = "chromedriver.exe" if os.name == "nt" else "chromedriver"
if os.path.basename(driver_path) != expected_binary:
    sibling_binary = os.path.join(os.path.dirname(driver_path), expected_binary)
    if os.path.isfile(sibling_binary):
        driver_path = sibling_binary

service = Service(driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)

# === Manual login ===
# Open the Instagram home page, then wait a fixed time so the operator can
# log in by hand in the browser window.
login_wait_seconds = 40
driver.get('https://www.instagram.com/')
print("🔑 Silakan login manual dulu...")
time.sleep(login_wait_seconds)

# === Target profile ===
# Navigate to the profile being scraped and give the page time to render.
username_target = 'kfcindonesia'
profile_url = 'https://www.instagram.com/' + username_target + '/'
profile_load_seconds = 5
driver.get(profile_url)
time.sleep(profile_load_seconds)

# === Read the account category from the profile page ===
profile_soup = BeautifulSoup(driver.page_source, 'html.parser')
# The category is read from the first <div class="_aa_c">. When that element
# is absent the category falls back to an empty string. An explicit None check
# replaces the former bare `except:`, which also swallowed KeyboardInterrupt.
category_div = profile_soup.find('div', {'class': '_aa_c'})
kategori_akun = category_div.text.strip() if category_div is not None else ''
print(f"✅ Kategori akun: {kategori_akun}")

# === Link collection helpers ===
def _scroll_to_bottom(browser, times, pause_seconds=2):
    """Scroll the current page to the bottom ``times`` times, pausing after each scroll."""
    for _ in range(times):
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause_seconds)


def _collect_new_links(browser, xpath, seen):
    """Return hrefs of anchors matching ``xpath`` that are not yet in ``seen``.

    Links are returned in the order the browser reports the elements. ``seen``
    is a set that is updated in place, so one set can be shared across several
    calls to de-duplicate between pages.
    """
    fresh_links = []
    for anchor in browser.find_elements(By.XPATH, xpath):
        href = anchor.get_attribute('href')
        # Skip anchors without a usable href so later driver.get() calls
        # never receive None or an empty string.
        if href and href not in seen:
            seen.add(href)
            fresh_links.append(href)
    return fresh_links


# === Collect links from the MAIN FEED ===
print("📌 Mengambil link dari feed utama...")
scroll_times = 50
seen_links = set()
_scroll_to_bottom(driver, scroll_times)
post_links = _collect_new_links(
    driver,
    '//a[contains(@href, "/p/") or contains(@href, "/reel/")]',
    seen_links,
)

print(f"✅ Dari feed utama: {len(post_links)}")

# === Open the dedicated REELS tab ===
print("📌 Membuka tab reels khusus...")
reels_url = f'https://www.instagram.com/{username_target}/reels/'
driver.get(reels_url)
time.sleep(5)

_scroll_to_bottom(driver, scroll_times)
# Only reels that were not already found on the main feed are added.
reels_links = _collect_new_links(driver, '//a[contains(@href, "/reel/")]', seen_links)

print(f"✅ Dari tab reels: {len(reels_links)}")

# === Merge everything ===
post_links.extend(reels_links)
print(f"📌 Total post + reels: {len(post_links)}")

# === Cap the number of posts to visit ===
max_posts = 100
post_links = post_links[:max_posts]

# === OUTPUT DATA ===
data = []

# Locator for the "load more comments" button, identified by the aria-label of
# the SVG icon inside it. An unprefixed `svg` name test does not match inline
# SVG elements in an HTML page because they are namespaced, so the icon is
# matched with local-name() instead.
LOAD_MORE_XPATH = (
    "//button[.//*[local-name()='svg' and "
    "(@aria-label='Muat komentar lainnya' or @aria-label='Load more comments')]]"
)


def _expand_comments(browser, max_clicks=50, pause_seconds=2):
    """Click the "load more comments" button until it is gone or ``max_clicks`` is reached.

    The click cap prevents an endless loop when the button never disappears.
    """
    for _ in range(max_clicks):
        try:
            browser.find_element(By.XPATH, LOAD_MORE_XPATH).click()
        except WebDriverException:
            # Covers NoSuchElementException (button gone) and click failures
            # such as intercepted or stale elements. Stop expanding and keep
            # going, rather than aborting the whole run.
            return
        time.sleep(pause_seconds)


def _dominant_color(image_url, timeout_seconds=15):
    """Download ``image_url`` and return ``(rgb_tuple, color_name)``.

    Returns ``('', '')`` when the download or the color extraction fails.
    """
    try:
        # The timeout keeps one unresponsive media host from hanging the run.
        response = requests.get(image_url, timeout=timeout_seconds)
        response.raise_for_status()
        rgb = ColorThief(BytesIO(response.content)).get_color(quality=1)
    except Exception:
        # Best effort: network errors, undecodable images and ColorThief's own
        # generic exceptions all mean "no color available" for this post.
        return '', ''
    return rgb, get_color_name(rgb)


# === Visit every collected post ===
for idx, link in enumerate(post_links):
    try:
        driver.get(link)
    except WebDriverException as exc:
        # Skip a post that fails to load so the rows gathered so far still
        # reach the export step.
        print(f"[{idx+1}] ⚠️ Gagal membuka {link}: {type(exc).__name__}")
        continue
    time.sleep(5)

    # Media type is derived from the URL shape only.
    if '/reel/' in link:
        media_type = 'reel'
    elif '/p/' in link:
        media_type = 'post'
    else:
        media_type = ''

    _expand_comments(driver)

    soup = BeautifulSoup(driver.page_source, 'html.parser')

    brand = username_target

    # Each field below falls back to an empty value when its element is missing.
    caption_tag = soup.find('h1', class_='_ap3a _aaco _aacu _aacx _aad7 _aade')
    caption = caption_tag.text if caption_tag is not None else ''

    # Update this class list to match the latest inspected markup.
    likes_div = soup.find('span', class_='x193iq5w xeuugli x1fj9vlw x13faqbe x1vvkbs xt0psk2 x1i0vuye xvs91rp x1s688f x5n08af x10wh9bi xpm28yp x8viiok x1o7cslx')
    likes = likes_div.text if likes_div is not None else ''

    # Counts the comment items present in the parsed page.
    comments_count = len(soup.find_all('li', class_='_a9zj _a9zl'))

    # NOTE(review): this takes the first <img> in the page, which may not be
    # the post's own media. Confirm against the live markup.
    image_tag = soup.find('img')
    if image_tag is not None:
        media_url = image_tag.get('src', '')
    else:
        video_tag = soup.find('video')
        media_url = video_tag.get('src', '') if video_tag is not None else ''

    # Color analysis is only attempted for image posts, not reels.
    dominant_color = ''
    color_name = ''
    if media_url and media_type == 'post':
        dominant_color, color_name = _dominant_color(media_url)

    time_tag = soup.find('time')
    upload_time = time_tag.get('datetime', '') if time_tag is not None else ''

    content_category = f'{brand} {media_type}'

    data.append((
        brand, link, caption, likes, comments_count, media_url, media_type,
        str(dominant_color), color_name, content_category, upload_time, kategori_akun
    ))

    print(f"[{idx+1}] ✅ {link} | {media_type} | Dominant: {dominant_color} | Name: {color_name}")
    time.sleep(2)

# === Save to Excel ===
output_path = f'konten_ig_{username_target}.xlsx'
try:
    # One row per visited post; the column order matches the tuples in `data`.
    df = pd.DataFrame(data, columns=[
        'brand', 'url_post', 'caption', 'likes', 'comments_count',
        'media_url', 'media_type', 'dominant_color', 'color_name',
        'content_category', 'upload_time', 'profile_category'
    ])

    df.to_excel(output_path, index=False)
    print(f"\n✅ Selesai! Data disimpan ke '{output_path}'")
finally:
    # Always close the browser, even when the export fails, so no Chrome or
    # chromedriver process is left running.
    driver.quit()


OSError: [WinError 193] %1 is not a valid Win32 application