In [1]:
import time
import requests
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from colorthief import ColorThief
import webcolors
import re

# === Helper functions: RGB to colour name ===
def closest_color(requested_color):
    """Return the CSS3 colour name nearest to an RGB triple.

    Nearness is the squared Euclidean distance between the requested
    channels and the channels of each named CSS3 colour.

    Args:
        requested_color: Indexable holding the red, green and blue channel
            values at positions 0, 1 and 2.

    Returns:
        The name of the nearest CSS3 colour. When several colours are
        equally near, the one iterated last wins.
    """
    hex_to_names = getattr(webcolors, "CSS3_HEX_TO_NAMES", None)
    if hex_to_names is not None:
        palette = [
            (name, webcolors.hex_to_rgb(hex_code))
            for hex_code, name in hex_to_names.items()
        ]
    else:
        # Some webcolors releases do not expose the mapping constant; without
        # this fallback the AttributeError would surface to the caller on
        # every lookup. Build the palette from the public name list instead.
        palette = [
            (name, webcolors.name_to_rgb(name, spec="css3"))
            for name in webcolors.names("css3")
        ]

    best_name = None
    best_distance = None
    for name, (r_c, g_c, b_c) in palette:
        distance = (
            (r_c - requested_color[0]) ** 2
            + (g_c - requested_color[1]) ** 2
            + (b_c - requested_color[2]) ** 2
        )
        # "<=" keeps the last colour among equally distant candidates.
        if best_distance is None or distance <= best_distance:
            best_distance = distance
            best_name = name
    return best_name

def get_color_name(rgb_color):
    """Return the nearest colour name for *rgb_color*, or 'unknown'.

    Delegates to ``closest_color`` and converts any exception raised by the
    lookup into the placeholder string 'unknown'.
    """
    try:
        resolved_name = closest_color(rgb_color)
    except Exception:
        return 'unknown'
    return resolved_name

# === Read the profile statistics ===
def get_profile_stats(driver):
    """Read the counters and avatar URL from the currently loaded profile.

    Args:
        driver: Object exposing ``find_elements`` and ``find_element``
            (the script passes its Selenium Chrome driver).

    Returns:
        A dict with the keys 'posts', 'followers', 'following' and
        'profile_pic_url'. Values that could not be read stay ''. Any
        exception is printed and the values gathered so far are returned.
    """
    counter_keys = ('posts', 'followers', 'following')
    result = dict.fromkeys(counter_keys + ('profile_pic_url',), '')

    try:
        # The first three list items of the header section are taken as the
        # post / follower / following counters, in that order.
        header_items = driver.find_elements(By.XPATH, "//header//section//ul//li")
        if len(header_items) >= 3:
            for key, item in zip(counter_keys, header_items):
                # Keep only the first line of the element text.
                result[key] = item.text.split('\n')[0]

        # First <img> found inside the header is used as the profile picture.
        avatar = driver.find_element(By.XPATH, "//header//img")
        result['profile_pic_url'] = avatar.get_attribute('src')
    except Exception as e:
        print(f"Error getting profile stats: {e}")

    return result

# === Collect comment texts ===
def get_comments_data(driver, soup):
    """Collect candidate comment texts from a parsed post page.

    Looks for ``<div role="dialog">`` containers, falling back to divs whose
    class matches 'comment', and reads every ``<span dir="auto">`` inside
    them. Texts that are empty, a single character, start with '@', or
    contain one of the UI keywords are skipped.

    Args:
        driver: Unused; kept so existing callers keep working.
        soup: Parsed page exposing ``find_all`` (a BeautifulSoup document in
            the script).

    Returns:
        A ``(comments, count)`` tuple where ``count == len(comments)``. Any
        exception is printed and whatever was collected so far is returned.
    """
    comments_data = []
    # NOTE: this is a substring test, so a genuine comment that merely
    # contains one of these words (e.g. "I like it") is dropped as well.
    ui_keywords = ('like', 'reply', 'follow', 'view')
    # find_all() searches recursively, so a span sitting inside nested
    # matching containers is reached once per container. Track element
    # identity to record each span only once.
    seen_spans = set()

    try:
        comment_divs = (
            soup.find_all('div', {'role': 'dialog'})
            or soup.find_all('div', {'class': re.compile('comment')})
        )

        for div in comment_divs:
            for span in div.find_all('span', dir='auto'):
                if id(span) in seen_spans:
                    continue
                seen_spans.add(id(span))

                text = span.get_text(strip=True)
                if not text or len(text) <= 1 or text.startswith('@'):
                    continue
                lowered = text.lower()
                if any(keyword in lowered for keyword in ui_keywords):
                    continue
                comments_data.append(text)

    except Exception as e:
        print(f"Error getting comments: {e}")

    return comments_data, len(comments_data)

# === Read the like / view counter ===
def get_engagement_count(soup, media_type):
    """Return the like count (posts) or view count (anything else) as text.

    Searches the page's text nodes for a number followed by the label
    ('like'/'suka' for posts, 'view'/'tayangan' otherwise) and returns the
    number that directly precedes the label.

    Args:
        soup: Parsed page exposing ``find_all(string=...)``.
        media_type: 'post' selects the like labels; any other value selects
            the view labels.

    Returns:
        The count exactly as written on the page (e.g. '1,234', '1.234',
        '12.3K'), or '' when nothing matches or an error occurs.
    """
    labels = 'like|suka' if media_type == 'post' else 'view|tayangan'
    # Capture the number adjacent to the label. Both '.' and ',' are accepted
    # as separators, plus an optional magnitude suffix, so values such as
    # "1.234 suka" or "12.3K views" are returned whole instead of being cut
    # at the separator or replaced by an unrelated number earlier in the text.
    pattern = re.compile(
        r'(\d+(?:[.,]\d+)*(?:\s?(?:[KMB]|rb|jt))?)\s*(?:' + labels + r')',
        re.I,
    )

    try:
        for text in soup.find_all(string=pattern):
            match = pattern.search(text)
            if match:
                return match.group(1)
    except Exception as e:
        print(f"Error getting engagement: {e}")

    return ''

# === Chrome setup ===
chrome_options = Options()
for browser_flag in ("--start-maximized", "--disable-notifications"):
    chrome_options.add_argument(browser_flag)
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)

# === Manual login ===
# No credentials are entered by the script: it opens the home page and then
# pauses for a fixed 40 seconds so the operator can sign in by hand.
driver.get('https://www.instagram.com/')
print("🔑 Silakan login manual dulu...")
time.sleep(40)

# === Target profile ===
username_target = 'batikkerisonline'
profile_url = 'https://www.instagram.com/' + username_target + '/'
driver.get(profile_url)
time.sleep(5)  # fixed pause before reading the page

# === Read the profile data ===
profile_stats = get_profile_stats(driver)
profile_soup = BeautifulSoup(driver.page_source, 'html.parser')

# Account category: the first text node on the page that contains one of the
# keywords below is used verbatim (whitespace-stripped).
# NOTE(review): the search covers every text node of the document, so the
# first hit is not necessarily the category label - verify on a live page.
kategori_akun = ''
try:
    category_elements = profile_soup.find_all(
        string=re.compile(r'Business|Shopping|Brand|Store', re.I)
    )
    kategori_akun = category_elements[0].strip() if category_elements else ''
except Exception as e:
    kategori_akun = ''
    print(f"Error getting category: {e}")

print(f"📊 Profile Stats: Posts: {profile_stats['posts']}, Followers: {profile_stats['followers']}, Following: {profile_stats['following']}")

# === Dominant colour of the profile picture ===
# Both values stay '' when there is no picture URL or the download/analysis
# fails; the failure is printed and the script carries on.
profile_color = ''
profile_color_name = ''
if profile_stats['profile_pic_url']:
    try:
        # Without a timeout a stalled connection would block the whole run.
        response = requests.get(profile_stats['profile_pic_url'], timeout=30)
        # Fail fast on HTTP errors instead of handing an error page to the
        # image decoder.
        response.raise_for_status()
        img = BytesIO(response.content)
        ct = ColorThief(img)
        profile_color = ct.get_color(quality=1)
        profile_color_name = get_color_name(profile_color)
    except Exception as e:
        print(f"Error getting profile color: {e}")

# === Scroll down to load more posts ===
# Scrolls to the bottom at most `scroll_times` times, pausing 3 seconds after
# each scroll, and stops early once the page height no longer changes.
scroll_times = 10
last_height = driver.execute_script("return document.body.scrollHeight")
scrolls_left = scroll_times
while scrolls_left > 0:
    scrolls_left -= 1
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height != last_height:
        last_height = new_height
        continue
    break

# === Collect every post and reel link ===
posts = driver.find_elements(By.XPATH, '//a[contains(@href, "/p/") or contains(@href, "/reel/")]')
# dict.fromkeys drops repeated hrefs while keeping first-seen order.
post_links = list(dict.fromkeys(post.get_attribute('href') for post in posts))

print(f"✅ Total post ditemukan: {len(post_links)}")

# Cap the number of posts that will be visited.
post_links = post_links[:50]

# === Output rows ===
# One tuple per visited post; the field order must match the column list used
# when the DataFrame is built.
data = []

# === Visit every post ===
for idx, link in enumerate(post_links):
    try:
        driver.get(link)
        time.sleep(5)  # fixed pause before reading the page
        soup = BeautifulSoup(driver.page_source, 'html.parser')
    except Exception as e:
        # A single post that fails to load must not abort the run and throw
        # away the rows collected so far.
        print(f"Error opening {link}: {e}")
        continue

    # Content type is derived from the URL path.
    media_type = 'reel' if '/reel/' in link else 'post'

    brand = username_target

    # Caption: first div whose class matches '_a9zs'.
    caption = ''
    try:
        caption_element = soup.find('div', {'class': re.compile('_a9zs')})
        if caption_element:
            caption = caption_element.get_text().strip()
    except Exception as e:
        print(f"Error getting caption: {e}")

    # Likes (posts) or views (reels)
    engagement = get_engagement_count(soup, media_type)

    # Comments
    comments_data, comments_count = get_comments_data(driver, soup)

    # Media URL
    # NOTE(review): this takes the first <img> in document order and only
    # looks for a <video> when the page has no <img> at all - confirm that
    # the first image really is the post media.
    media_url = ''
    try:
        media = soup.find('img') or soup.find('video')
        if media:
            media_url = media.get('src', '')
    except Exception as e:
        print(f"Error getting media URL: {e}")

    # Dominant colour of the media (image posts only)
    dominant_color = ''
    color_name = ''
    if media_url and media_type == 'post':
        try:
            # Without a timeout a stalled download would block the whole run.
            response = requests.get(media_url, timeout=30)
            response.raise_for_status()
            img = BytesIO(response.content)
            ct = ColorThief(img)
            dominant_color = ct.get_color(quality=1)
            color_name = get_color_name(dominant_color)
        except Exception as e:
            print(f"Error getting media color: {e}")

    # Upload time: `datetime` attribute of the first <time> element.
    upload_time = ''
    try:
        time_element = soup.find('time')
        if time_element:
            upload_time = time_element.get('datetime', '')
    except Exception as e:
        print(f"Error getting upload time: {e}")

    # Join all comments into one cell; an empty list yields ''.
    all_comments = ' | '.join(comments_data)

    data.append((
        brand, link, caption, engagement, comments_count, all_comments,
        media_url, media_type, str(dominant_color), color_name,
        'Batik Keris post', upload_time, kategori_akun,
        profile_stats['posts'], profile_stats['followers'], profile_stats['following'],
        str(profile_color), profile_color_name
    ))

    print(f"[{idx+1}] ✅ {link} | {media_type} | Engagement: {engagement} | Comments: {comments_count}")
    time.sleep(2)

# === Save to Excel ===
# The browser is closed in `finally` so that a failure while building or
# writing the spreadsheet does not leave the Chrome session running.
try:
    df = pd.DataFrame(data, columns=[
        'brand', 'url_post', 'caption', 'engagement', 'comments_count', 'all_comments',
        'media_url', 'media_type', 'dominant_color', 'color_name',
        'content_category', 'upload_time', 'profile_category',
        'total_posts', 'followers', 'following',
        'profile_color', 'profile_color_name'
    ])

    df.to_excel('konten_ig_BatikKeris_enhanced.xlsx', index=False)
    print("\n✅ Selesai! Data disimpan ke 'konten_ig_BatikKeris_enhanced.xlsx'")
finally:
    driver.quit()

🔑 Silakan login manual dulu...
📊 Profile Stats: Posts: 6,247 posts, Followers: 75.4K followers, Following: 28 following
✅ Total post ditemukan: 42
[1] ✅ https://www.instagram.com/batikkerisonline/p/DKhOgPxzyJi/ | post | Engagement:  | Comments: 0
[2] ✅ https://www.instagram.com/batikkerisonline/p/DKhHqdazrdi/ | post | Engagement:  | Comments: 0
[3] ✅ https://www.instagram.com/batikkerisonline/p/DKei2BETBGj/ | post | Engagement:  | Comments: 0
[4] ✅ https://www.instagram.com/batikkerisonline/p/DKcE-w2T95L/ | post | Engagement:  | Comments: 0
[5] ✅ https://www.instagram.com/batikkerisonline/p/DKb-B7cT2NF/ | post | Engagement:  | Comments: 0
[6] ✅ https://www.instagram.com/batikkerisonline/p/DKZZQnvTtam/ | post | Engagement:  | Comments: 0
[7] ✅ https://www.instagram.com/batikkerisonline/p/DKMkwQ_zdvP/ | post | Engagement:  | Comments: 0
[8] ✅ https://www.instagram.com/batikkerisonline/p/DKHXrOvTGSB/ | post | Engagement:  | Comments: 0
[9] ✅ https://www.instagram.com/batikkerisonline/p/DK