In [None]:
import time
import requests
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from colorthief import ColorThief
import webcolors
import re

# === Helper: RGB triple -> nearest CSS3 colour name ===
def closest_color(requested_color):
    """Return the CSS3 colour name nearest to ``requested_color``.

    "Nearest" is the smallest squared Euclidean distance between the
    requested (r, g, b) triple and each named CSS3 colour.

    Args:
        requested_color: Indexable with at least three numeric items
            (red, green, blue).

    Returns:
        The name of the closest CSS3 colour. When several colours are
        equally close, the first one in the library's iteration order wins.
    """
    try:
        # Older webcolors releases expose the hex -> name table directly.
        candidates = [
            (name, webcolors.hex_to_rgb(hex_code))
            for hex_code, name in webcolors.CSS3_HEX_TO_NAMES.items()
        ]
    except AttributeError:
        # Newer releases no longer expose that constant; without this
        # fallback every lookup fails and callers only ever see 'unknown'.
        candidates = [
            (name, webcolors.name_to_rgb(name))
            for name in webcolors.names("css3")
        ]

    red, green, blue = requested_color[0], requested_color[1], requested_color[2]

    def squared_distance(candidate):
        r_c, g_c, b_c = candidate[1]
        return (r_c - red) ** 2 + (g_c - green) ** 2 + (b_c - blue) ** 2

    return min(candidates, key=squared_distance)[0]

def get_color_name(rgb_color):
    """Return the nearest colour name for ``rgb_color``, or 'unknown'.

    Delegates to ``closest_color``; any exception raised by that lookup is
    absorbed and reported as the string 'unknown'.
    """
    color_name = 'unknown'
    try:
        color_name = closest_color(rgb_color)
    except Exception:
        # Best-effort lookup: keep the 'unknown' placeholder.
        pass
    return color_name

# === Profile statistics helper ===
def get_profile_stats(driver):
    """Read the counters and avatar URL from the profile page header.

    Args:
        driver: Selenium WebDriver; the caller navigates it to the profile
            page before calling this function.

    Returns:
        dict with the keys 'posts', 'followers', 'following' and
        'profile_pic_url'. A value that could not be read stays ''.
    """
    stats = {
        'posts': '',
        'followers': '',
        'following': '',
        'profile_pic_url': ''
    }

    # The counters and the avatar are looked up in separate try blocks so a
    # failure in one of them does not throw away the other.
    try:
        # First three <li> items of the header list: posts, followers, following.
        stats_elements = driver.find_elements(By.XPATH, "//header//section//ul//li")
        if len(stats_elements) >= 3:
            for key, element in zip(('posts', 'followers', 'following'), stats_elements):
                # Only the first line of the element text is kept.
                stats[key] = element.text.split('\n')[0]
    except Exception as e:
        print(f"Error getting profile stats: {e}")

    try:
        img_element = driver.find_element(By.XPATH, "//header//img")
        # get_attribute may return None; keep the documented '' placeholder.
        stats['profile_pic_url'] = img_element.get_attribute('src') or ''
    except Exception as e:
        print(f"Error getting profile picture: {e}")

    return stats

# === Per-user comment helper ===
def get_comments_with_users(driver):
    """Collect the comments visible on the currently open post.

    Steps: click the comment button if one is found, scroll the dialog and
    press "load more" up to ``max_attempts`` times, then read username,
    text and timestamp from every matching comment container.

    Args:
        driver: Selenium WebDriver; the caller navigates it to the post
            page before calling this function.

    Returns:
        list of dicts with the keys 'username', 'comment' and 'timestamp'
        ('' when no <time> element is present). Identical
        (username, comment, timestamp) entries are reported once.
        On an unexpected error the comments gathered so far are returned.
    """
    comments = []
    max_attempts = 3

    try:
        # Open the comment section if a comment button exists.
        try:
            comment_button = driver.find_element(By.XPATH, "//span[contains(text(), 'comments') or contains(text(), 'komentar')]/ancestor::button")
            comment_button.click()
            time.sleep(2)
        except Exception:
            # No button (or not clickable): comments may already be visible.
            # `except Exception` (not bare) so Ctrl+C still interrupts.
            pass

        # Scroll and press "load more" until it disappears or attempts run out.
        for _ in range(max_attempts):
            try:
                driver.execute_script("""
                    const commentSection = document.querySelector('div[role="dialog"]');
                    if (commentSection) {
                        commentSection.scrollTop = commentSection.scrollHeight;
                    }
                """)
                time.sleep(2)

                load_more = driver.find_element(By.XPATH, "//button[contains(., 'more comments') or contains(., 'komentar lainnya')]")
                load_more.click()
                time.sleep(2)
            except Exception:
                # Nothing more to load, or the page changed underneath us.
                break

        # Every div carrying the class and containing a link is a candidate.
        comment_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'x9f619') and .//a[contains(@href, '/')]]")

        # The XPath above also matches wrapper divs that enclose a comment,
        # so the same comment can be seen several times; remember what was
        # already recorded.
        seen = set()
        for comment in comment_elements:
            try:
                username = comment.find_element(By.XPATH, ".//a[contains(@href, '/')]").text.strip()
                comment_text = comment.find_element(By.XPATH, ".//span[contains(@dir, 'auto')]").text.strip()
                if not (username and comment_text):
                    continue

                time_nodes = comment.find_elements(By.XPATH, ".//time")
                timestamp = (time_nodes[0].get_attribute('datetime') or '') if time_nodes else ''
            except Exception:
                # Container without the expected children: skip it.
                continue

            entry_key = (username, comment_text, timestamp)
            if entry_key in seen:
                continue
            seen.add(entry_key)
            comments.append({
                'username': username,
                'comment': comment_text,
                'timestamp': timestamp
            })

    except Exception as e:
        print(f"Error getting comments: {e}")

    return comments

# === Likes / views helper ===
# Digits with optional '.' or ',' separated groups, e.g. "541", "1,234,567",
# "1.234". Abbreviation suffixes (K, M, rb, ...) are not interpreted.
_COUNT_PATTERN = re.compile(r'\d+(?:[.,]\d+)*')


def _first_count(text):
    """Return the first number-like token in ``text``, or None if absent."""
    match = _COUNT_PATTERN.search(text)
    return match.group(0) if match else None


def get_engagement_count(driver, media_type):
    """Return the like count (posts) or view count (reels) as a string.

    A targeted XPath lookup is tried first. If it finds nothing usable,
    every <span> on the page is scanned for the relevant keyword.

    Args:
        driver: Selenium WebDriver; the caller navigates it to the post
            page before calling this function.
        media_type: 'post' selects likes; any other value selects views.

    Returns:
        The number exactly as written on the page (separators kept),
        or '0' when nothing was found or an error occurred.
    """
    if media_type == 'post':
        xpath = "//section//div[contains(., 'likes') or contains(., 'suka')]"
        keywords = ('like', 'suka')
    else:
        xpath = "//section//span[contains(., 'views') or contains(., 'tayangan')]"
        keywords = ('view', 'tayangan')

    try:
        # Primary lookup: a dedicated element mentioning the keyword.
        try:
            count = _first_count(driver.find_element(By.XPATH, xpath).text)
            if count is not None:
                return count
        except Exception:
            # Element missing: fall through to the span scan below.
            # `except Exception` (not bare) so Ctrl+C still interrupts.
            pass

        # Fallback: first span that mentions a keyword and contains a number.
        for span in driver.find_elements(By.TAG_NAME, "span"):
            text = span.text.lower()
            if any(keyword in text for keyword in keywords):
                count = _first_count(text)
                if count is not None:
                    return count

    except Exception as e:
        print(f"Error getting engagement: {e}")

    return '0'

# === Dominant colour helper ===
def get_dominant_color(media_url, timeout=10):
    """Download an image and return its dominant colour.

    Args:
        media_url: URL of the image to analyse.
        timeout: Seconds to wait for the HTTP request. Without a timeout a
            stalled connection would block the whole scrape indefinitely.

    Returns:
        Tuple ``(rgb_string, color_name)`` where ``rgb_string`` is
        ``str()`` of the RGB tuple reported by ColorThief.
        ``('', 'unknown')`` when the download or analysis fails.
    """
    try:
        response = requests.get(media_url, timeout=timeout)
        # Do not feed an HTTP error page to the image decoder.
        response.raise_for_status()
        ct = ColorThief(BytesIO(response.content))
        dominant_color = ct.get_color(quality=1)
        return str(dominant_color), get_color_name(dominant_color)
    except Exception as e:
        print(f"Error getting color: {e}")
        return '', 'unknown'

# === Chrome setup ===
chrome_options = Options()
for chrome_flag in ("--start-maximized", "--disable-notifications"):
    chrome_options.add_argument(chrome_flag)
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)

# === Manual login ===
# Open the Instagram home page, then pause for a fixed 40 seconds so the
# operator can sign in by hand in the browser window.
driver.get('https://www.instagram.com/')
print("🔑 Silakan login manual dulu...")
time.sleep(40)

# === Target profile ===
username_target = 'batikula'
profile_url = f'https://www.instagram.com/{username_target}/'
driver.get(profile_url)
time.sleep(5)

# === Collect profile data ===
profile_stats = get_profile_stats(driver)
profile_soup = BeautifulSoup(driver.page_source, 'html.parser')

# Account category: the first text node in the page that matches one of
# the label keywords below (case-insensitive). Stays '' when none matches.
kategori_akun = ''
try:
    category_elements = profile_soup.find_all(
        string=re.compile(r'Business|Shopping|Brand|Store', re.I)
    )
    if category_elements:
        kategori_akun = category_elements[0].strip()
except Exception as e:
    kategori_akun = ''
    print(f"Error getting category: {e}")

print(f"📊 Profile Stats: Posts: {profile_stats['posts']}, Followers: {profile_stats['followers']}, Following: {profile_stats['following']}")

# === Dominant colour of the profile picture ===
# Only attempted when a picture URL was actually found.
if profile_stats['profile_pic_url']:
    profile_color, profile_color_name = get_dominant_color(profile_stats['profile_pic_url'])
else:
    profile_color, profile_color_name = '', 'unknown'

# === Scroll down so the page loads more posts ===
# At most `scroll_times` scrolls; stops early once the page height no
# longer grows after a scroll.
scroll_times = 5
last_height = driver.execute_script("return document.body.scrollHeight")
for _ in range(scroll_times):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height != last_height:
        last_height = new_height
        continue
    # Height unchanged: nothing new was loaded.
    break

# === Collect every post & reel link ===
posts = driver.find_elements(By.XPATH, '//a[contains(@href, "/p/") or contains(@href, "/reel/")]')

# dict.fromkeys de-duplicates while keeping page order. Anchors whose href
# comes back empty/None are dropped so they never reach driver.get().
post_links = list(dict.fromkeys(
    href
    for href in (post.get_attribute('href') for post in posts)
    if href
))

print(f"✅ Total post ditemukan: {len(post_links)}")

# Keep at most the first 50 links in page order (posts and reels mixed;
# no per-type quota is applied).
post_links = post_links[:50]

# === Output rows ===
data = []

# === Visit every collected post ===
# One tuple per post is appended to `data`; the field order must match the
# column list used when the DataFrame is built.
for idx, link in enumerate(post_links):
    try:
        driver.get(link)
        time.sleep(5)

        # Content type is derived from the URL.
        media_type = 'reel' if '/reel/' in link else 'post'

        brand = username_target

        # Caption (best effort: '' when the element is missing).
        caption = ''
        try:
            caption_element = driver.find_element(By.XPATH, "//div[contains(@class, '_a9zs') or contains(@class, 'x5yr21d')]")
            caption = caption_element.text.strip()
        except Exception:
            # `except Exception` (not bare) so Ctrl+C still interrupts.
            pass

        # Likes (posts) or views (reels).
        engagement = get_engagement_count(driver, media_type)

        # Comments with their authors.
        comments = get_comments_with_users(driver)
        comments_count = len(comments)

        # One "user: text" line per comment for the Excel cell.
        comments_formatted = "\n".join(f"{c['username']}: {c['comment']}" for c in comments)

        # Media URL. XPath `or` is a boolean operator and does not yield
        # elements; the union operator `|` selects whichever of the image
        # or the video exists.
        media_url = ''
        try:
            media = driver.find_element(By.XPATH, "//img[contains(@class, 'x5yr21d')] | //video")
            media_url = media.get_attribute('src') or ''
        except Exception:
            pass

        # Dominant colour is only computed for image posts.
        dominant_color, color_name = get_dominant_color(media_url) if media_url and media_type == 'post' else ('', '')

        # Upload time from the first <time> element on the page.
        upload_time = ''
        try:
            time_element = driver.find_element(By.TAG_NAME, "time")
            upload_time = time_element.get_attribute('datetime') or ''
        except Exception:
            pass

        data.append((
            brand, link, caption, engagement, comments_count, comments_formatted,
            media_url, media_type, dominant_color, color_name,
            'Batik Ula Content', upload_time, kategori_akun,
            profile_stats['posts'], profile_stats['followers'], profile_stats['following'],
            profile_color, profile_color_name
        ))

        print(f"[{idx+1}/{len(post_links)}] ✅ {link}")
        print(f"   Type: {media_type} | Engagement: {engagement} | Comments: {comments_count}")
        print(f"   Caption: {caption[:50]}..." if caption else "   No caption")
        print(f"   First comment: {comments[0]['username']}: {comments[0]['comment'][:30]}..." if comments else "   No comments")
        # Pause between posts.
        time.sleep(3)

    except Exception as e:
        # A failing post is reported and skipped; the scrape continues.
        print(f"Error processing post {link}: {e}")

# === Save to Excel ===
df = pd.DataFrame(data, columns=[
    'brand', 'url_post', 'caption', 'engagement', 'comments_count', 'comments_with_users',
    'media_url', 'media_type', 'dominant_color', 'color_name',
    'content_category', 'upload_time', 'profile_category',
    'total_posts', 'followers', 'following',
    'profile_color', 'profile_color_name'
])

try:
    # The context manager closes the workbook even if writing fails midway.
    with pd.ExcelWriter('konten_ig_Batikula_detailed.xlsx', engine='xlsxwriter') as writer:
        df.to_excel(writer, index=False, sheet_name='Sheet1')

        # Size each column to its longest cell or its header, whichever is longer.
        worksheet = writer.sheets['Sheet1']
        for idx, col in enumerate(df.columns):
            cell_lengths = df[col].astype(str).map(len)
            # An empty frame has no cells; .max() would be NaN there.
            longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
            # Capped at 255 so long comment cells do not produce an
            # oversized column width.
            max_len = min(max(longest_cell, len(col)) + 2, 255)
            worksheet.set_column(idx, idx, max_len)

    print("\n✅ Selesai! Data disimpan ke 'konten_ig_Batikula_detailed.xlsx'")
finally:
    # Always release the browser, even when the export raised.
    driver.quit()

🔑 Silakan login manual dulu...
   Setelah login, tekan Enter di terminal ini...
📊 Profile Stats - Posts: 0, Followers: 0, Following: 0
🏷️ Kategori akun: Clothing (Brand)
📜 Scrolling untuk load semua post...
✅ Total post ditemukan: 12
📝 Akan memproses 12 post

[1/12] Processing: https://www.instagram.com/batikula/p/DFpeuDozT6_/
    ✅ POST | Comments: 15 | Likes: 541
    🎨 Color: unknown (206, 198, 194)
    💬 Top commenters: {'akunurulkh': 1, 'hevialvio_h': 1, 'durotunnasikhah77': 1}

[2/12] Processing: https://www.instagram.com/batikula/p/DKME4tpThcq/
    ✅ POST | Comments: 1 | Likes: 69
    🎨 Color: unknown (225, 218, 207)
    💬 Top commenters: {'amyourhopes': 1}

[3/12] Processing: https://www.instagram.com/batikula/p/DMg105zTSMb/
🔚 Browser closed


KeyboardInterrupt: 