### Scraping Links from Tagged Videos of an Instagram Account

In [None]:
import time
import csv
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Instagram credentials used to log in. The INSTAGRAM_USERNAME and
# INSTAGRAM_PASSWORD environment variables take precedence so real credentials
# never have to be saved inside the notebook; the placeholders are the fallback.
INSTAGRAM_USERNAME = os.environ.get("INSTAGRAM_USERNAME", "Your Username")
INSTAGRAM_PASSWORD = os.environ.get("INSTAGRAM_PASSWORD", "Your Password")

# Account whose tagged posts are scraped. Surrounding whitespace and a leading
# "@" are removed because the value is interpolated directly into the page URL.
TARGET_USERNAME = input("Enter username to search for tagged users: ").strip().lstrip("@")

# CSV file that accumulates the scraped reel links (one link per row).
CSV_FILENAME = "OUTPUT.csv"

# Number of scroll-and-collect passes performed on the tagged page.
scroll_wants = int(input("Enter the number of times to scroll: "))

def load_existing_links(filename):
    """Load existing links from the CSV file to avoid duplicates.

    Args:
        filename: Path of the CSV file; the first field of each row is the link.

    Returns:
        A set holding the first field of every non-empty row, or an empty set
        when the file does not exist.
    """
    if not os.path.exists(filename):
        return set()

    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires for files passed to csv.reader.
    with open(filename, mode='r', newline='', encoding='utf-8') as file:
        return {row[0] for row in csv.reader(file) if row}

def instagram_tagged_posts():
    """Log in to Instagram and append new tagged-reel links to the CSV file.

    Starts Chrome, signs in with INSTAGRAM_USERNAME / INSTAGRAM_PASSWORD, opens
    the tagged page of TARGET_USERNAME and performs scroll_wants passes. On each
    pass every anchor whose href contains "/reel/" and that is neither already
    in CSV_FILENAME nor saved earlier in this run is appended to CSV_FILENAME.
    Any exception is printed rather than raised, and the browser is always
    closed at the end.
    """
    # Browser setup: maximized Chrome with a driver fetched by webdriver_manager
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--start-maximized")
    chrome_service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=chrome_service, options=chrome_options)

    try:
        # Links stored by earlier runs must not be written again
        known_links = load_existing_links(CSV_FILENAME)

        # Step 1: sign in
        print("Logging into Instagram...")
        driver.get("https://www.instagram.com/accounts/login/")
        time.sleep(5)  # give the login form time to render

        username_box = driver.find_element(By.NAME, "username")
        username_box.send_keys(INSTAGRAM_USERNAME)
        password_box = driver.find_element(By.NAME, "password")
        password_box.send_keys(INSTAGRAM_PASSWORD + Keys.RETURN)
        time.sleep(7)  # give the login time to complete

        # Step 2: open the tagged page of the target account
        print(f" Navigating to @{TARGET_USERNAME} tagged posts...")
        driver.get(f"https://www.instagram.com/{TARGET_USERNAME}/tagged/")
        time.sleep(5)  # give the tagged grid time to load

        # Step 3: collect reel links, scrolling between passes
        print(" Extracting tagged post URLs...")
        saved_this_run = set()

        with open(CSV_FILENAME, mode='a', newline='', encoding='utf-8') as out_file:
            writer = csv.writer(out_file)

            for attempt_no in range(1, scroll_wants + 1):
                count_before = len(saved_this_run)

                for anchor in driver.find_elements(By.TAG_NAME, "a"):
                    href = anchor.get_attribute("href")
                    if not href or "/reel/" not in href:
                        continue
                    if href in known_links or href in saved_this_run:
                        continue
                    # Write and flush immediately so progress survives a crash
                    writer.writerow([href])
                    out_file.flush()
                    saved_this_run.add(href)

                # Report only when this pass actually added something
                if len(saved_this_run) > count_before:
                    print(f" {len(saved_this_run)} new unique URLs saved...")

                print(f" Scrolling... (Attempt {attempt_no})")
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(5)  # wait for newly loaded content

        print(f"\n All extracted links saved to {CSV_FILENAME}.")

    except Exception as e:
        print(f"Error: {e}")

    finally:
        driver.quit()  # always close the browser

# Execute the function: the login, scroll and save flow starts as soon as this cell runs
instagram_tagged_posts()


### Add the Column Name `url`

In [2]:
import pandas as pd

# Load the CSV file. The scraper writes bare links with no header row, so every
# line is read as data. A file with no content (nothing was scraped) becomes an
# empty single-column frame instead of raising EmptyDataError.
try:
    df = pd.read_csv(CSV_FILENAME, header=None)
except pd.errors.EmptyDataError:
    df = pd.DataFrame(columns=[0])

# Assign column name
df.columns = ['url']

# If this cell already ran once, the file begins with a 'url' header line that
# was just read back as data; drop it so re-running never duplicates the header.
df = df[df['url'] != 'url'].reset_index(drop=True)

# Save it back
df.to_csv(CSV_FILENAME, index=False)

### Username Extraction

In [None]:
import pandas as pd
import re

# Load the CSV file
df = pd.read_csv(CSV_FILENAME)

# Captures <username> from ".../instagram.com/<username>/reel...".
_REEL_OWNER_PATTERN = re.compile(r"instagram\.com/([^/]+)/reel")


def _username_from_url(url):
    """Return the account name embedded in a reel URL, or None if there is none.

    A URL shaped like instagram.com/reel/<id>/ has no username segment and a
    missing cell is not a string; both yield None instead of raising.
    """
    if not isinstance(url, str):
        return None
    match = _REEL_OWNER_PATTERN.search(url)
    return match.group(1) if match else None


# Assuming the column with URLs is named 'url' (change if different)
df['username'] = df['url'].apply(_username_from_url)

# Save to a new CSV with usernames
df.to_csv('usernames_extracted.csv', index=False)

print(" Usernames extracted and saved to 'usernames_extracted.csv'")


### Fetching User Details One by One

In [None]:
import pandas as pd
import csv
import time
from instagrapi import Client
import json
import os

# Instagram credentials (the same ones configured for the scraping cell)
USERNAME = INSTAGRAM_USERNAME
PASSWORD = INSTAGRAM_PASSWORD

# Initialize the Client
cl = Client()

# Login to Instagram; nothing below can work without a session, so stop here
# on failure. SystemExit is raised directly because the exit() helper is only
# injected by the site module / IPython.
try:
    cl.login(USERNAME, PASSWORD)
    print(" Login successful!")
except Exception as e:
    print(f" Login failed: {e}")
    raise SystemExit(1)

# Load the usernames from CSV. They are read as strings so an all-digit
# username is not turned into a number.
df = pd.read_csv('usernames_extracted.csv', dtype={'username': str})

# Rows without a username cannot be looked up, and an account tagged in several
# reels only needs to be fetched once.
usernames = df['username'].dropna().drop_duplicates()

# Output file
output_file = 'user_details_liveappend.csv'

# Ensure file exists and create header if it doesn't
if not os.path.isfile(output_file):
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Username', 'Followers', 'Bio', 'Website'])

# Loop through each username
for usrname in usernames:
    try:
        # Fetch user info
        user_id = cl.user_id_from_username(usrname)
        user_info = cl.user_info(user_id)

        followers = user_info.follower_count
        bio = user_info.biography
        website = user_info.external_url

        print(f" {usrname}: {followers} followers")

        # Reopen and append per record so each row is on disk (the file is
        # closed, hence flushed) before the next request is made
        with open(output_file, 'a', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow([usrname, followers, bio, website])

    except json.JSONDecodeError as e:
        print(f" JSONDecodeError for {usrname}: {e}")

    except Exception as e:
        # Best effort: report the failure and move on to the next username
        print(f" Error for {usrname}: {e}")

    finally:
        # Pause after every request, failed ones included, so errors do not
        # lead to back-to-back calls against Instagram
        time.sleep(2)

print(f"\n All data appended successfully to {output_file}")


#### Printing the Scraped Data

In [None]:
import pandas as pd
# Read back the per-user details file written by the fetching cell
d=pd.read_csv("user_details_liveappend.csv")
# Bare expression as the last line of the cell, so the notebook renders the frame
d