# 📸 Instagram web scraping demo 📸

### macOS installation instructions

1. Go to https://brew.sh and install the Homebrew package manager.

2. Run the following commands in the terminal to install Python, Selenium, and GeckoDriver.

    `brew install python3`

    `python3 -m pip install selenium`

    `brew install geckodriver`

In [1]:
import os
import csv
import random
from selenium import webdriver
import time

## Start a Firefox browser session controlled by Selenium.
## The cells below reuse this single `driver` object for every page load and query.
driver = webdriver.Firefox()

## Future selenium import code:
## NOTE(review): the three lines below are Java import syntax (org.openqa.selenium.*),
## not Python. The Python counterpart of the first one is presumably
## `from selenium.webdriver.common.by import By` -- confirm before uncommenting.
#import org.openqa.selenium.By;
#import org.openqa.selenium.WebDriver;
#import org.openqa.selenium.WebElement;

In [2]:
## Ignore deprecation warnings for this session.
## Only DeprecationWarning is silenced; a bare filterwarnings('ignore') would hide every
## warning category, including ones that point at real problems (e.g. ResourceWarning).
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)

In [4]:
## Point the Selenium-controlled browser at the iffybooks profile page
driver.get(url="https://instagram.com/iffybooks")


In [5]:
## Open Instagram's login page. Log into an account by hand in the browser
## window before running the cells that follow.
driver.get(url="https://www.instagram.com/accounts/login")


In [6]:
## Return to the iffybooks user profile after the manual login step
driver.get(url="https://instagram.com/iffybooks")


In [7]:
## Use an XPath query to find the follower link: every <a> whose href contains 'followers'.
## find_elements("xpath", ...) replaces the find_elements_by_xpath shortcut, which is
## deprecated in Selenium 4 and missing from newer releases ("xpath" is the value of By.XPATH).
driver.find_elements("xpath", "//a[contains(@href,'followers')]")


[<selenium.webdriver.remote.webelement.WebElement (session="8eb9ac52-78bd-4bad-8421-c8e70c2f2188", element="b8025368-7e0d-4e0a-8255-51e06a02a519")>]

In [8]:
## Get the inner HTML code of the first matching follower link.
## find_elements("xpath", ...) replaces the find_elements_by_xpath shortcut, which is
## deprecated in Selenium 4 and missing from newer releases ("xpath" is the value of By.XPATH).
driver.find_elements("xpath", "//a[contains(@href,'followers')]")[0].get_attribute('innerHTML')


'<div class="_7UhW9    vy6Bb     MMzan   KV-D4          uL8Hv        T0kll "><span class="g47SY " title="2,596">2,596</span> followers</div>'

In [9]:
## Modify XPath query to retrieve the number of followers as a string:
## descend to the <span> inside the follower link and read its inner HTML.
## find_elements("xpath", ...) replaces the find_elements_by_xpath shortcut, which is
## deprecated in Selenium 4 and missing from newer releases ("xpath" is the value of By.XPATH).
driver.find_elements("xpath", "//a[contains(@href,'followers')]//span")[0].get_attribute('innerHTML')


'2,596'

In [10]:
## Convert number of followers to an integer.
## The span text carries thousands separators (e.g. '2,596'), so commas are stripped before int().
## find_elements("xpath", ...) replaces the find_elements_by_xpath shortcut, which is
## deprecated in Selenium 4 and missing from newer releases ("xpath" is the value of By.XPATH).

number_of_followers = int(
    driver.find_elements("xpath", "//a[contains(@href,'followers')]//span")[0]
    .get_attribute('innerHTML')
    .replace(',', '')
)

print(number_of_followers)


2596


In [11]:
## Create a list of Instagram accounts (one profile URL per entry)

instagram_user_urls = [
    "https://www.instagram.com/7480jyoti/",
    "https://www.instagram.com/verma.1.2.3.4/",
    "https://www.instagram.com/shoibkhan212/",
    "https://www.instagram.com/radhi_ka4823/",
    "https://www.instagram.com/cool.boy2603/",
    "https://www.instagram.com/anab.el2257/",
    "https://www.instagram.com/edits_de_todo_267/",
    "https://www.instagram.com/ltr7555/",
    "https://www.instagram.com/rohit67582/",
    "https://www.instagram.com/jaymachamunda2021/",
    "https://www.instagram.com/vsidhu1238/",
    "https://www.instagram.com/tiffane_nicoly9/",
    "https://www.instagram.com/gudu3505/",
    "https://www.instagram.com/pat.el3830/",
    "https://www.instagram.com/mahmood.44377/",
    "https://www.instagram.com/arsh_deep7011/",
    "https://www.instagram.com/karis_hma2556/",
]

## Shuffle the list of accounts (in place, so the visiting order differs between runs)
random.shuffle(instagram_user_urls)


In [12]:
## Create an empty dictionary to hold the scraped data.
## The scraping loop fills it as username -> [followers, following].
instagram_user_dict = dict()


In [13]:
## Scrape follower and following numbers from each account in the list

def _scrape_profile_count(href_fragment):
    """Read one counter from the profile page currently loaded in `driver`.

    Looks for the first <span> inside an <a> whose href contains
    `href_fragment` ('followers' or 'following'), strips thousands
    separators from its inner HTML and converts the result to an int.

    Returns -1 when the counter cannot be read (no matching element, text
    that is not a plain number, or a Selenium error), so one bad profile
    does not stop the whole run.
    """
    try:
        ## find_elements("xpath", ...) works on Selenium 3 and 4; the older
        ## find_elements_by_xpath shortcut is missing from newer releases.
        spans = driver.find_elements("xpath", f"//a[contains(@href,'{href_fragment}')]//span")
        return int(spans[0].get_attribute('innerHTML').replace(',', ''))
    except Exception:
        ## `except Exception` (not a bare `except`) keeps the best-effort behaviour
        ## but still lets KeyboardInterrupt stop a long scraping run.
        return -1


for instagram_user_url in instagram_user_urls:

    ## Extract username from URL (last path component, ignoring the trailing slash)
    instagram_username = instagram_user_url.strip('/').split('/')[-1]
    print(instagram_username)

    ## Load user profile and pause briefly before querying the page
    driver.get(instagram_user_url)
    time.sleep(0.3)

    ## Extract number of followers from the page
    number_of_followers = _scrape_profile_count('followers')
    print(f"Followers: {number_of_followers}")

    ## Extract number following from the page.
    ## Each counter gets its own -1 fallback, so a failed "following" lookup can no
    ## longer overwrite the follower count or reuse the previous account's value.
    number_following = _scrape_profile_count('following')
    print(f"Following: {number_following}\n")

    ## Add follower and following data to the dictionary
    instagram_user_dict[instagram_username] = [number_of_followers, number_following]

    ## Wait a randomised 5-8 seconds between profiles
    time.sleep(5 + random.random()*3)


radhi_ka4823
Followers: 13
Following: 6889

gudu3505
Followers: 83
Following: 6739

ltr7555
Followers: 26
Following: 6409

jaymachamunda2021
Followers: 37
Following: 6138

rohit67582
Followers: 10
Following: 6441

vsidhu1238
Followers: 21
Following: 7193

poposarmi
Followers: 23
Following: 5873

shoibkhan212
Followers: 18
Following: 6651

anab.el2257
Followers: 13
Following: 7026

cool.boy2603
Followers: 20
Following: 6102

tiffane_nicoly9
Followers: 30
Following: 5331

mahmood.44377
Followers: 13
Following: 5411

verma.1.2.3.4
Followers: 24
Following: 6849

karis_hma2556
Followers: 1
Following: 1196

arsh_deep7011
Followers: 56
Following: 6876

7480jyoti
Followers: 16
Following: 6767

edits_de_todo_267
Followers: 34
Following: 7425

pat.el3830
Followers: 21
Following: 7106



In [14]:
## View data dictionary
## Each entry is username -> [followers, following], as stored by the scraping loop.
from pprint import pprint

## pprint prints one entry per line; the recorded output below is ordered by
## username rather than by the order in which the accounts were scraped.
pprint(instagram_user_dict)

{'7480jyoti': [16, 6767],
 'anab.el2257': [13, 7026],
 'arsh_deep7011': [56, 6876],
 'cool.boy2603': [20, 6102],
 'edits_de_todo_267': [34, 7425],
 'gudu3505': [83, 6739],
 'jaymachamunda2021': [37, 6138],
 'karis_hma2556': [1, 1196],
 'ltr7555': [26, 6409],
 'mahmood.44377': [13, 5411],
 'pat.el3830': [21, 7106],
 'poposarmi': [23, 5873],
 'radhi_ka4823': [13, 6889],
 'rohit67582': [10, 6441],
 'shoibkhan212': [18, 6651],
 'tiffane_nicoly9': [30, 5331],
 'verma.1.2.3.4': [24, 6849],
 'vsidhu1238': [21, 7193]}
