# Instagram API - Automation
*Luís Eduardo Pompeu de Sousa Brasil Háteras*

In [None]:
# Show the output of every top-level expression in a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## Authentication on Instagram

In [None]:
from instagram_private_api import Client, ClientCompatPatch

# Placeholder credentials: replace them locally and never commit real values.
user_name = 'YOUR USERNAME'
password = 'YOUR PASSWORD'

# `api` is the module-level client used by the cells below.
api = Client(user_name, password)
# NOTE(review): `results` is not read again in the visible cells; presumably
# this call only checks that the session works - confirm.
results = api.feed_timeline()

## List of followers and following

In [None]:
from operator import itemgetter
import pandas as pd
import json
import requests

# How to get a user's ID: https://www.instagram.com/{username}/?__a=1
# or just pass the username to get_userID():
def get_userID(username, timeout=10):
    """Return the Instagram user ID for ``username``.

    Requests ``https://www.instagram.com/{username}/?__a=1`` and reads
    ``['graphql']['user']['id']`` from the JSON body.

    Args:
        username: Instagram handle, without a leading ``@``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The value stored under ``['graphql']['user']['id']``.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON does not have the expected structure.
    """
    response = requests.get(
        'https://www.instagram.com/{}/?__a=1'.format(username),
        timeout=timeout,
    )
    # Fail on the HTTP error itself instead of on a confusing JSON/KeyError later.
    response.raise_for_status()
    dictionary = json.loads(response.text)
    return dictionary['graphql']['user']['id']

# Example: look up and print the ID of the account 'edux21' (performs an HTTP request)
print(get_userID('edux21'))


# Following (while for pagination)
def following_users(userid_instagram):
    """Return every account that ``userid_instagram`` follows.

    Pages through ``api.user_following`` until the response no longer carries
    a ``next_max_id`` cursor.

    Args:
        userid_instagram: Numeric Instagram user ID whose "following" list is read.

    Returns:
        A list of ``[pk, full_name, username, profile_pic_url, 'following']``
        rows, sorted by username in ascending order.

    Raises:
        KeyError: If a returned user entry lacks one of the four fields above.
    """
    # One rank token for the whole listing: the library documents that the
    # same token should be reused while paging through a single feed.
    rank_token = api.generate_uuid()
    following = []

    results = api.user_following(userid_instagram, rank_token=rank_token)
    following.extend(results.get('users', []))
    next_max_id = results.get('next_max_id')

    # Keep requesting pages for as long as the API hands back a cursor.
    while next_max_id:
        results = api.user_following(
            userid_instagram, rank_token=rank_token, max_id=next_max_id
        )
        following.extend(results.get('users', []))
        next_max_id = results.get('next_max_id')

    combinacao = [
        [user['pk'], user['full_name'], user['username'],
         user['profile_pic_url'], 'following']
        for user in following
    ]
    # Column 2 is the username.
    combinacao.sort(key=itemgetter(2))
    return combinacao
    
# Build a DataFrame from the rows returned for the account 'edux21'
seguindo = following_users(get_userID('edux21'))
seguindo = pd.DataFrame(seguindo, columns = ['userID' , 'Full Name', 'username', 'Profile Picture', 'Type']) 
# Bare expression: displayed by the notebook
seguindo


# Followers (while for pagination)
def followers_users(userid_instagram):
    """Return every account that follows ``userid_instagram``.

    Pages through ``api.user_followers`` until the response no longer carries
    a ``next_max_id`` cursor.

    Args:
        userid_instagram: Numeric Instagram user ID whose follower list is read.

    Returns:
        A list of ``[pk, full_name, username, profile_pic_url, 'follower']``
        rows, sorted by username in ascending order.

    Raises:
        KeyError: If a returned user entry lacks one of the four fields above.
    """
    # One rank token for the whole listing: the library documents that the
    # same token should be reused while paging through a single feed.
    rank_token = api.generate_uuid()
    followers = []

    results = api.user_followers(userid_instagram, rank_token=rank_token)
    followers.extend(results.get('users', []))
    next_max_id = results.get('next_max_id')

    # Keep requesting pages for as long as the API hands back a cursor.
    while next_max_id:
        results = api.user_followers(
            userid_instagram, rank_token=rank_token, max_id=next_max_id
        )
        followers.extend(results.get('users', []))
        next_max_id = results.get('next_max_id')

    combinacao = [
        [user['pk'], user['full_name'], user['username'],
         user['profile_pic_url'], 'follower']
        for user in followers
    ]
    # Column 2 is the username.
    combinacao.sort(key=itemgetter(2))
    return combinacao

# Build a DataFrame from the follower rows returned for the account 'edux21'
seguidores = followers_users(get_userID('edux21'))
seguidores = pd.DataFrame(seguidores, columns = ['userID' , 'Full Name', 'username', 'Profile Picture', 'Type']) 
# Bare expression: displayed by the notebook
seguidores

# Save both frames as tab-separated, UTF-16 encoded files (overwriting existing ones)
seguindo.to_csv('following_usersInstagram.csv', mode='w', sep='\t', encoding='utf-16', index = None)
seguidores.to_csv('followers_usersInstagram.csv', mode='w', sep='\t', encoding='utf-16', index = None)




## Mutual friends, Following alone, Follower alone

In [None]:
# Following + Followers: stack both frames (followers first).
# reset_index() keeps each row's previous position in a new 'index' column.
df = pd.concat([seguidores,seguindo], axis=0).reset_index()
total_following_followers = len(df)

# Mutual friends - following and follower.
# duplicated() flags rows whose username already appeared earlier in the
# stacked frame. Because an 'index' column already exists, this reset_index()
# stores the old index in 'level_0'; both helper columns are dropped below.
mutual_friends = df[df.duplicated(subset='username')].reset_index()
mutual_friends.drop(columns=['level_0', 'index', 'Type'], inplace=True)
mutual_friends

# People who follow you but whom you don't follow back, or
# people you follow who don't follow you back:
# keep=False removes every row whose username occurs more than once.
df = df.drop_duplicates(subset=['username'], keep=False)

# People you follow who don't follow you back
following_alone = df[df['Type']=='following'].reset_index()
following_alone.drop(columns=['level_0', 'index'], inplace=True)
following_alone

# Followers you don't follow back
followers_alone = df[df['Type']=='follower'].reset_index()
followers_alone.drop(columns=['level_0', 'index'], inplace=True)
followers_alone


# Save in CSV file (tab-separated, UTF-16)
mutual_friends.to_csv('mutualFriendsInstagram.csv', mode='w', sep='\t', encoding='utf-16', index = None)
following_alone.to_csv('following_alone.csv', mode='w', sep='\t', encoding='utf-16', index = None)
followers_alone.to_csv('followers_alone.csv', mode='w', sep='\t', encoding='utf-16', index = None)


# Shares of the stacked row count.
# NOTE(review): the factor 2 presumably compensates for each mutual friend
# occupying two rows of the stacked frame - confirm.
# These divisions raise ZeroDivisionError when both source frames are empty.
print('Mutual friends total (%): ' + str(100*2*len(mutual_friends)/total_following_followers))
print('Following people who dont follow you back (%): ' + str(100*len(following_alone)/total_following_followers))
print('Followers who you dont follow them back (%): ' + str(100*len(followers_alone)/total_following_followers))

## Photos - Date of publication, location, number of likes, comments, URL

In [None]:
import pandas as pd
from collections import Counter
import datetime

# Creating dataframe
# NOTE(review): `df` is rebound to an empty frame here and is not read again in this cell.
df = pd.DataFrame()


# Extract photos information
def photo_information(username_insta):
    """Collect per-post metadata for every item in a user's feed.

    Pages through ``api.username_feed`` until the response no longer carries
    a ``next_max_id`` cursor, then turns each feed item into one row.

    Args:
        username_insta: Instagram username whose feed is read.

    Returns:
        A list of rows in feed order, each
        ``[date, username, likes, comments, caption, city, latitude,
        longitude, url]``. ``date`` is the ``taken_at`` timestamp rendered in
        UTC as ``'%Y-%m-%d %H:%M:%S'``. Every field that is absent from the
        feed item is the placeholder string ``'-'``.

    Raises:
        KeyError: If an item has no ``image_versions2`` candidates to take
            the URL from.
    """
    items = []

    # Extract all feed items (loop for pagination).
    request = api.username_feed(username_insta)
    items.extend(request.get('items') or [])
    next_max_id = request.get('next_max_id')
    while next_max_id:
        request = api.username_feed(username_insta, max_id=next_max_id)
        items.extend(request.get('items') or [])
        next_max_id = request.get('next_max_id')

    # Rows stay in feed order. The previous implementation sorted on the
    # username column, which holds the same value in every row, so that
    # stable sort never changed the order.
    return [_photo_row(item, username_insta) for item in items]


def _photo_row(item, username_insta):
    """Build one ``photo_information`` row from a single feed item."""
    missing = '-'

    # Publication date, formatted in UTC.
    if 'taken_at' in item:
        taken_at = datetime.datetime.fromtimestamp(
            item['taken_at'], tz=datetime.timezone.utc
        ).strftime('%Y-%m-%d %H:%M:%S')
    else:
        taken_at = missing

    # Caption text; the key may be absent or explicitly None.
    caption = item.get('caption')
    caption_text = str(caption['text']) if caption is not None else missing

    # City of the tagged location; tolerate a location without a 'city' key.
    if 'location' in item:
        city = (item['location'] or {}).get('city', missing)
    else:
        city = missing

    # For carousels the URL of the first carousel entry is used.
    media = item['carousel_media'][0] if 'carousel_media' in item else item
    url = media['image_versions2']['candidates'][0]['url']

    return [
        taken_at,
        username_insta,
        item.get('like_count', missing),
        item.get('comment_count', missing),
        caption_text,
        city,
        item.get('lat', missing),
        item.get('lng', missing),
        url,
    ]

    
# Build a DataFrame with one row per feed item of 'eduardocandido6'
vamosla = photo_information('eduardocandido6')  
vamosla = pd.DataFrame(vamosla, columns = ['Data' , 'Username', 'Likes', 'Comments', 'Title Photo', 'Location', 'Latitude', 'Longitude', 'URL']) 
# NOTE(review): photo_information writes '-' for missing fields; a '-' in
# 'Data' or 'Likes' would presumably break the datetime conversion here and
# the numeric statistics below - confirm.
vamosla['Data'] = pd.to_datetime(vamosla['Data'])
# vamosla['year'] = vamosla['Data'].dt.year
vamosla

# Save in CSV file (tab-separated, UTF-16)
vamosla.to_csv('photosInformation.csv', mode='w', sep='\t', encoding='utf-16', index = None)

# --- Top and Worst Photo

# Worst photo: every row whose like count equals the minimum
min_likes = vamosla[vamosla['Likes']==vamosla['Likes'].min()]
min_likes

# Best photo: every row whose like count equals the maximum
max_likes = vamosla[vamosla['Likes']==vamosla['Likes'].max()]
max_likes

# Average number of likes
vamosla['Likes'].mean()

# Median number of likes
vamosla['Likes'].median()


## Download all photos

In [None]:
import os, urllib.request
import random

def download_all_images(username_insta, folder):
    """Download the image of every feed item of ``username_insta`` into ``folder``.

    Each file is named ``{username}{latitude}&{longitude}_{n}.jpg``, where
    ``n`` is the position of the item in the list returned by
    ``photo_information``. The URL of each downloaded image is printed.

    Args:
        username_insta: Instagram username whose images are downloaded.
        folder: Target directory; created if it does not exist yet.

    Returns:
        None.
    """
    # exist_ok lets the cell be re-run without failing on the existing folder.
    os.makedirs(folder, exist_ok=True)
    for position, row in enumerate(photo_information(username_insta)):
        # The last three columns of a row are latitude, longitude and URL.
        latitude, longitude, url = row[-3:]
        # The running position guarantees a unique name; the random 1-1001
        # suffix used before could collide and silently overwrite a file.
        filename = '{}{}&{}_{}.jpg'.format(username_insta, latitude, longitude, position)
        endereco = os.path.join(folder, filename)
        urllib.request.urlretrieve(url, endereco)
        print(url)

# Example: save every image of 'victor_linharess' into the folder 'Fotos_victor'
download_all_images('victor_linharess', 'Fotos_victor')

## People who most like any user photos

In [None]:
# Media-id: https://api.instagram.com/oembed/?url=https://www.instagram.com/p/B_SUEKaDZhK/
# Extract "media_id" from all photos
def mediaid(username_insta):
    """Return the ``pk`` of every item in the feed of ``username_insta``.

    Requests feed pages from ``api.username_feed`` until a page comes back
    without a ``next_max_id`` cursor.

    Args:
        username_insta: Instagram username whose feed is read.

    Returns:
        A list with one ``pk`` per feed item, in the order the API returned them.
    """
    collected = []
    page = api.username_feed(username_insta)
    while True:
        collected += [entry['pk'] for entry in page['items']]
        cursor = page.get('next_max_id')
        if not cursor:
            return collected
        # A cursor is present: fetch the page that follows it.
        page = api.username_feed(username_insta, max_id=cursor)

# Example: collect the media IDs of 'eduardocandido6' and display how many there are
listacomids = mediaid('eduardocandido6')
len(listacomids)

# Likers from all photos (1000 likes limitation per photo)
def likers(username_insta):
    """List the users returned as likers for each feed item of ``username_insta``.

    Calls ``api.media_likers`` once per media ID from ``mediaid``. A user who
    liked several items appears once per liked item.

    Args:
        username_insta: Instagram username whose feed items are inspected.

    Returns:
        A list of ``[pk, full_name, username, profile_pic_url]`` rows sorted
        by username in ascending order.
    """
    rows = []
    # Iterate over a slice of the ID list to inspect fewer items - default = all.
    for media_pk in mediaid(username_insta):
        for liker in api.media_likers(media_pk).get('users'):
            username = liker['username']
            full_name = liker['full_name']
            user_pk = liker['pk']
            picture = liker['profile_pic_url']
            rows.append([user_pk, full_name, username, picture])

    # Column 2 is the username.
    return sorted(rows, key=itemgetter(2))
    
# Likers - complete list (one row per like)
curtidores = likers('eduardocandido6')
curtidores = pd.DataFrame(curtidores, columns = ['UserID' , 'Name', 'Username', 'URL Person']) 
curtidores

# Counting likes by username or name, most frequent first
curtidores_username_filtrado=Counter(curtidores['Username']).most_common()
# NOTE(review): the per-name count below is not read again in this cell.
curtidores_fullname_filtrado=Counter(curtidores['Name']).most_common()
curtidores_username_filtrado = pd.DataFrame(curtidores_username_filtrado, columns = ['Username' , 'Qtd'])
curtidores_username_filtrado

# Save in CSV file (tab-separated, UTF-16)
curtidores_username_filtrado.to_csv('likers.csv', mode='w', sep='\t', encoding='utf-16', index = None)



## People who most comment any user photos

In [None]:
# Extract the first 500 users who commented on each photo
def comentaristas(username_insta):
    """Return the username behind each fetched comment on a user's feed items.

    Calls ``api.media_n_comments(media_id, n=500)`` for every media ID from
    ``mediaid``. A user who commented several times appears once per comment.

    Args:
        username_insta: Instagram username whose feed items are inspected.

    Returns:
        A list of commenter usernames, grouped by feed item in feed order.
    """
    autores = []
    # Iterate over a slice of the ID list to inspect fewer items - default = all.
    for media_pk in mediaid(username_insta):
        autores.extend(
            comment['user']['username']
            for comment in api.media_n_comments(media_pk, n=500)
        )
    return autores
        
# Count comments per username, most frequent first
pessoas_comentam = comentaristas('eduardocandido6')
principais_comentaristas=Counter(pessoas_comentam).most_common()
principais_comentaristas = pd.DataFrame(principais_comentaristas, columns = ['Username' , 'Qtd'])
principais_comentaristas

# Save in CSV file (tab-separated, UTF-16)
principais_comentaristas.to_csv('commenters.csv', mode='w', sep='\t', encoding='utf-16', index = None)


## Like all the photos of a user

In [None]:
# Like every photo of the user
listacomids = mediaid('eduardocandido6')
for i in range(0,len(listacomids)):
    # One like request per media ID
    api.post_like(listacomids[i], module_name='feed_timeline')

## Other useful functions

In [None]:
import requests
import json

# Get the user ID for a username
get_userID('quebrandootabu')

# Convert a media_id to its URL link (the 'permalink' field of the response):
api.media_permalink(2292983407267649610)['permalink']

# Get media_id using photo URL
def URLtoMediaID(url, timeout=10):
    """Return the media ID of the post at ``url`` using the oEmbed endpoint.

    Reads the ``media_id`` field of the oEmbed JSON and returns the part
    before the first underscore.

    Args:
        url: Address of the post, e.g. ``https://www.instagram.com/p/<code>/``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The text of ``media_id`` up to (not including) the first ``'_'``, or
        the whole ``media_id`` when it contains no underscore.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON has no ``media_id`` field.
    """
    # Passing the post URL through `params` query-encodes it, so a URL that
    # itself contains '?' or '&' is not mangled.
    response = requests.get(
        'https://api.instagram.com/oembed/',
        params={'url': url},
        timeout=timeout,
    )
    response.raise_for_status()
    media_id = json.loads(response.text)['media_id']
    # partition() keeps the full string when there is no '_'; the previous
    # slice up to find('_') cut off the last character in that case.
    return media_id.partition('_')[0]
# Example: media ID for a post URL
URLtoMediaID('https://www.instagram.com/p/4a03XqpveL/')

# Post a comment - (media_id, text)
#api.post_comment(1016357169326913419, 'Teste :)')

# Check whether a user ID is following a tag
api.tags_user_following(196211285)

# User detail info - experimental
#api.user_detail_info(1624955639)

# Check a user ID's feed
#api.user_feed(7578321540)

# Follow someone (user ID)
api.friendships_create(7578321540)

# Unfollow
#api.friendships_destroy(userid)

# Friendship status
api.friendships_show(get_userID('luanaravena1'))

# Pending friendship requests
api.friendships_pending()['users']

# Get user feed and story information
api.user_story_feed(get_userID('manueladavila'))

# Get user stories (the 'reel' field of the story feed)
api.user_story_feed(get_userID('tiaoviana'))['reel']

# Get the likers of a comment
#api.comment_likers(17893984120500475)

# Get the replies to a comment
#api.comment_replies(URLtoMediaID('https://www.instagram.com/p/B_kndbCpkvx/'), 17893984120500475)

# More...
# https://instagram-private-api.readthedocs.io/en/latest/api.html

## Using WebAPI:

In [None]:
# WebAPI

import hashlib
import string
import random
import pprint
from instagram_web_api import Client, ClientCompatPatch, ClientError, ClientLoginError

class MyClient(Client):
    """``Client`` subclass whose ``_extract_rhx_gis`` does not read the page HTML."""

    @staticmethod
    def _extract_rhx_gis(html):
        """Return an MD5 hash object built from 8 random characters.

        The characters are drawn from lowercase ASCII letters and digits.
        ``html`` is accepted but never read.

        NOTE(review): this returns the ``hashlib`` hash object itself, not its
        hex digest - confirm that is what the base class expects.
        """
        alphabet = string.ascii_lowercase + string.digits
        token = ''.join(random.choice(alphabet) for _ in range(8))
        return hashlib.md5(token.encode())

# Without any authentication: no username or password is passed to the client
web_api = MyClient(auto_patch=True, drop_incompat_keys=False)

# Get location feed (location_id); location_story_feed(location_id) is the story variant
web_api.location_feed(273471170716)


## How to be the first to comment on a popular Instagram page

In [None]:
from instagram_private_api import Client, ClientCompatPatch
import time
from covid import Covid

# Extract covid-19 data
covid = Covid(source="worldometers")

# Numeric user ID of the account whose feed is polled
userid_insta = 4544395091
feed_qt = api.user_feed(userid_insta)

# Last publication: pk of the first feed item at start-up
last_publication = feed_qt['items'][0]['pk']

# Poll every 10 seconds; the loop only stops when the cell is interrupted.
while True:
    new_publication = api.user_feed(userid_insta)['items'][0]['pk']
    # NOTE(review): fetched on every iteration although the value is only
    # used when a new publication is found.
    mortes = covid.get_status_by_country_name("brazil")['deaths']
    if new_publication!=last_publication:
        # Comment text (Portuguese): "Number of deaths in Brazil COVID-19: {}"
        frase_publicacao = 'Número de mortes no Brasil COVID-19: {}'.format(mortes)
        api.post_comment(new_publication, frase_publicacao)
        print(api.media_permalink(new_publication)['permalink'])
        # Remember the post so it is commented on only once
        last_publication=new_publication
    time.sleep(10)

## Multiple pages:

In [None]:
from instagram_private_api import Client, ClientCompatPatch
import time

# Initialize dictionaries; both are keyed by the position of a page in `usernames`
last_publication = {}
new_publication = {}

#opovoonline

# Usernames
usernames = ['quebrandootabu', 'jornaloglobo', 'conexaopoliticabrasil', 'brasil.de.direita', 
             'mblivre', 'joicehasselmannoficial', 'cristiano', 'beyonce', 'senadorhumberto', 'portalg1', 'bolsonarosp',
            'jairmessiasbolsonaro', 'ptbrasil', 'jornalistaslivres']

# Text to post (constant, so it is set once instead of on every polling round)
frase_publicacao = 'Bom dia'

# Get the last publication of every page
for i, page_name in enumerate(usernames):
    last_publication[i] = api.username_feed(page_name)['items'][0]['pk']

# Poll all pages; the loop only stops when the cell is interrupted.
while True:

    for i, page_name in enumerate(usernames):
        # Get new publication
        new_publication[i] = api.username_feed(page_name)['items'][0]['pk']

        # If there is a new publication then post a comment
        if new_publication[i] != last_publication[i]:
            api.post_comment(new_publication[i], frase_publicacao)
            print(api.media_permalink(new_publication[i])['permalink'])
            # Remember the post so it is commented on only once
            last_publication[i] = new_publication[i]

    # Wait 10 seconds between polling rounds. This call used to sit after the
    # infinite loop, where it was unreachable, so the loop polled without pause.
    time.sleep(10)

## Follow multiple users

In [None]:
# Build a DataFrame of the accounts followed by 'whinderssonnunes'
seguindo = following_users(get_userID('whinderssonnunes'))
seguindo = pd.DataFrame(seguindo, columns = ['userID' , 'Full Name', 'username', 'Profile Picture', 'Type']) 
seguindo

# Send one follow request per listed user ID
for i in seguindo['userID']:
    api.friendships_create(i)

## Post a comment in all photos of a person

In [None]:
# Post the same comment on the photos of a person
# NOTE(review): `username` is not assigned at module level anywhere in the
# visible cells; it must be defined before this cell runs - confirm.
listacomids = mediaid(username)
# NOTE(review): the range starts at 4, so the first four media IDs are skipped - confirm intent.
for i in range(4,len(listacomids)):
    frase_publicacao = 'Bom dia !' 
    api.post_comment(listacomids[i], frase_publicacao)
    print(listacomids[i])