## Collecting user data from the Instagram API and merging it into an existing CSV file

In [1]:
import pandas as pd
from instagram_private_api import Client, ClientCompatPatch
import api_requests
import safe_api
import importlib
importlib.reload(api_requests)
importlib.reload(safe_api)
from api_requests import (BasicUserInfo, UserInfo, PublicUserInfoRequests, MediaInfoRequests, 
                            InteractWithUsersActions, CollectDataThroughHashtags)
from safe_api import (SafeClient, ApiLimitReachedException)
import os
import time
import pdb

In [2]:
#Credentials for the account used to make API requests.
#NOTE(review): the literal values below are credentials committed to source. They are kept only as
#fallbacks so the notebook still runs unchanged; prefer setting INSTAGRAM_API_USERNAME and
#INSTAGRAM_API_PASSWORD in the environment and rotating this password.
api_username = os.environ.get('INSTAGRAM_API_USERNAME', 'cuteanimalzzzz123')
api_password = os.environ.get('INSTAGRAM_API_PASSWORD', 'instabotOP')
#api_username is reused later to build the name of the saved CSV file
api = SafeClient(api_username, api_password)

In [3]:
#We will collect and save data for ryanlopezzzz's followers.
test_username = 'ryanlopezzzz'
#Request helper (imported from api_requests) bound to the logged-in client and the target account
puir = PublicUserInfoRequests(api, test_username)
#Presumably the list of usernames following test_username (going by the method name); later cells slice it
followers_usernames = puir.get_followers_usernames() 

In [4]:
#Function which collects data through API and returns it as a pandas dataframe
def get_user_info_table(api, usernames):
    """Build a table with one row of profile data per username.

    For every username a ``UserInfo(api, username)`` object is created
    (presumably this is where the API request happens -- UserInfo is defined
    in api_requests) and the attributes named by the table columns are read
    from it. The time at which the object was obtained is stored in the
    ``api_request_time`` column as seconds since the epoch.

    Parameters
    ----------
    api : client object passed straight through to ``UserInfo``.
    usernames : iterable of usernames to look up.

    Returns
    -------
    pandas.DataFrame indexed by ``user_id``; the remaining entries of
    ``column_names_for_table`` are the columns. An empty ``usernames``
    yields an empty table with the same columns.

    Raises
    ------
    AttributeError if a ``UserInfo`` object lacks one of the expected attributes.
    """
    column_names_for_table = ['user_id', 'username', 'private_status', 'follower_count', 'following_count', 
                              'media_count', 'full_name', 'profile_pic_url', 'bio_text', 'url_in_bio', 
                              'hashtag_following_count', 'usertags_count', 'api_request_time']
    #Every column except the trailing timestamp is read off the UserInfo object.
    #Computed once because it does not change between users.
    user_info_attributes = column_names_for_table[:-1]
    table_rows = []
    for username in usernames:
        user_info = UserInfo(api, username)
        #getattr (rather than __dict__ lookup) also works for properties and slotted attributes
        single_user_data = [getattr(user_info, attribute) for attribute in user_info_attributes]
        #timestamp is taken right after the user's data has been obtained
        api_request_time = time.time()
        table_rows.append(single_user_data + [api_request_time])
    dataframe = pd.DataFrame(table_rows, columns=column_names_for_table)
    return dataframe.set_index('user_id')

In [5]:
#Load the previously saved table for this API account, if there is one.
#The file name is derived from api_username; loaded_user_table stays None when no file exists yet.
user_table_filename = 'saved_info/{}_collected_user_info_table.csv'.format(api_username)
loaded_user_table = None
if os.path.isfile(user_table_filename):
    #restore user_id as the index, matching the tables built by get_user_info_table
    loaded_user_table = pd.read_csv(user_table_filename).set_index('user_id')

In [6]:
#Display the loaded table (None if no saved file was found).
#When this notebook was run, the saved file contained my first 4 followers, i.e. followers_usernames[0:4]
loaded_user_table

Unnamed: 0_level_0,username,private_status,follower_count,following_count,media_count,full_name,profile_pic_url,bio_text,url_in_bio,hashtag_following_count,usertags_count,api_request_time
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
50004795460,cuteanimalzzzz123,False,0,2,1,Cute Animals,https://scontent-lax3-2.cdninstagram.com/v/t51...,I post cute animal photos! 🥰,,0,0,1639386000.0
8478000530,luca.scharrer,False,47,41,2,Luca Scharrer,https://scontent-lax3-2.cdninstagram.com/v/t51...,4th year CCS physics student at UCSB. Interest...,,1,12,1639386000.0
48740245931,jameshatesjameson,True,266,1578,26,James,https://scontent-lax3-2.cdninstagram.com/v/t51...,"spam, love following people.",,0,0,1639386000.0
39612202963,carconnell_,False,418,582,0,Carley Elizabeth,https://scontent-lax3-2.cdninstagram.com/v/t51...,,,0,1,1639386000.0


In [7]:
#Collect fresh data for followers_usernames[2:6]
generated_user_table = get_user_info_table(api, followers_usernames[2:6])

In [8]:
#Note that this table intentionally overlaps with the previously loaded table
generated_user_table

Unnamed: 0_level_0,username,private_status,follower_count,following_count,media_count,full_name,profile_pic_url,bio_text,url_in_bio,hashtag_following_count,usertags_count,api_request_time
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
48740245931,jameshatesjameson,True,265,1578,26,James,https://scontent-lax3-2.cdninstagram.com/v/t51...,"spam, love following people.",,0,0,1639391000.0
39612202963,carconnell_,False,419,582,0,Carley Elizabeth,https://scontent-lax3-2.cdninstagram.com/v/t51...,,,0,1,1639391000.0
4120079014,jamieroberts6222,True,568,1943,87,Jamie Roberts,https://scontent-lax3-1.cdninstagram.com/v/t51...,Senior RHS class of 2022.,,1,0,1639391000.0
49836586749,xubryana,False,116,118,0,Bryan Xu,https://scontent-lax3-2.cdninstagram.com/v/t51...,📈,,0,1,1639391000.0


In [9]:
#Merges two user-info tables that share the user_id index, keeping only the newest row per user
def combine_user_info_tables(table1, table2):
    """Stack two user tables and drop all but the most recent row for each index value.

    Both tables are expected to be indexed by user_id and to carry an
    ``api_request_time`` column. "Most recent" means the largest
    ``api_request_time``.

    Returns a new DataFrame ordered from newest to oldest request time;
    the inputs are not modified.
    """
    stacked = pd.concat([table1, table2])

    #newest rows first, so the first occurrence of every index value is the one to keep
    newest_first = stacked.sort_values('api_request_time', ascending=False)

    #True for every repeat of an index value after its first (newest) occurrence
    is_stale_repeat = newest_first.index.duplicated(keep='first')

    return newest_first[~is_stale_repeat]

In [10]:
#Merge the saved table with the freshly collected one
new_user_table = combine_user_info_tables(loaded_user_table, generated_user_table)

In [11]:
#Note that there are no duplicate user_ids in this table; rows are ordered from newest to oldest api_request_time
new_user_table

Unnamed: 0_level_0,username,private_status,follower_count,following_count,media_count,full_name,profile_pic_url,bio_text,url_in_bio,hashtag_following_count,usertags_count,api_request_time
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
49836586749,xubryana,False,116,118,0,Bryan Xu,https://scontent-lax3-2.cdninstagram.com/v/t51...,📈,,0,1,1639391000.0
4120079014,jamieroberts6222,True,568,1943,87,Jamie Roberts,https://scontent-lax3-1.cdninstagram.com/v/t51...,Senior RHS class of 2022.,,1,0,1639391000.0
39612202963,carconnell_,False,419,582,0,Carley Elizabeth,https://scontent-lax3-2.cdninstagram.com/v/t51...,,,0,1,1639391000.0
48740245931,jameshatesjameson,True,265,1578,26,James,https://scontent-lax3-2.cdninstagram.com/v/t51...,"spam, love following people.",,0,0,1639391000.0
8478000530,luca.scharrer,False,47,41,2,Luca Scharrer,https://scontent-lax3-2.cdninstagram.com/v/t51...,4th year CCS physics student at UCSB. Interest...,,1,12,1639386000.0
50004795460,cuteanimalzzzz123,False,0,2,1,Cute Animals,https://scontent-lax3-2.cdninstagram.com/v/t51...,I post cute animal photos! 🥰,,0,0,1639386000.0


In [12]:
#Make sure the target folder exists first; to_csv does not create missing directories
output_directory = os.path.dirname(user_table_filename)
if output_directory:
    os.makedirs(output_directory, exist_ok=True)
#Overwrite the saved file with the combined table; the user_id index is written as the first column
new_user_table.to_csv(user_table_filename)