<img width="8%" alt="Instagram.png" src="https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/Instagram.png" style="border-radius: 15%">

# Instagram - Explore API
<a href="https://bit.ly/3JyWIk6">Give Feedback</a> | <a href="https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=bug&template=bug_report.md&title=Instagram+-+Explore+API:+Error+short+description">Bug report</a>

**Tags:** #instagram #snippet #dataframe #content

**Author:** [Varsha Kumar](https://www.linkedin.com/in/varsha-kumar-590466305/)

**Last update:** 2024-07-03 (Created: 2024-07-02)

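**Description:** This notebook retrieves data from an Instagram profile (profile details, posts, places, hashtags, photos and comments) using the Apify Instagram Profile Scraper and organizes it into pandas DataFrames.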

## Input

### Import libraries

In [None]:
import json
import os
import time

import pandas as pd
import requests

### Setup variables

- `apify_token`: personal Apify API token used to authenticate the requests
- `instagram_profile_url`: URL of the Instagram profile to scrape

In [None]:
# Prefer the APIFY_TOKEN environment variable so the real credential never has
# to be saved inside the notebook; the placeholder below is only used when the
# variable is not set (replace it for a quick local test).
apify_token = os.environ.get("APIFY_TOKEN", "apify_api_gXWnLEPiE7wC8ALUwQkJ0QcdbuQzU84xxxxx")
# Profile to scrape; the username is taken from the last path segment of this URL.
instagram_profile_url = "https://www.instagram.com/naaslife/"

## Model

### Scrape Instagram data

In [None]:
def _extract_instagram_username(instagram_profile_url):
    """Return the last path segment of an Instagram profile URL.

    Query strings, fragments and trailing slashes are ignored, so
    ".../naaslife", ".../naaslife/" and ".../naaslife/?hl=en" all give "naaslife".
    """
    path = instagram_profile_url.strip().split("?", 1)[0].split("#", 1)[0]
    return path.rstrip("/").split("/")[-1]


def get_instagram_data(apify_token, instagram_profile_url, timeout=310):
    """Run the Apify Instagram Profile Scraper synchronously for one profile.

    Parameters
    ----------
    apify_token : str
        Apify API token, sent as a ``Bearer`` Authorization header.
    instagram_profile_url : str
        URL of the profile; the username is its last path segment.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up. The default is
        meant to sit just above Apify's synchronous-run limit (300 s per the
        Apify API reference — TODO confirm) so that a server-side timeout
        response is seen rather than a client-side abort.

    Returns
    -------
    dict or str
        The first item of the returned dataset on success; otherwise a
        human-readable message ("No profile data found." or "Error: ...").
        Callers distinguish the two cases with ``isinstance(result, dict)``.
    """
    username = _extract_instagram_username(instagram_profile_url)

    # Synchronous endpoint: starts the actor run and answers with its dataset items
    api_url = "https://api.apify.com/v2/acts/apify~instagram-profile-scraper/run-sync-get-dataset-items"

    payload = {
        "usernames": [username],  # Pass the username as a list
        "proxyConfig": {
            "useApifyProxy": True
        }
    }

    headers = {
        "Authorization": f"Bearer {apify_token}",
        "Content-Type": "application/json"
    }

    # Network failures are reported the same way as HTTP errors (as a string)
    # instead of surfacing as an uncaught exception in the notebook.
    try:
        response = requests.post(api_url, json=payload, headers=headers, timeout=timeout)
    except requests.RequestException as error:
        return f"Error: request failed - {error}"

    # Accept any 2xx status: the Apify API reference lists 201 (not 200) as the
    # success status of this endpoint, so a strict `== 200` check would report
    # successful runs as errors.
    if not 200 <= response.status_code < 300:
        return f"Error: {response.status_code} - {response.text}"

    try:
        data = response.json()
    except ValueError:
        # Body was not valid JSON despite the success status
        return f"Error: {response.status_code} - {response.text}"

    # The dataset is expected to be a list with one item per scraped profile
    if isinstance(data, list) and data:
        return data[0]
    return "No profile data found."

def process_instagram_data(data):
    """Split one scraped profile record into six pandas DataFrames.

    Parameters
    ----------
    data : dict
        Profile record as returned by ``get_instagram_data``. The keys read
        are ``'posts'`` and ``'user'`` at the top level and ``'location'``,
        ``'hashtags'``, ``'images'`` and ``'comments'`` on each post.
        NOTE(review): these key names are assumed to match the scraper's
        output — confirm against a real response. Missing or null keys simply
        produce empty DataFrames.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_posts, df_profiles, df_places, df_hashtags, df_photos, df_comments)``
    """
    # `.get(key) or default` (rather than `.get(key, default)`) also covers keys
    # that are present with a null value, which would otherwise raise
    # TypeError when iterated below.
    posts = data.get('posts') or []
    profiles = [data.get('user') or {}]

    # Only posts that carry a non-empty location contribute a place row
    places = [post['location'] for post in posts if post.get('location')]
    # Flatten hashtags and comments across all posts
    hashtags = [hashtag for post in posts for hashtag in (post.get('hashtags') or [])]
    comments = [comment for post in posts for comment in (post.get('comments') or [])]
    # One row per post, one column per image position
    photos = [post.get('images') or [] for post in posts]

    # Create DataFrames
    df_posts = pd.DataFrame(posts)
    df_profiles = pd.DataFrame(profiles)
    df_places = pd.DataFrame(places)
    df_hashtags = pd.DataFrame(hashtags, columns=['hashtag'])
    df_photos = pd.DataFrame(photos)
    df_comments = pd.DataFrame(comments)

    return df_posts, df_profiles, df_places, df_hashtags, df_photos, df_comments

## Output

### Display result

In [None]:
# Fetch the profile; a dict means success, anything else is a status message
profile_data = get_instagram_data(apify_token, instagram_profile_url)

if not isinstance(profile_data, dict):
    # Error or "no data" message returned by the scraper call
    print(profile_data)
else:
    (
        df_posts,
        df_profiles,
        df_places,
        df_hashtags,
        df_photos,
        df_comments,
    ) = process_instagram_data(profile_data)

    # Heading / DataFrame pairs, printed in the order listed
    labelled_frames = [
        ("Posts DataFrame:", df_posts),
        ("\nProfiles DataFrame:", df_profiles),
        ("\nPlaces DataFrame:", df_places),
        ("\nHashtags DataFrame:", df_hashtags),
        ("\nPhotos DataFrame:", df_photos),
        ("\nComments DataFrame:", df_comments),
    ]
    for heading, frame in labelled_frames:
        print(heading)
        print(frame)