# Import

In [1]:
import requests
import json
import pandas as pd
import os


# Constants & Parameters

In [14]:
# GraphQL endpoint and the profile whose followings are fetched. Both come from
# the environment; os.getenv returns None when the variable is unset.
PHAVER_GRAPHQL_ENDPOINT = os.getenv("PHAVER_GRAPHQL_ENDPOINT")
PHAVER_PROFILE_ID = os.getenv("PHAVER_PROFILE_ID")

# The token URL is the API URL with the API key appended directly after it.
# NOTE(review): if either variable is unset, os.getenv returns None and this
# concatenation raises TypeError when the cell runs.
FIREBASE_TOKEN_URL = os.getenv("FIREBASE_API_URL") + os.getenv("FIREBASE_API_KEY")
FIREBASE_REFRESH_TOKEN = os.getenv("FIREBASE_REFRESH_TOKEN")

# Page size sent with each followings request, and the number of followings
# the pagination loop aims to collect.
LIMIT_PER_REQUEST = 100
MAX_FOLLOWINGS_REQUESTED = 100

# Functions

In [15]:
# Utility function to load a GraphQL query or fragment from a file
def load_graphql_file(file_path):
    """Return the full text of a GraphQL query or fragment file.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file's contents as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()
    
# Load the shared fragments and each query document once, when this cell runs.
# The paths are relative, so they resolve against the current working directory.
# FRAGMENTS is prepended to a query document when a request payload is built.
# NOTE(review): FOLLOWERS_QUERY is not referenced by the cells visible here.
FRAGMENTS = load_graphql_file('graphql/fragments/Fragments.gql')
FOLLOWINGS_QUERY = load_graphql_file('graphql/queries/FollowingsQuery.gql')
FOLLOWERS_QUERY = load_graphql_file('graphql/queries/FollowersQuery.gql')
POINTS_QUERY = load_graphql_file('graphql/queries/PointsQuery.gql')

In [21]:
def request_access_token(timeout=30):
    """Exchange the stored refresh token for a fresh access token.

    Posts a refresh-token grant to ``FIREBASE_TOKEN_URL`` and returns the
    ``access_token`` field of the JSON response.

    Args:
        timeout: Seconds to wait for the token endpoint. Without a timeout
            ``requests`` may block indefinitely on an unresponsive server.

    Returns:
        The value of the ``access_token`` field in the response body.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError: If the response body has no ``access_token`` field.
    """
    payload = {
        "grantType": "refresh_token",
        "refreshToken": FIREBASE_REFRESH_TOKEN
    }

    response = requests.post(FIREBASE_TOKEN_URL, json=payload, timeout=timeout)
    response.raise_for_status()
    return response.json()['access_token']


def request_followings(profile_id, limit_per_request, offset, access_token, timeout=30):
    """Fetch one page of followings for a profile from the GraphQL API.

    Args:
        profile_id: Sent as the ``profileId`` query variable.
        limit_per_request: Sent as the ``limit`` query variable.
        offset: Sent as the ``offset`` query variable.
        access_token: Bearer token placed in the ``Authorization`` header.
        timeout: Seconds to wait for the endpoint. Without a timeout
            ``requests`` may block indefinitely on an unresponsive server.

    Returns:
        The value found at ``data.followings`` in the JSON response.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        RuntimeError: If the response body contains an ``errors`` entry; the
            entry is available as the exception's first argument.
    """
    headers = {
        "Authorization": "Bearer " + access_token,
        "Content-Type": "application/json"
    }

    payload = {
        # The shared fragments must precede the query that references them
        "query": FRAGMENTS + FOLLOWINGS_QUERY,
        "variables": {
            "profileId": profile_id,
            "limit": limit_per_request,
            "offset": offset
        }
    }
    response = requests.post(
        PHAVER_GRAPHQL_ENDPOINT,
        headers=headers,
        json=payload,
        timeout=timeout
    )

    # Raise an exception for HTTP errors
    response.raise_for_status()

    # Parse the JSON response
    data = response.json()

    # raise_for_status only covers HTTP-level failures; errors reported inside
    # the JSON body are surfaced here.
    if 'errors' in data:
        raise RuntimeError(data['errors'])

    return data['data']['followings']

def request_points(profile_id, access_token, timeout=30):
    """Fetch the current points value for a profile from the GraphQL API.

    Args:
        profile_id: Sent as the ``profileId`` query variable.
        access_token: Bearer token placed in the ``Authorization`` header.
        timeout: Seconds to wait for the endpoint. Without a timeout
            ``requests`` may block indefinitely on an unresponsive server.

    Returns:
        The value found at ``data.phaverPoints.phaverPointsCurrent`` in the
        JSON response.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        RuntimeError: If the response body contains an ``errors`` entry; the
            entry is available as the exception's first argument.
    """
    headers = {
        "Authorization": "Bearer " + access_token,
        "Content-Type": "application/json"
    }

    payload = {
        # The shared fragments must precede the query that references them
        "query": FRAGMENTS + POINTS_QUERY,
        "variables": {
            "profileId": profile_id
        }
    }
    response = requests.post(
        PHAVER_GRAPHQL_ENDPOINT,
        headers=headers,
        json=payload,
        timeout=timeout
    )

    # Raise an exception for HTTP errors
    response.raise_for_status()

    # Parse the JSON response
    data = response.json()

    # raise_for_status only covers HTTP-level failures; errors reported inside
    # the JSON body are surfaced here.
    if 'errors' in data:
        raise RuntimeError(data['errors'])

    return data['data']['phaverPoints']['phaverPointsCurrent']


def save_to_json(data, filename):
    """Write ``data`` to ``filename`` as JSON indented by four spaces.

    Missing parent directories of ``filename`` are created first.

    Args:
        data: Any object ``json.dump`` can serialize.
        filename: Destination path; may be a bare file name.

    Raises:
        OSError: If the directory or file cannot be created.
        TypeError: If ``data`` is not JSON serializable.
    """
    folder = os.path.dirname(filename)
    # A bare file name has an empty directory part, and os.makedirs('')
    # raises FileNotFoundError, so only create a folder when one is named.
    if folder:
        os.makedirs(folder, exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4)

def get_all_points(followings, access_token):
    """Attach a ``points`` value to every followed profile, in place.

    Calls ``request_points`` once per entry, using the id of the entry's
    ``followedProfile``, and stores the result under that profile's
    ``points`` key.

    Args:
        followings: Sequence of dicts, each holding a ``followedProfile``
            dict with an ``id`` key.
        access_token: Token forwarded to ``request_points``.

    Returns:
        The same ``followings`` object that was passed in, now enriched.
    """
    for entry in followings:
        profile = entry['followedProfile']
        profile['points'] = request_points(profile['id'], access_token)
    return followings


def get_all_followings(profile_id, limit_per_request, max_followings_requested, access_token):
    """Collect followings page by page, up to a maximum count.

    Pages are requested through ``request_followings`` until either
    ``max_followings_requested`` items have been gathered or a page comes
    back empty.

    Args:
        profile_id: Profile whose followings are fetched.
        limit_per_request: Largest page size to ask for in one request.
        max_followings_requested: Upper bound on the number of items returned.
        access_token: Token forwarded to ``request_followings``.

    Returns:
        A list with at most ``max_followings_requested`` followings.
    """
    all_followings = []
    offset = 0

    while len(all_followings) < max_followings_requested:
        # Never ask for more than is still needed, so the final page cannot
        # push the total past the requested maximum.
        remaining = max_followings_requested - len(all_followings)
        page_size = min(limit_per_request, remaining)

        followings = request_followings(profile_id, page_size, offset, access_token)

        # If no more followings, break the loop
        if not followings:
            break

        all_followings.extend(followings)

        # Advance by what was actually received so no rows are skipped when
        # a page holds fewer items than were asked for.
        offset += len(followings)

    # Guard against a page that returned more items than were asked for
    return all_followings[:max_followings_requested]

# Column order of the flattened frame. It is passed to pandas explicitly so the
# columns exist even when there are no followings to flatten.
_FOLLOWINGS_COLUMNS = [
    "id",
    "username",
    "profileCreatedAt",
    "followingDate",
    "followerCount",
    "followingCount",
    "points",
    "credLevel",
    "badge",
    "phaverFrens",
    "verification",
    "verified",
    "isUserFollowing",
    "lensProfile.lensHandle",
    "lensProfile.isUserFollowing",
    "farcasterProfile.name",
    "farcasterProfile.isUserFollowing",
]


def _nested_field(profile, parent_key, child_key):
    """Return ``profile[parent_key][child_key]``, or None if the parent is missing or empty."""
    parent = profile.get(parent_key)
    return parent.get(child_key) if parent else None


# Function to flatten the JSON data
def flatten_followings(followings):
    """Flatten followings records into a DataFrame, one row per followed profile.

    Args:
        followings: Iterable of dicts, each with a ``followedProfile`` dict
            and optionally a ``createdAt`` value.

    Returns:
        A ``pandas.DataFrame`` whose columns are always ``_FOLLOWINGS_COLUMNS``
        in that order; it has zero rows when ``followings`` is empty. Fields
        absent from a record become missing values.

    Raises:
        KeyError: If a record has no ``followedProfile`` key.
    """
    flattened_data = []

    for item in followings:
        followed_profile = item['followedProfile']

        flattened_data.append({
            "id": followed_profile.get("id"),
            "username": followed_profile.get("username"),
            "profileCreatedAt": followed_profile.get("createdAt"),
            # createdAt on the outer record is stored as the following date
            "followingDate": item.get("createdAt"),
            "followerCount": _nested_field(followed_profile, "profileAggregates", "followerCount"),
            "followingCount": _nested_field(followed_profile, "profileAggregates", "followingCount"),
            "points": followed_profile.get("points"),
            "credLevel": followed_profile.get("credLevel"),
            "badge": followed_profile.get("badge"),
            "phaverFrens": followed_profile.get("phaverFrens"),
            "verification": followed_profile.get("verification"),
            "verified": followed_profile.get("verified"),
            "isUserFollowing": followed_profile.get("isUserFollowing"),
            "lensProfile.lensHandle": _nested_field(followed_profile, "lensProfile", "lensHandle"),
            "lensProfile.isUserFollowing": _nested_field(followed_profile, "lensProfile", "isUserFollowing"),
            "farcasterProfile.name": _nested_field(followed_profile, "farcasterProfile", "name"),
            "farcasterProfile.isUserFollowing": _nested_field(followed_profile, "farcasterProfile", "isUserFollowing"),
        })

    # Without explicit columns an empty input yields a frame with no columns,
    # and any later column lookup raises KeyError.
    return pd.DataFrame(flattened_data, columns=_FOLLOWINGS_COLUMNS)

# Request Token

In [5]:
# Obtain an access token; the request cells below pass it to the API helpers.
access_token = request_access_token()

# Request Followings

In [19]:
# Fetch followings for the configured profile, paging until
# MAX_FOLLOWINGS_REQUESTED is reached or the API returns an empty page
followings = get_all_followings(PHAVER_PROFILE_ID, LIMIT_PER_REQUEST, MAX_FOLLOWINGS_REQUESTED, access_token)

# Optional enrichment, currently disabled: get_all_points issues one
# request_points call per following.
# followings = get_all_points(followings, access_token)

# Save the followings to data/followings.json (save_to_json creates the data/ folder if needed)
save_to_json(followings, 'data/followings.json')

print(f"Successfully saved {len(followings)} followings to 'data/followings.json'.")


Successfully saved 100 followings to 'data/followings.json'.


# Convert JSON to DataFrame

In [22]:
# Flatten the nested followings records into one row per followed profile
followings_df = flatten_followings(followings)

# Stamp every row with the current UTC time
followings_df['updatedAt'] = pd.Timestamp.now(tz='UTC')

# Optional CSV export, currently disabled
# followings_df.to_csv('data/followings.csv', index=False)


# Filter Out Profiles

In [82]:
# Select profiles with fewer than 1000 followers, a cred level below 2, and no
# badge or verification value.
filtered_out_followings_df = followings_df[
    followings_df['followerCount'].lt(1000)
    & followings_df['credLevel'].lt(2)
    & followings_df['badge'].isna()
    & followings_df['verification'].isna()
]