In [1]:
import pandas as pd
def find_unique_endpoints(file_path):

    # Load the CSV file into a pandas DataFrame
    df = pd.read_csv(file_path)

    # Remove query parameters from the URL (split by '?' and keep the first part)
    df['endpoint'] = df['url'].apply(lambda x: x.split('?')[0])

    # Get unique combinations of 'endpoint' and 'method'
    unique_combinations = df[['endpoint', 'method']].drop_duplicates()

    # Save the unique combinations to a new CSV file
    unique_combinations.to_csv(file_path.split('/')[-1], index=False)

    print("CSV file with unique endpoint-method combinations has been created: 'unique_endpoint_method_combinations.csv'")



In [7]:
find_unique_endpoints('./../healthioProxy.csv')

CSV file with unique endpoint-method combinations has been created: 'unique_endpoint_method_combinations.csv'


In [5]:
import pandas as pd
def onlyGraphQl(file_path):

    # Load the new CSV file with unique combinations
    df = pd.read_csv(file_path)

    # Include only rows where the endpoint contains '/graphql' (case-insensitive)
    filtered_df = df[df['endpoint'].str.contains('/graphql', case=False)]

    # Save the filtered data to a new CSV file
    filtered_df.to_csv(f'graphql_{file_path}', index=False)

    print("Filtered CSV file has been created: 'graphql_endpoint_method_combinations.csv'")


In [6]:
onlyGraphQl('healthio.csv')

Filtered CSV file has been created: 'graphql_endpoint_method_combinations.csv'


In [12]:
import pandas as pd
import json
import re

# Load the CSV file into a pandas DataFrame
df = pd.read_csv('./../healthioProxy.csv')

# Filter to include only rows where the endpoint contains '/graphql' (case-insensitive)
graphql_df = df[df['url'].str.contains('/graphql', case=False)]

# Function to extract the operation name from the 'query' key in the request_body
def extract_operation_name_from_query(body):
    if isinstance(body, str):  # Check if the body is a string
        try:
            # Parse the body as JSON
            body_dict = json.loads(body.replace('""', '"'))  # Fix escaped quotes
            query = body_dict.get('query', '')  # Extract the 'query' field
            
            # Strip leading/trailing whitespace and newlines
            query = query.strip()

            # Use regex to find 'query' or 'mutation' and capture the word that follows
            match = re.search(r'(query|mutation)\s*([a-zA-Z_][\w]*)', query)
            if match:
                return match.group(2)  # Return the operation name (the second capture group)
            else:
                return 'Unknown'  # If no match is found, return 'Unknown'
        except (json.JSONDecodeError, TypeError):
            return 'Unknown'
    else:
        return 'Unknown'

# Function to extract the first occurrence of 'query' or 'mutation' from the 'query' key in the request_body
def extract_query_type(body):
    if isinstance(body, str):  # Check if the body is a string
        try:
            # Parse the body as JSON
            body_dict = json.loads(body.replace('""', '"'))  # Fix escaped quotes
            query = body_dict.get('query', '')  # Extract the 'query' field
            query = query.strip()  # Strip leading/trailing whitespace and newlines
            
            # Use regex to find 'query' or 'mutation'
            match = re.search(r'(query|mutation)', query)
            if match:
                return match.group(1)  # Return 'query' or 'mutation'
            else:
                return 'Unknown'
        except (json.JSONDecodeError, TypeError):
            return 'Unknown'
    else:
        return 'Unknown'

# Apply the functions to extract 'operationName' and 'query_type' from the request_body
graphql_df['operationName'] = graphql_df['request_body'].apply(extract_operation_name_from_query)
graphql_df['query_type'] = graphql_df['request_body'].apply(extract_query_type)

# Filter out rows where 'operationName' is 'Unknown'
graphql_df = graphql_df[graphql_df['operationName'] != 'Unknown']

# Get unique combinations of 'url' (endpoint), 'operationName', and 'query_type'
unique_combinations = graphql_df[['url', 'operationName', 'query_type']].drop_duplicates()

# Save the unique combinations to a new CSV file
unique_combinations.to_csv('graphql_endpoint_operation_query_combinations.csv', index=False)

print("Filtered CSV file with unique endpoint-operationName-query combinations has been created: 'graphql_endpoint_operation_query_combinations.csv'")


{"operationName":"GetUser","variables":{},"query":"query GetUser {\n  user {\n    email\n    firstName\n    createdAt\n    dateOfBirth\n    uuid\n    languagePreference\n    sexAssigned\n    returning\n    isOptum\n    tags\n    applicationPathway {\n      indication\n      program\n      __typename\n    }\n    client {\n      identifier\n      availablePrograms\n      dmcEnabled\n      tabletRequired\n      globalProduct\n      config {\n        supportsUnification\n        m1Trial\n        __typename\n      }\n      __typename\n    }\n    __typename\n  }\n}\n"}
{"operationName":"GetUser","variables":{},"query":"query GetUser {\n  user {\n    email\n    firstName\n    createdAt\n    dateOfBirth\n    uuid\n    languagePreference\n    sexAssigned\n    returning\n    isOptum\n    tags\n    applicationPathway {\n      indication\n      program\n      __typename\n    }\n    client {\n      identifier\n      availablePrograms\n      dmcEnabled\n      tabletRequired\n      globalProduct\n   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  graphql_df['operationName'] = graphql_df['request_body'].apply(extract_operation_name)
