Installing dependencies

In [None]:
!pip install google-play-scraper openai tqdm pandas


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m


Importing libraries

In [1]:
from google_play_scraper import app, search, Sort, reviews
import pandas as pd
import time

Get the AI apps' IDs

In [2]:
def search_ai_apps(query="AI", n_results=10):
    """Search for AI apps on Google Play Store"""
    print(f"Searching for '{query}' apps...")
    search_results = search(
        query,
        lang="en",
        country="us",
        n_hits=n_results
    )
    return search_results

Get the reviews based on IDs

In [3]:
def get_app_reviews(app_id, app_name, max_reviews=100):
    """Retrieve reviews for a specific app"""
    print(f"Fetching reviews for {app_name} ({app_id})...")
    try:
        # Use reviews() instead of reviews_all() to limit the number of reviews
        review_results, continuation_token = reviews(
            app_id,
            lang="en",
            country="us",
            sort=Sort.NEWEST,
            count=max_reviews  # This limits the number of reviews
        )

        # Print the structure of the first review for debugging
        if review_results:
            print(f"Review keys available: {list(review_results[0].keys())}")

        # Convert to a structured format with safer field access
        review_data = []
        for review in review_results:
            review_item = {
                'app_name': app_name,
                'reviewer': review.get('userName', 'Unknown'),
                'date': review.get('at', None),
                'score': review.get('score', None),
                'content': review.get('content', ''),
                # Use get() with a default value for potentially missing keys
                'thumbs_up': review.get('thumbsUp', 0)  # Default to 0 if missing
            }
            review_data.append(review_item)

        return review_data
    except Exception as e:
        print(f"Error retrieving reviews for {app_name}: {str(e)}")
        # Print a full traceback for debugging
        import traceback
        traceback.print_exc()
        return []

# Main workflow
- Searches for AI assistant apps
- Retrieves their details (developer, rating, and number of ratings)
- Collects user reviews
- Stores the reviews in a Pandas DataFrame
- Saves them as a CSV file
- Displays a sample of the collected reviews

In [None]:
# Search for AI apps
ai_apps = search_ai_apps(query="AI", n_results=100)

all_reviews = []

# Get reviews for each app
for app_info in ai_apps:
    app_id = app_info['appId']
    app_name = app_info['title']

    # Get app details to show more info
    app_details = app(app_id)
    print(f"\nApp: {app_name}")
    print(f"Developer: {app_details['developer']}")
    print(f"Rating: {app_details['score']} ({app_details['ratings']} ratings)")

    # Get reviews
    app_reviews = get_app_reviews(app_id, app_name, max_reviews=1000)
    all_reviews.extend(app_reviews)

    # Pause to avoid hitting rate limits
    time.sleep(1)

# Create a DataFrame and save to CSV
if all_reviews:
    df = pd.DataFrame(all_reviews)
    csv_filename = "ai_app_reviews.csv"
    df.to_csv(csv_filename, index=False)
    print(f"\nSaved {len(all_reviews)} reviews to {csv_filename}")

    # Show sample of reviews
    print("\nSample reviews:")
    print(df[['app_name', 'score', 'content']].head(3))
else:
    print("No reviews were collected.")

Searching for 'AI' apps...

App: PolyBuzz:formerly Poly.AI
Developer: CLOUD WHALE INTERACTIVE TECHNOLOGY LLC.
Rating: 4.1941237 (433756 ratings)
Fetching reviews for PolyBuzz:formerly Poly.AI (ai.socialapps.speakmaster)...
Review keys available: ['reviewId', 'userName', 'userImage', 'content', 'score', 'thumbsUpCount', 'reviewCreatedVersion', 'at', 'replyContent', 'repliedAt', 'appVersion']

App: Talkie: Creative AI Community
Developer: SUBSUP
Rating: 4.540069 (513587 ratings)
Fetching reviews for Talkie: Creative AI Community (com.weaver.app.prod)...
Review keys available: ['reviewId', 'userName', 'userImage', 'content', 'score', 'thumbsUpCount', 'reviewCreatedVersion', 'at', 'replyContent', 'repliedAt', 'appVersion']

App: Chai: Chat AI Platform
Developer: Chai Research Corp.
Rating: 4.2794785 (349418 ratings)
Fetching reviews for Chai: Chat AI Platform (com.Beauchamp.Messenger.external)...
Review keys available: ['reviewId', 'userName', 'userImage', 'content', 'score', 'thumbsUpCoun

In [None]:
import threading
from tqdm import tqdm
from openai import OpenAI
import pandas as pd
import os
# Initialize OpenAI client
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))  # Replace with your actual API key

# Function to analyze reviews using OpenAI API
def analyze_reviews_batch(reviews, app_name):
    prompt = f"""Analyze these {app_name} app reviews and extract:
1. Key features/functionality mentioned
2. Notable information about the app
3. Summary of user sentiment

Reviews:
{reviews}

Format your response as:
Features: [comma separated list]
Notable Info: [comma separated list]
Summary: [brief summary]"""
    
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",  # Using a valid OpenAI model
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=500
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error in OpenAI API call: {str(e)}")
        return f"Error analyzing reviews: {str(e)}"

# Read the filtered reviews
filtered_df = pd.read_csv("datasets/filtered_ai_app_reviews.csv")

# Filter reviews with content length >= 200 characters
filtered_df = filtered_df[filtered_df['content'].str.len() >= 200]

# Get unique app names
app_names = filtered_df['app_name'].unique()
results = []
results_lock = threading.Lock()

# Process reviews for each app in batches of 20
def process_app_reviews(app_name):
    try:
        app_reviews = filtered_df[filtered_df['app_name'] == app_name]
        batch_size = 20
        num_batches = (len(app_reviews) // batch_size) + 1
        
        # Initialize progress bar for this app
        with tqdm(total=len(app_reviews), desc=f"Processing {app_name}", unit="review") as pbar:
            for i in range(0, len(app_reviews), batch_size):
                end_idx = min(i + batch_size, len(app_reviews))
                batch_reviews = "\n".join(app_reviews['content'].iloc[i:end_idx])
                analysis = analyze_reviews_batch(batch_reviews, app_name)
                with results_lock:
                    results.append((app_name, analysis))
                pbar.update(end_idx - i)  # Update progress bar
    except Exception as e:
        print(f"Error processing reviews for {app_name}: {str(e)}")

# Create and start threads for each app
threads = []
for app in app_names:
    t = threading.Thread(target=process_app_reviews, args=(app,))
    threads.append(t)
    t.start()
# Initialize overall progress bar for thread completion
with tqdm(total=len(threads), desc="Overall Progress", unit="app") as overall_pbar:
    for t in threads:
        t.join()
        overall_pbar.update(1)  # Update overall progress when each thread completes

# Convert results to DataFrame
analysis_df = pd.DataFrame(results, columns=['App Name', 'Analysis'])

# Get unique App Names and join their Analysis values
unique_apps = analysis_df.groupby('App Name')['Analysis'].apply(' '.join).reset_index()

# Save to CSV with ; separator
unique_apps.to_csv("datasets/app_reviews_analysis.csv", index=False, sep=';')

# Print results
print("\nAnalysis Results:")
for app_name, result in results:
    print(f"\nApp: {app_name}")
    print(result)


Processing PolyBuzz:formerly Poly.AI:   0%|          | 0/193 [00:00<?, ?review/s]

[A[A


[A[A[A



[A[A[A[A




[A[A[A[A[A





[A[A[A[A[A[A






[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A
Processing ChatGPT: 100%|██████████| 8/8 [00:02<00:00,  3.81review/s]






[A[A[A[A[A[A



[A[A[A[A







[A[A[A[A[A[A[A[A






[A[A[A[A[A[A[A




[A[A[A[A[A


[A[A[A

Processing PolyBuzz:formerly Poly.AI:  10%|█         | 20/193 [00:04<00:40,  4.25review/s]



Processing Google Gemini: 100%|██████████| 26/26 [00:04<00:00,  5.39review/s]






[A[A[A[A[A[A


[A[A[A







Processing PolyBuzz:formerly Poly.AI:  21%|██        | 40/193 [00:07<00:27,  5.62review/s]






[A[A[A[A[A[A[A

[A[A





Processing PolyBuzz:formerly Poly.AI:  31%|███       | 60/193 [00:10<00:21,  6.24review/s]


[A[A[A







Processing ​​Microsoft Copilot: 100%|██████████| 48/48 [00:10<00:00,  4.69revie


Analysis Results:

App: ChatGPT
Features: animation of images, transcription/speaking feature, image generation, conversion of photos to Ghibli art, chat archiving, access to different model versions (GPT-4), user limits on messaging

Notable Info: users reported issues with animation quality, network errors affecting transcription, delays in image generation, frustrations with user interface and messaging limits, complaints about archived chats not appearing, and changes to free plan limits

Summary: User sentiment is mixed; while many users appreciate the app's capabilities and find it useful, there are significant frustrations related to functionality, user interface, and limitations, leading to requests for improvements and fixes.

App: Talkie: Creative AI Community
Features: character creation, AI conversations, ability to save and sync chat slots, daily reward spins, Pro+ membership, censorship filters, user-generated content

Notable Info: users report excessive ads, complaints




In [21]:
df = pd.read_csv('datasets/ai_app_reviews.csv')
df2 = pd.read_csv('datasets/app_reviews_analysis.csv', sep=';')
df3 = pd.read_csv('datasets/AI_apps_full_dataset.csv')

# Get unique App Names and join their User Review Analysis values
unique_apps = df2.groupby('App Name')['Analysis'].apply(' '.join).reset_index()
unique_apps = unique_apps.rename(columns={'Analysis': 'User Review Analysis'})

merged_df = pd.merge(unique_apps, df3, how='left', on='App Name')

# Move User Review Analysis column to the end
cols = [col for col in merged_df.columns if col != 'User Review Analysis'] + ['User Review Analysis']
merged_df = merged_df[cols]
merged_df

Unnamed: 0,App Name,Publisher,Link,Full Description,App Info Modal,Shared Data,Collected Data,Security Practices,User Review Analysis
0,Chai: Chat AI Platform,Chai Research Corp.,https://play.google.com/store/apps/details?id=...,Build and Share AI,Our app revolutionizes the way we interact wit...,"Device or other IDs, Personal info, App info a...","Device or other IDs, Personal info, Messages, ...","Data is encrypted in transit, You can request ...","Features: memory options, character creation, ..."
1,"Character AI: Chat, Talk, Text",Character.AI,https://play.google.com/store/apps/details?id=...,"Super-intelligent AI chat bots that hear you, ...",AI Chat just got real.\nImagine speaking to su...,,"Personal info, App info and performance, App a...","Data is encrypted in transit, You can request ...","Features: groups, delete button, persona POV w..."
2,ChatGPT,OpenAI,https://play.google.com/store/apps/details?id=...,The official app by OpenAI,"With the official ChatGPT app, get instant ans...",Device or other IDs,"App info and performance, Messages, App activi...","Data is encrypted in transit, You can request ...","Features: animation of images, transcription/s..."
3,Google Gemini,Google LLC,https://play.google.com/store/apps/details?id=...,"Chat to start writing, planning, learning and ...",The Google Gemini app is an AI assistant that ...,,"Device or other IDs, App info and performance,...","Data is encrypted in transit, You can request ...","**Features:** music player integration, detail..."
4,"Linky AI: Chat, Play, Connect",Skywork AI Pte. Ltd.,https://play.google.com/store/apps/details?id=...,"Linky, an unprecedented AI chatbot, brings inf...",Want to chat with character AI to embark on a ...,"Device or other IDs, App activity","Device or other IDs, Personal info, App info a...","Data is encrypted in transit, You can request ...","Features: AI chat with various characters, sto..."
5,Perplexity - Ask Anything,PerplexityAI,https://play.google.com/store/apps/details?id=...,The most powerful answer engine powered by AI.,Perplexity—Where Knowledge Begins. The answers...,"App info and performance, Device or other IDs","Location, App activity, App info and performan...","Data is encrypted in transit, You can request ...","Features: AI search engine, follow-up question..."
6,PolyBuzz:formerly Poly.AI,CLOUD WHALE INTERACTIVE TECHNOLOGY LLC.,https://play.google.com/store/apps/details?id=...,Chat & engage with your anime friends and star...,Our app changes the way we interact with AI ch...,Location,"App info and performance, App activity, Person...","Data is encrypted in transit, You can request ...","**Features:** AI character generation, rolepla..."
7,Question.AI - Chatbot&Math AI,D3 DIMENSION TECHNOLOGY PTE.LTD.,https://play.google.com/store/apps/details?id=...,Chatbot: Scan&Ask AI Assistant Anything and Ge...,Your Ultimate AI Chatbot Assistant!\nExperienc...,Location,"Device or other IDs, Personal info, Files and ...","Data is encrypted in transit, You can request ...","**Features:** homework assistance, personalize..."
8,Talkie: Creative AI Community,SUBSUP,https://play.google.com/store/apps/details?id=...,Unleash Your AI Imagination,Create Your AI-Powered Universe with Talkie — ...,App activity,,"Data is encrypted in transit, You can request ...","Features: character creation, AI conversations..."
9,​​Microsoft Copilot,Microsoft Corporation,https://play.google.com/store/apps/details?id=...,Calm. Confident. Copilot. Here to help. A comp...,Microsoft Copilot is the AI companion for ever...,,,"Data is encrypted in transit, You can request ...","Features: asking questions, reviewing document..."


In [22]:
import pandas as pd
from openai import OpenAI
import os
from tqdm.auto import tqdm # Use auto version for notebook compatibility

# Ensure merged_df exists from the previous cell
if 'merged_df' not in locals():
    print("Error: merged_df not found. Please run the previous cell first.")
else:
    # Initialize OpenAI client - Make sure your API key is set as an environment variable
    # or replace os.getenv('OPENAI_API_KEY') with your actual key string
    try:
        client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
        # A small test call to verify the API key
        client.models.list()
        print("OpenAI client initialized successfully.")
    except Exception as e:
        print(f"Error initializing OpenAI client: {e}")
        client = None # Set client to None if initialization fails

    if client:
        def analyze_difference(row):
            # Replace NaN values with "Not specified" for clarity in the prompt
            row = row.fillna("Not specified")

            user_review_summary = row['User Review Analysis']
            developer_desc = f"Full Description: {row['Full Description']}\nApp Info Modal: {row['App Info Modal']}\nShared Data: {row['Shared Data']}\nCollected Data: {row['Collected Data']}\nSecurity Practices: {row['Security Practices']}"

            prompt = f"""Compare the user review summary with the developer's description for the app '{row['App Name']}'.

        User Review Summary (based on user feedback):
        {user_review_summary}

        Developer's Description & Data Practices (official information):
        {developer_desc}

        Task: Identify and summarize the key differences, discrepancies, or contradictions between the user experiences (from the review summary) and the developer's claims/descriptions. Focus on aspects like:
        - Promised features vs. actual user experience/complaints.
        - App performance claims vs. user-reported issues (bugs, lag, crashes).
        - Stated data privacy/security practices vs. user concerns or experiences.
        - Marketing language/tone vs. overall user sentiment reality.

        Output a concise summary highlighting these differences. If there are no major discrepancies, state that.

        Difference Analysis:
        """
            try:
                response = client.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=[{"role": "user", "content": prompt}],
                    temperature=0.1,
                    max_tokens=400 # Increased slightly for potentially complex comparisons
                )
                analysis_text = response.choices[0].message.content.strip()
                # Remove the "Difference Analysis:" prefix if the model includes it
                if analysis_text.startswith("Difference Analysis:"):
                    analysis_text = analysis_text[len("Difference Analysis:"):].strip()
                return analysis_text
            except Exception as e:
                # Print specific error for debugging
                print(f"\nError analyzing differences for {row['App Name']}: {type(e).__name__} - {str(e)}")
                # Return a more informative error string in the DataFrame
                return f"Error during API call: {type(e).__name__}"

        # Initialize tqdm for pandas
        tqdm.pandas(desc="Analyzing Differences")

        # Apply the function - Ensure the DataFrame is not empty
        if not merged_df.empty:
             # Create a copy to avoid SettingWithCopyWarning
            merged_df_copy = merged_df.copy()
            merged_df_copy['Difference Analysis'] = merged_df_copy.progress_apply(analyze_difference, axis=1)
            merged_df = merged_df_copy # Assign back to original variable name
            print("\\nDifference analysis complete.")
            # Display the relevant columns of the updated DataFrame
            print(merged_df[['App Name', 'User Review Analysis', 'Difference Analysis']].head())
        else:
            print("merged_df is empty. Skipping analysis.")
    else:
        print("OpenAI client failed to initialize. Cannot perform difference analysis.")

# Display the final few rows as well to check for errors
if 'merged_df' in locals() and 'Difference Analysis' in merged_df.columns:
    print("\\nLast 5 rows with Difference Analysis:")
    print(merged_df[['App Name', 'Difference Analysis']].tail())


  from .autonotebook import tqdm as notebook_tqdm


OpenAI client initialized successfully.


Analyzing Differences: 100%|██████████| 10/10 [01:07<00:00,  6.77s/it]

\nDifference analysis complete.
                         App Name  \
0          Chai: Chat AI Platform   
1  Character AI: Chat, Talk, Text   
2                         ChatGPT   
3                   Google Gemini   
4   Linky AI: Chat, Play, Connect   

                                User Review Analysis  \
0  Features: memory options, character creation, ...   
1  Features: groups, delete button, persona POV w...   
2  Features: animation of images, transcription/s...   
3  **Features:** music player integration, detail...   
4  Features: AI chat with various characters, sto...   

                                 Difference Analysis  
0  ### Difference Analysis:\n\n1. **Promised Feat...  
1  ### Difference Analysis:\n\n1. **Promised Feat...  
2  ### Difference Analysis:\n\n1. **Promised Feat...  
3  The analysis of the user review summary and th...  
4  ### Difference Analysis:\n\n1. **Promised Feat...  
\nLast 5 rows with Difference Analysis:
                        App Name  \
5 




In [23]:
merged_df.to_csv('datasets/app_reviews_analysis.csv', index=False, sep=';')

Unnamed: 0,App Name,Publisher,Link,Full Description,App Info Modal,Shared Data,Collected Data,Security Practices,User Review Analysis,Difference Analysis
0,Chai: Chat AI Platform,Chai Research Corp.,https://play.google.com/store/apps/details?id=...,Build and Share AI,Our app revolutionizes the way we interact wit...,"Device or other IDs, Personal info, App info a...","Device or other IDs, Personal info, Messages, ...","Data is encrypted in transit, You can request ...","Features: memory options, character creation, ...",### Difference Analysis:\n\n1. **Promised Feat...
1,"Character AI: Chat, Talk, Text",Character.AI,https://play.google.com/store/apps/details?id=...,"Super-intelligent AI chat bots that hear you, ...",AI Chat just got real.\nImagine speaking to su...,,"Personal info, App info and performance, App a...","Data is encrypted in transit, You can request ...","Features: groups, delete button, persona POV w...",### Difference Analysis:\n\n1. **Promised Feat...
2,ChatGPT,OpenAI,https://play.google.com/store/apps/details?id=...,The official app by OpenAI,"With the official ChatGPT app, get instant ans...",Device or other IDs,"App info and performance, Messages, App activi...","Data is encrypted in transit, You can request ...","Features: animation of images, transcription/s...",### Difference Analysis:\n\n1. **Promised Feat...
3,Google Gemini,Google LLC,https://play.google.com/store/apps/details?id=...,"Chat to start writing, planning, learning and ...",The Google Gemini app is an AI assistant that ...,,"Device or other IDs, App info and performance,...","Data is encrypted in transit, You can request ...","**Features:** music player integration, detail...",The analysis of the user review summary and th...
4,"Linky AI: Chat, Play, Connect",Skywork AI Pte. Ltd.,https://play.google.com/store/apps/details?id=...,"Linky, an unprecedented AI chatbot, brings inf...",Want to chat with character AI to embark on a ...,"Device or other IDs, App activity","Device or other IDs, Personal info, App info a...","Data is encrypted in transit, You can request ...","Features: AI chat with various characters, sto...",### Difference Analysis:\n\n1. **Promised Feat...
5,Perplexity - Ask Anything,PerplexityAI,https://play.google.com/store/apps/details?id=...,The most powerful answer engine powered by AI.,Perplexity—Where Knowledge Begins. The answers...,"App info and performance, Device or other IDs","Location, App activity, App info and performan...","Data is encrypted in transit, You can request ...","Features: AI search engine, follow-up question...",### Difference Analysis:\n\n1. **Promised Feat...
6,PolyBuzz:formerly Poly.AI,CLOUD WHALE INTERACTIVE TECHNOLOGY LLC.,https://play.google.com/store/apps/details?id=...,Chat & engage with your anime friends and star...,Our app changes the way we interact with AI ch...,Location,"App info and performance, App activity, Person...","Data is encrypted in transit, You can request ...","**Features:** AI character generation, rolepla...",### Difference Analysis:\n\n1. **Promised Feat...
7,Question.AI - Chatbot&Math AI,D3 DIMENSION TECHNOLOGY PTE.LTD.,https://play.google.com/store/apps/details?id=...,Chatbot: Scan&Ask AI Assistant Anything and Ge...,Your Ultimate AI Chatbot Assistant!\nExperienc...,Location,"Device or other IDs, Personal info, Files and ...","Data is encrypted in transit, You can request ...","**Features:** homework assistance, personalize...",### Difference Analysis:\n\n1. **Promised Feat...
8,Talkie: Creative AI Community,SUBSUP,https://play.google.com/store/apps/details?id=...,Unleash Your AI Imagination,Create Your AI-Powered Universe with Talkie — ...,App activity,,"Data is encrypted in transit, You can request ...","Features: character creation, AI conversations...",### Difference Analysis:\n\n1. **Promised Feat...
9,​​Microsoft Copilot,Microsoft Corporation,https://play.google.com/store/apps/details?id=...,Calm. Confident. Copilot. Here to help. A comp...,Microsoft Copilot is the AI companion for ever...,,,"Data is encrypted in transit, You can request ...","Features: asking questions, reviewing document...",### Difference Analysis:\n\n1. **Promised Feat...
