In [5]:
from apple_app_reviews_scraper import get_token, fetch_reviews
from app_store_scraper import AppStore
import pandas as pd
import random
import time
import nltk
from nltk.tokenize import sent_tokenize

In [6]:
# Download necessary NLTK resources
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /home/bbae/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [7]:
import requests

def search_app_store(query, country="us", limit=5):
    url = "https://itunes.apple.com/search"
    params = {
        "term": query,          # Search term
        "country": country,     # Country code (e.g., "us" for United States)
        "media": "software",    # Search for apps
        "limit": limit          # Number of results to return
    }
    response = requests.get(url, params=params)
    
    if response.status_code == 200:
        return response.json()  # Returns a JSON response
    else:
        raise Exception(f"Error: {response.status_code}")

# Example Usage
results = search_app_store("Khan Academy Kids")
for app in results["results"]:
    print(f"App Name: {app['trackName']}")
    print(f"Developer: {app['sellerName']}")
    print(f"Price: {app['formattedPrice']}")
    print(f"URL: {app['trackViewUrl']}\n")


App Name: Khan Academy Kids
Developer: Khan Academy
Price: Free
URL: https://apps.apple.com/us/app/khan-academy-kids/id1378467217?uo=4

App Name: Khan Academy
Developer: Khan Academy
Price: Free
URL: https://apps.apple.com/us/app/khan-academy/id469863705?uo=4

App Name: ABC Kids Tracing & Games 3-5
Developer: Rabia Aslam
Price: Free
URL: https://apps.apple.com/us/app/abc-kids-tracing-games-3-5/id6480420102?uo=4

App Name: Duolingo - Language Lessons
Developer: Duolingo, Inc
Price: Free
URL: https://apps.apple.com/us/app/duolingo-language-lessons/id570060128?uo=4

App Name: Educational games kids 2-3-4-5
Developer: Kids Academy Co apps: Preschool & Kindergarten Learning Kids Games, Educational Books, Free Songs
Price: Free
URL: https://apps.apple.com/us/app/educational-games-kids-2-3-4-5/id639384857?uo=4

App Name: Kids Drawing Games 6 year olds
Developer: TEACH & DRAW LTD
Price: Free
URL: https://apps.apple.com/us/app/kids-drawing-games-6-year-olds/id1436397693?uo=4



In [3]:
results['results'][1]['trackName']

'Khan Academy'

In [None]:
def get_reviews(country, app_name, app_id, user_agents, num_reviews = 'all'):
    """
    Retrieve num_reviews number of reviews for a given app
    """
    # Get initial token
    token = get_token(country, app_name, app_id, user_agents)
    
    # List to store reviews
    all_reviews = []
    
    # Initial offset
    offset = '1'
    
    # Flag to track if we should continue fetching
    continue_fetching = True
    
    while continue_fetching:
        try:
            # Fetch a batch of reviews
            reviews, new_offset, status_code = fetch_reviews(
                country, app_name, app_id, user_agents, token, offset
            )


            if num_reviews == 'all':
                all_reviews.extend(reviews)
                # Update offset or stop if no more reviews
                if new_offset is None or len(reviews) == 0:
                    continue_fetching = False
                    break
            else:
                if len(all_reviews) + len(reviews) <= num_reviews:
                    # Add reviews to the collection
                    all_reviews.extend(reviews)
                else:
                    num_reviews_left = num_reviews - len(all_reviews)
                    if len(reviews) < num_reviews_left:
                        # if there are no more reviews to fill up requested number of reviews just add the most reviews
                        all_reviews.extend(reviews)
                    else:
                        all_reviews.extend(reviews[:num_reviews_left-1])
                    continue_fetching = False
                    break
            
            # Update offset for next iteration
            offset = new_offset
            
            # Optional: print progress
            print(f"Total reviews collected: {len(all_reviews)}")
            
            # Optional: Add a delay to be nice to the API
            time.sleep(1)
        
        except Exception as e:
            print(f"Error occurred: {e}")
            # Optional: break or continue based on error
            break
    
    return all_reviews

In [5]:
country = 'us'

In [6]:
user_agents = [
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 13_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
]

In [None]:
app_df = []
for app in results["results"]:
    app_name = app['trackName']
    print("APP name:", app_name)
    app_id = AppStore(country=country, app_name=app_name).search_id()
    print("APP ID:", app_id)
    token = get_token(country, app_name, app_id, user_agents)
    print(f"Authentication Token: {token}")
    reviews = get_reviews(country, app_name, app_id, user_agents)
    reviews_df = pd.json_normalize(reviews)
    reviews_df['name'] = app_name
    app_df.append(reviews_df)
    print(reviews_df.shape)

2024-12-07 09:30:54,498 [INFO] Base - Searching for app id


APP name: Khan Academy Kids


2024-12-07 09:30:55,535 [INFO] Base - Initialised: AppStore('us', 'khan-academy-kids', 1378467217)
2024-12-07 09:30:55,536 [INFO] Base - Ready to fetch reviews from: https://apps.apple.com/us/app/khan-academy-kids/id1378467217


APP ID: 1378467217
Bearer eyJhbGciOiJFUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6IlU4UlRZVjVaRFMifQ.eyJpc3MiOiI3TktaMlZQNDhaIiwiaWF0IjoxNzMxMDkxNzIyLCJleHAiOjE3MzgzNDkzMjIsInJvb3RfaHR0cHNfb3JpZ2luIjpbImFwcGxlLmNvbSJdfQ.sSCpCq8braO2fXXU9Sfzakck8t0MCkSzaIy1Llx4P-uKhBlu_XI3U1JYCp6DleoaJQEzP7BtK5pm23Z5dgKKLw
Authentication Token: eyJhbGciOiJFUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6IlU4UlRZVjVaRFMifQ.eyJpc3MiOiI3TktaMlZQNDhaIiwiaWF0IjoxNzMxMDkxNzIyLCJleHAiOjE3MzgzNDkzMjIsInJvb3RfaHR0cHNfb3JpZ2luIjpbImFwcGxlLmNvbSJdfQ.sSCpCq8braO2fXXU9Sfzakck8t0MCkSzaIy1Llx4P-uKhBlu_XI3U1JYCp6DleoaJQEzP7BtK5pm23Z5dgKKLw
Bearer eyJhbGciOiJFUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6IlU4UlRZVjVaRFMifQ.eyJpc3MiOiI3TktaMlZQNDhaIiwiaWF0IjoxNzMxMDkxNzIyLCJleHAiOjE3MzgzNDkzMjIsInJvb3RfaHR0cHNfb3JpZ2luIjpbImFwcGxlLmNvbSJdfQ.sSCpCq8braO2fXXU9Sfzakck8t0MCkSzaIy1Llx4P-uKhBlu_XI3U1JYCp6DleoaJQEzP7BtK5pm23Z5dgKKLw
Offset: 21
Total reviews collected: 20
Offset: 41
Total reviews collected: 40
Offset: 61
Total reviews collected: 60
Offset: 81
Total revi

KeyboardInterrupt: 

In [7]:
# reviews_df = pd.concat(app_df)
reviews_df = pd.read_csv("data/appstore_reviews.csv")

In [8]:
reviews_df

Unnamed: 0,id,type,offset,n_batch,app_id,attributes.date,attributes.developerResponse.id,attributes.developerResponse.body,attributes.developerResponse.modified,attributes.review,attributes.rating,attributes.isEdited,attributes.userName,attributes.title,name
0,6818393692,user-reviews,21.0,20.0,1378467217,2021-01-01T16:18:30Z,20176204.0,It's exciting to hear that your son is beginni...,2021-01-04T23:34:13Z,My daughter(age 7) used Khan Academy in school...,5.0,False,keeperofthecookies,A+ app for kids!,Khan Academy Kids
1,10881777905,user-reviews,21.0,20.0,1378467217,2024-01-30T17:53:10Z,41714643.0,It's so exciting to hear that Khan Kids helped...,2024-02-02T18:58:58Z,I have used this app for years with all 3 of m...,5.0,False,Zin468,The Absolute Best,Khan Academy Kids
2,11524015869,user-reviews,21.0,20.0,1378467217,2024-07-23T02:36:02Z,45577529.0,We're so pleased you discovered Khan Kids! Hap...,2024-07-28T19:55:00Z,I don’t even know where to start! I am so impr...,5.0,False,tinabeanababy,Absolutely blown away! They are partnered with...,Khan Academy Kids
3,6415974048,user-reviews,21.0,20.0,1378467217,2020-09-10T21:56:42Z,17878584.0,Our design team will be so happy to hear your ...,2020-09-14T23:09:39Z,My 4-year-old has been cruising through severa...,5.0,False,CrocoPhile,Very well-designed for actual learning,Khan Academy Kids
4,5938365704,user-reviews,21.0,20.0,1378467217,2020-05-13T03:32:26Z,15346492.0,Hello Ben. Thanks for your comments and feedba...,2020-05-15T16:58:06Z,Khan Academy Kids is excellent in every respec...,5.0,False,BenjaminTemplar,Best in the World but...,Khan Academy Kids
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7175,9163587906,user-reviews,,19.0,1436397693,2022-10-08T12:39:19Z,32481505.0,Hello. You download it for free and get a tria...,2022-10-10T10:32:42Z,My toddler was able to download this “free” ap...,1.0,False,Psw8387,Predatory App - Do not trust or download this ...,Kids Drawing Games 6 year olds
7176,3670612358,user-reviews,,19.0,1436397693,2019-01-19T20:18:47Z,,,,Gdhfhfyfhfhfhfhfhfffhgy,5.0,False,hfh vhjfjgh,It is so cool 👌👌👃🏽😎😎😎.,Kids Drawing Games 6 year olds
7177,3702110447,user-reviews,,19.0,1436397693,2019-01-27T21:06:12Z,7180582.0,"Hello, thank you very much for the feedback! A...",2019-02-06T13:48:27Z,$36.99 for the full version of the app?! This ...,1.0,False,Murrfect,Are you kidding?,Kids Drawing Games 6 year olds
7178,3539423892,user-reviews,,19.0,1436397693,2018-12-17T01:52:19Z,6481691.0,"Hello. Thank you for your feedback, it is very...",2018-12-20T13:09:04Z,I think it’s a perfect app.,5.0,False,Decibel120,Poop,Kids Drawing Games 6 year olds


In [9]:
renamed_reviews_df = reviews_df.rename(columns=lambda x: x.replace("attributes.", "") if x.startswith("attributes.") else x)
renamed_reviews_df

Unnamed: 0,id,type,offset,n_batch,app_id,date,developerResponse.id,developerResponse.body,developerResponse.modified,review,rating,isEdited,userName,title,name
0,6818393692,user-reviews,21.0,20.0,1378467217,2021-01-01T16:18:30Z,20176204.0,It's exciting to hear that your son is beginni...,2021-01-04T23:34:13Z,My daughter(age 7) used Khan Academy in school...,5.0,False,keeperofthecookies,A+ app for kids!,Khan Academy Kids
1,10881777905,user-reviews,21.0,20.0,1378467217,2024-01-30T17:53:10Z,41714643.0,It's so exciting to hear that Khan Kids helped...,2024-02-02T18:58:58Z,I have used this app for years with all 3 of m...,5.0,False,Zin468,The Absolute Best,Khan Academy Kids
2,11524015869,user-reviews,21.0,20.0,1378467217,2024-07-23T02:36:02Z,45577529.0,We're so pleased you discovered Khan Kids! Hap...,2024-07-28T19:55:00Z,I don’t even know where to start! I am so impr...,5.0,False,tinabeanababy,Absolutely blown away! They are partnered with...,Khan Academy Kids
3,6415974048,user-reviews,21.0,20.0,1378467217,2020-09-10T21:56:42Z,17878584.0,Our design team will be so happy to hear your ...,2020-09-14T23:09:39Z,My 4-year-old has been cruising through severa...,5.0,False,CrocoPhile,Very well-designed for actual learning,Khan Academy Kids
4,5938365704,user-reviews,21.0,20.0,1378467217,2020-05-13T03:32:26Z,15346492.0,Hello Ben. Thanks for your comments and feedba...,2020-05-15T16:58:06Z,Khan Academy Kids is excellent in every respec...,5.0,False,BenjaminTemplar,Best in the World but...,Khan Academy Kids
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7175,9163587906,user-reviews,,19.0,1436397693,2022-10-08T12:39:19Z,32481505.0,Hello. You download it for free and get a tria...,2022-10-10T10:32:42Z,My toddler was able to download this “free” ap...,1.0,False,Psw8387,Predatory App - Do not trust or download this ...,Kids Drawing Games 6 year olds
7176,3670612358,user-reviews,,19.0,1436397693,2019-01-19T20:18:47Z,,,,Gdhfhfyfhfhfhfhfhfffhgy,5.0,False,hfh vhjfjgh,It is so cool 👌👌👃🏽😎😎😎.,Kids Drawing Games 6 year olds
7177,3702110447,user-reviews,,19.0,1436397693,2019-01-27T21:06:12Z,7180582.0,"Hello, thank you very much for the feedback! A...",2019-02-06T13:48:27Z,$36.99 for the full version of the app?! This ...,1.0,False,Murrfect,Are you kidding?,Kids Drawing Games 6 year olds
7178,3539423892,user-reviews,,19.0,1436397693,2018-12-17T01:52:19Z,6481691.0,"Hello. Thank you for your feedback, it is very...",2018-12-20T13:09:04Z,I think it’s a perfect app.,5.0,False,Decibel120,Poop,Kids Drawing Games 6 year olds


In [10]:
cleaned_df = renamed_reviews_df.loc[renamed_reviews_df["review"].str.len() >= 4 ]

In [11]:
cleaned_df.shape

(7163, 15)

In [12]:
cleaned_df

Unnamed: 0,id,type,offset,n_batch,app_id,date,developerResponse.id,developerResponse.body,developerResponse.modified,review,rating,isEdited,userName,title,name
0,6818393692,user-reviews,21.0,20.0,1378467217,2021-01-01T16:18:30Z,20176204.0,It's exciting to hear that your son is beginni...,2021-01-04T23:34:13Z,My daughter(age 7) used Khan Academy in school...,5.0,False,keeperofthecookies,A+ app for kids!,Khan Academy Kids
1,10881777905,user-reviews,21.0,20.0,1378467217,2024-01-30T17:53:10Z,41714643.0,It's so exciting to hear that Khan Kids helped...,2024-02-02T18:58:58Z,I have used this app for years with all 3 of m...,5.0,False,Zin468,The Absolute Best,Khan Academy Kids
2,11524015869,user-reviews,21.0,20.0,1378467217,2024-07-23T02:36:02Z,45577529.0,We're so pleased you discovered Khan Kids! Hap...,2024-07-28T19:55:00Z,I don’t even know where to start! I am so impr...,5.0,False,tinabeanababy,Absolutely blown away! They are partnered with...,Khan Academy Kids
3,6415974048,user-reviews,21.0,20.0,1378467217,2020-09-10T21:56:42Z,17878584.0,Our design team will be so happy to hear your ...,2020-09-14T23:09:39Z,My 4-year-old has been cruising through severa...,5.0,False,CrocoPhile,Very well-designed for actual learning,Khan Academy Kids
4,5938365704,user-reviews,21.0,20.0,1378467217,2020-05-13T03:32:26Z,15346492.0,Hello Ben. Thanks for your comments and feedba...,2020-05-15T16:58:06Z,Khan Academy Kids is excellent in every respec...,5.0,False,BenjaminTemplar,Best in the World but...,Khan Academy Kids
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7175,9163587906,user-reviews,,19.0,1436397693,2022-10-08T12:39:19Z,32481505.0,Hello. You download it for free and get a tria...,2022-10-10T10:32:42Z,My toddler was able to download this “free” ap...,1.0,False,Psw8387,Predatory App - Do not trust or download this ...,Kids Drawing Games 6 year olds
7176,3670612358,user-reviews,,19.0,1436397693,2019-01-19T20:18:47Z,,,,Gdhfhfyfhfhfhfhfhfffhgy,5.0,False,hfh vhjfjgh,It is so cool 👌👌👃🏽😎😎😎.,Kids Drawing Games 6 year olds
7177,3702110447,user-reviews,,19.0,1436397693,2019-01-27T21:06:12Z,7180582.0,"Hello, thank you very much for the feedback! A...",2019-02-06T13:48:27Z,$36.99 for the full version of the app?! This ...,1.0,False,Murrfect,Are you kidding?,Kids Drawing Games 6 year olds
7178,3539423892,user-reviews,,19.0,1436397693,2018-12-17T01:52:19Z,6481691.0,"Hello. Thank you for your feedback, it is very...",2018-12-20T13:09:04Z,I think it’s a perfect app.,5.0,False,Decibel120,Poop,Kids Drawing Games 6 year olds


In [13]:
cleaned_df.to_csv("data/appstore_reviews.csv", index=False)

In [21]:
review_only = cleaned_df["review"]
review_only

0       My daughter(age 7) used Khan Academy in school...
1       I have used this app for years with all 3 of m...
2       I don’t even know where to start! I am so impr...
3       My 4-year-old has been cruising through severa...
4       Khan Academy Kids is excellent in every respec...
                              ...                        
7175    My toddler was able to download this “free” ap...
7176                              Gdhfhfyfhfhfhfhfhfffhgy
7177    $36.99 for the full version of the app?! This ...
7178                          I think it’s a perfect app.
7179                                              Amazing
Name: review, Length: 7163, dtype: object

In [22]:
first_50 = review_only.loc[:49]
first_50

0     My daughter(age 7) used Khan Academy in school...
1     I have used this app for years with all 3 of m...
2     I don’t even know where to start! I am so impr...
3     My 4-year-old has been cruising through severa...
4     Khan Academy Kids is excellent in every respec...
5     We recently purchased an iPad as corona and sh...
6     I thought I will never let my kid get on any a...
7     This is a really excellent app, not for the le...
8     Having learned a lot from Kahn Academy over th...
9     I noticed that new features to restrict button...
10    Thank you Khan academy kids, you saved us! We’...
11    I love the graphics on this app and the fact t...
12    Ok, you’ve heard what the parents think, but h...
13    We’ve been using khan academy for a few weeks ...
14    This game can help so much in reading comprehe...
15    My 3 year old daughter, Katherine, has always ...
16    We were introduced to ABC mouse by a friend bu...
17    Due to lack of transportation, my daughter

In [25]:
# Tokenize each review into sentences
sentences = []
for review in first_50:
    review_sentences = sent_tokenize(review)  # Split review into sentences
    sentences.extend(review_sentences)       # Add sentences to the list

# Create a new DataFrame with each sentence as a separate row
sentences_df = pd.DataFrame(sentences, columns=["sentence"])
sentences_df

Unnamed: 0,sentence
0,My daughter(age 7) used Khan Academy in school...
1,Our school district listed Khan Academy as a r...
2,We used it in addition to her other school wor...
3,We decided to try this app for our son that is...
4,"Our son (age 4) LOVES this app, he willingly s..."
...,...
537,My teacher started giving us assignments in kh...
538,I downloaded this app on my sister’s IPad and ...
539,"Now she is able to count to 50, no problem, sh..."
540,"She walks around spelling, or trying to spell ..."


In [27]:
# Save to CSV
sentences_df.to_csv("data/review_sentences.csv", index=False)