# **App scraper**

## **[Setup]**

In [None]:
import requests
import csv
import time
import logging

import pandas as pd
from google_play_scraper import search

In [13]:
# Configure the root logger once: INFO and above, each record prefixed with
# a timestamp and its level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

In [14]:
# Search terms used by both scrapers; each keyword is queried on its own.
KEYWORDS = [
    "asthma",
    "asthma tracker",
    "inhaler tracker",
    "peak flow",
    "asthma management",
    "asthma log",
    "allergy induced asthma",
    "asthma symptoms",
    "asthma monitoring app",
    "asthma action plan",
]

## **[App Store - Apple]**

**[Configuration]**

In [15]:
# Maximum number of results requested per keyword (sent as the `limit`
# query parameter).
RESULT_LIMIT = 20

# Endpoint of the iTunes Search API that every keyword query is sent to.
API_BASE_URL = "https://itunes.apple.com/search"

# Path of the CSV file the App Store results are written to.
CSV_FILE = "data/app_store_asthma_apps_desc.csv"

**[Scraper Function]**

In [16]:
def scrape_app_store():
    """Search the iTunes Search API for every keyword and save the hits to CSV.

    For each entry in ``KEYWORDS`` the function requests up to
    ``RESULT_LIMIT`` US software results from ``API_BASE_URL`` and writes one
    row per returned app to ``CSV_FILE``. The file is opened in write mode,
    so every run replaces its previous contents.

    Error handling:
        * A failed request or an unusable response is logged and that keyword
          is skipped; the remaining keywords are still processed.
        * A failure to create the output directory or to open/write the CSV
          file is logged as critical and ends the run.

    Returns:
        None. Results are only written to ``CSV_FILE``.
    """
    # Local import: only this function needs it, to prepare the output folder.
    import os

    logging.info(f"Starting App Store scrape. Output will be saved to {CSV_FILE}")

    csv_headers = [
        'Keyword Searched',
        'App Name',
        'App ID',
        'Seller',
        'Price',
        'Average Rating',
        'Rating Count',
        'Genre',
        'App URL',
        'Description'
    ]

    try:
        # open() does not create missing parent directories, so make sure the
        # target folder exists before writing.
        output_dir = os.path.dirname(CSV_FILE)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(CSV_FILE, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(csv_headers)

            for index, keyword in enumerate(KEYWORDS):
                if index:
                    # Pause before every request except the first. Sleeping at
                    # the top of the loop keeps the delay in place even when
                    # the previous keyword failed or returned nothing, and
                    # avoids a pointless wait after the last keyword.
                    time.sleep(2)

                logging.info(f"Searching for keyword: '{keyword}'...")

                params = {
                    'term': keyword,
                    'country': 'US',
                    'media': 'software',
                    'limit': RESULT_LIMIT
                }

                try:
                    response = requests.get(API_BASE_URL, params=params, timeout=10)
                    response.raise_for_status()

                    results = response.json().get('results', [])

                    # Build the rows here so that a malformed payload is
                    # reported as a per-keyword error below.
                    rows = [
                        [
                            keyword,
                            app.get('trackName'),
                            app.get('trackId'),
                            app.get('artistName'),
                            # Prefer the display price; fall back to the raw one.
                            app.get('formattedPrice', app.get('price', 'N/A')),
                            app.get('averageUserRating', 'N/A'),
                            app.get('userRatingCount', 'N/A'),
                            app.get('primaryGenreName', 'N/A'),
                            app.get('trackViewUrl'),
                            app.get('description', 'N/A')
                        ]
                        for app in results
                    ]
                except requests.exceptions.RequestException as e:
                    logging.error(f"HTTP Request failed for keyword '{keyword}': {e}")
                    continue
                except Exception as e:
                    # Best-effort boundary: one bad response must not stop the
                    # remaining keywords.
                    logging.error(f"An error occurred processing keyword '{keyword}': {e}")
                    continue

                if not rows:
                    logging.warning(f"No results found for keyword: '{keyword}'")
                    continue

                # Written outside the per-keyword try so that file-write
                # failures reach the IOError handler below instead of being
                # reported as a keyword-processing error.
                writer.writerows(rows)
                logging.info(f"Successfully saved {len(rows)} apps for '{keyword}'")

    except IOError as e:
        logging.critical(f"Failed to open or write to CSV file {CSV_FILE}: {e}")
        return

    logging.info(f"--- Scraping complete. Data saved to {CSV_FILE} ---")

In [17]:
# Run the App Store scrape; progress and errors are reported through logging.
scrape_app_store()

2025-11-21 20:37:05,991 - INFO - Starting App Store scrape. Output will be saved to data/app_store_asthma_apps_desc.csv
2025-11-21 20:37:05,993 - INFO - Searching for keyword: 'asthma'...
2025-11-21 20:37:07,308 - INFO - Successfully saved 20 apps for 'asthma'
2025-11-21 20:37:09,309 - INFO - Searching for keyword: 'asthma tracker'...
2025-11-21 20:37:09,814 - INFO - Successfully saved 20 apps for 'asthma tracker'
2025-11-21 20:37:11,816 - INFO - Searching for keyword: 'inhaler tracker'...
2025-11-21 20:37:12,521 - INFO - Successfully saved 19 apps for 'inhaler tracker'
2025-11-21 20:37:14,524 - INFO - Searching for keyword: 'peak flow'...
2025-11-21 20:37:17,125 - INFO - Successfully saved 20 apps for 'peak flow'
2025-11-21 20:37:19,128 - INFO - Searching for keyword: 'asthma management'...
2025-11-21 20:37:19,561 - INFO - Successfully saved 17 apps for 'asthma management'
2025-11-21 20:37:21,563 - INFO - Searching for keyword: 'asthma log'...
2025-11-21 20:37:22,191 - INFO - Successf

In [19]:
# Load the scraped results from the path the scraper writes to (CSV_FILE)
# rather than repeating the path literal, so reader and writer cannot drift
# apart, then preview the first rows.
app_store_list = pd.read_csv(CSV_FILE)
app_store_list.head(5)

Unnamed: 0,Keyword Searched,App Name,App ID,Seller,Price,Average Rating,Rating Count,Genre,App URL,Description
0,asthma,Asthma: Tracker & Reminders,6744072783,"No Worries! Lifestyle, LLC",Free,0.0,0,Medical,https://apps.apple.com/us/app/asthma-tracker-r...,Asthma Care Companion is a comprehensive asthm...
1,asthma,FindAir – Asthma Diary,1515944881,Findair sp. z o.o.,Free,2.75,4,Health & Fitness,https://apps.apple.com/us/app/findair-asthma-d...,FindAir application is a smart asthma diary fo...
2,asthma,"Airyn, the asthma app",1641811087,Voicemed Italia Srl,Free,0.0,0,Medical,https://apps.apple.com/us/app/airyn-the-asthma...,"Discover the revolutionary Airlyn app, a scien..."
3,asthma,Asthma Tracker゜,6444343217,Adam Cziko,Free,4.6087,69,Lifestyle,https://apps.apple.com/us/app/asthma-tracker/i...,Track asthma and take charge of your health.\n...
4,asthma,My Pollen Forecast - Allergies,1244428929,JRustonApps B.V.,Free,4.69137,25438,Health & Fitness,https://apps.apple.com/us/app/my-pollen-foreca...,My Pollen Forecast is the best app for trackin...


In [20]:
# The same app can be returned for several keywords. Keep only the first row
# for each App ID and report the row count before and after.
print(f"Length of app_store_list: {len(app_store_list)}")
app_store_list = app_store_list.drop_duplicates(subset='App ID', keep='first')
print(f"Length of app_store_list after dropping duplicates: {len(app_store_list)}")

Length of app_store_list: 186
Length of app_store_list after dropping duplicates: 87


In [21]:
# Save the de-duplicated list. index=False keeps the DataFrame's row index out
# of the file; otherwise it is written as an extra unnamed first column that
# comes back as "Unnamed: 0" when the CSV is read again.
app_store_list.to_csv('data/app_store_asthma_apps_desc_set.csv', index=False)

---

## **[Google Play - Android]**

In [None]:
def fetch_apps(keyword: str, limit: int = 20):
    """Run a Google Play search for ``keyword`` and return what ``search`` yields.

    The query is fixed to English (``lang="en"``) and the US store
    (``country="us"``); ``limit`` is forwarded to ``search`` as ``n_hits``.
    """
    query_options = {"lang": "en", "country": "us", "n_hits": limit}
    return search(keyword, **query_options)

In [None]:
def extract_info(app, keyword):
    """Flatten one search hit into the record that is written to the CSV.

    Args:
        app: Mapping describing a single app; values are read with ``.get``,
            so any missing key yields ``None`` in the record.
        keyword: The search term that produced this hit.

    Returns:
        A dict whose first key is ``"keyword"``, followed by the output
        columns in the order listed in ``column_sources``.
    """
    # (output column, key read from the hit), in output order.
    # NOTE(review): confirm that search() hits actually carry priceText,
    # summary, contentRating and url; absent keys are stored as None.
    column_sources = (
        ("app_id", "appId"),
        ("app_title", "title"),
        ("developer", "developer"),
        ("rating", "score"),
        ("installs", "installs"),
        ("price", "priceText"),
        ("genre", "genre"),
        ("summary", "summary"),
        ("content_rating", "contentRating"),
        ("url", "url"),
    )

    record = {"keyword": keyword}
    for column, source_key in column_sources:
        record[column] = app.get(source_key)
    return record

In [None]:
def main():
    """Search Google Play for every keyword and write the hits to ``asthma_apps.csv``.

    Each entry in ``KEYWORDS`` is searched with ``fetch_apps`` and every hit
    is flattened with ``extract_info``. A keyword whose search raises is
    logged and skipped, so one failing search does not discard the results
    already collected. If nothing was collected, a message is printed and no
    file is written.

    Returns:
        None. Results are only written to ``asthma_apps.csv``.
    """
    results = []
    for kw in KEYWORDS:
        try:
            apps = fetch_apps(kw)
        except Exception as e:
            # Best-effort boundary, matching the App Store scraper: report
            # the failure and carry on with the remaining keywords.
            logging.error(f"Google Play search failed for keyword '{kw}': {e}")
            continue
        results.extend(extract_info(a, kw) for a in apps)

    if not results:
        print("no results found")
        return

    with open("asthma_apps.csv", "w", newline="", encoding="utf-8") as f:
        # Every record has the same keys, so the first one defines the header.
        writer = csv.DictWriter(f, fieldnames=list(results[0]))
        writer.writeheader()
        writer.writerows(results)

    print("DONE. Saved to asthma_apps.csv")


if __name__ == "__main__":
    main()


---