In [4]:
import logging
from google_play_scraper import reviews_all, app
from urllib.error import HTTPError
from datetime import datetime, timedelta
import time

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Define the app IDs for the banks
app_ids = {
    'Abyssinia Bank': 'com.abyssinia.bank',  # Replace with actual app ID
    'Commercial Bank of Ethiopia': 'com.cbe.mobilebanking',  # Replace with actual app ID
    'GlobalBankEthiopia': 'com.global.bank'  # Replace with actual app ID
}

def fetch_reviews(app_id):
    """Fetch all reviews for a given app."""
    try:
        reviews = reviews_all(app_id)
        return reviews
    except HTTPError as e:
        logger.error("HTTPError while fetching reviews for app ID %s: %s", app_id, e)
        return []

def fetch_app_details(app_id):
    """Fetch app details to get download count and other metadata."""
    try:
        details = app(app_id)
        return details
    except HTTPError as e:
        logger.error("HTTPError while fetching details for app ID %s: %s", app_id, e)
        return {}

def track_reviews_and_downloads(app_ids, duration_days=7):
    """Track reviews and download counts over a period of time."""
    end_time = datetime.now()
    start_time = end_time - timedelta(days=duration_days)
    
    logger.info("Starting tracking from %s to %s", start_time, end_time)
    
    for bank, app_id in app_ids.items():
        logger.info("Fetching data for %s", bank)
        
        # Fetch and log reviews
        reviews = fetch_reviews(app_id)
        if not reviews:
            continue
        
        logger.info("Fetched %d reviews for %s", len(reviews), bank)
        
        # Filter reviews by time
        filtered_reviews = [review for review in reviews if datetime.fromtimestamp(review['at'].timestamp()) >= start_time]
        logger.info("Filtered down to %d reviews for %s", len(filtered_reviews), bank)
        
        # Fetch app details
        details = fetch_app_details(app_id)
        if not details:
            continue
        
        download_count = details.get('minInstalls', 0)
        logger.info("Current download count for %s: %d", bank, download_count)
        
        # Simulate tracking over time (for demonstration, we will just log the initial data)
        logger.info("Initial data for %s: %d reviews, %d downloads", bank, len(filtered_reviews), download_count)
        
        # You can store or process this data as needed
        
        # Sleep to simulate daily tracking (in real scenario, you'd schedule this to run daily)
        time.sleep(1)  # sleep for a second instead of a day for demonstration purposes

if __name__ == "__main__":
    track_reviews_and_downloads(app_ids)

INFO:__main__:Starting tracking from 2024-05-15 14:16:27.330828 to 2024-05-22 14:16:27.330828
INFO:__main__:Fetching data for Abyssinia Bank
INFO:__main__:Fetching data for Commercial Bank of Ethiopia
INFO:__main__:Fetching data for GlobalBankEthiopia


In [9]:
import logging
from google_play_scraper import reviews_all, app
from urllib.error import HTTPError
from datetime import datetime, timedelta
import time

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Define the app IDs for the banks
app_ids = {
    'Abyssinia Bank': 'com.boa.boaMobileBanking',  # Replace with actual app ID
    'Commercial Bank of Ethiopia': 'com.cbe.cbeMobileBanking',  # Replace with actual app ID
    # 'GlobalBankEthiopia': 'com.gbe.gbeMobileBanking'  # Replace with actual app ID
}

def fetch_reviews(app_id):
    """Fetch all reviews for a given app."""
    try:
        reviews = reviews_all(app_id)
        return reviews
    except HTTPError as e:
        logger.error("HTTPError while fetching reviews for app ID %s: %s", app_id, e)
        return []

def fetch_app_details(app_id):
    """Fetch app details to get download count and other metadata."""
    try:
        details = app(app_id)
        return details
    except HTTPError as e:
        logger.error("HTTPError while fetching details for app ID %s: %s", app_id, e)
        return {}

def track_reviews_and_downloads(app_ids, duration_days=7):
    """Track reviews and download counts over a period of time."""
    end_time = datetime.now()
    start_time = end_time - timedelta(days=duration_days)
    
    logger.info("Starting tracking from %s to %s", start_time, end_time)
    
    for bank, app_id in app_ids.items():
        logger.info("Fetching data for %s", bank)
        
        # Fetch and log reviews
        reviews = fetch_reviews(app_id)
        if not reviews:
            logger.info("No reviews found for %s", bank)
            continue
        
        logger.info("Fetched %d reviews for %s", len(reviews), bank)
        
        # Filter reviews by time
        filtered_reviews = [review for review in reviews if datetime.fromtimestamp(review['at'].timestamp()) >= start_time]
        logger.info("Filtered down to %d reviews for %s", len(filtered_reviews), bank)
        
        # Log review details (for demonstration, print first 5 reviews)
        for review in filtered_reviews[:5]:
            logger.info("Review for %s: %s", bank, review)
        
        # Fetch app details
        details = fetch_app_details(app_id)
        if not details:
            logger.info("No app details found for %s", bank)
            continue
        
        download_count = details.get('minInstalls', 0)
        logger.info("Current download count for %s: %d", bank, download_count)
        
        # Log app details
        logger.info("App details for %s: %s", bank, details)
        
        # Simulate tracking over time (for demonstration, we will just log the initial data)
        logger.info("Initial data for %s: %d reviews, %d downloads", bank, len(filtered_reviews), download_count)
        
        # You can store or process this data as needed
        
        # Sleep to simulate daily tracking (in real scenario, you'd schedule this to run daily)
        time.sleep(1)  # sleep for a second instead of a day for demonstration purposes

if __name__ == "__main__":
    track_reviews_and_downloads(app_ids)


INFO:__main__:Starting tracking from 2024-05-15 14:37:28.634982 to 2024-05-22 14:37:28.634982
INFO:__main__:Fetching data for Abyssinia Bank
INFO:__main__:Fetched 398 reviews for Abyssinia Bank
INFO:__main__:Filtered down to 22 reviews for Abyssinia Bank
INFO:__main__:Review for Abyssinia Bank: {'reviewId': '322c12e9-1e9a-4609-b4c0-3ef5f78ff589', 'userName': 'Yonas A', 'userImage': 'https://play-lh.googleusercontent.com/a-/ALV-UjXjmqRijwRp6VOnJeChEHrGETK1Q48QR-Nnzex9FR6vYtlCqkI', 'content': "Playstore need to have some option to give 0 stars because this application right here, deserve exactly that, imagine being one of the biggest banks In the country and can't manage to have good mobile application in the Era of mobile money, you guys are in the brink of collapse you better make it right real quick or you gonna be 10 meters down to the grave. DON'T USE THIS APP, IT'S WASTE OF TIME.", 'score': 1, 'thumbsUpCount': 0, 'reviewCreatedVersion': '24.05.07', 'at': datetime.datetime(2024, 5, 

In [16]:
import logging
import pandas as pd
from google_play_scraper import reviews_all, app
from urllib.error import HTTPError
from datetime import datetime, timedelta
import time
import os

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Define the app IDs for the banks
app_ids = {
    'Abyssinia Bank': 'com.boa.boaMobileBanking',
    'Commercial Bank of Ethiopia': 'com.combanketh.mobilebanking',
}

def fetch_reviews(app_id):
    """Fetch all reviews for a given app."""
    try:
        reviews = reviews_all(app_id)
        return reviews
    except HTTPError as e:
        logger.error("HTTPError while fetching reviews for app ID %s: %s", app_id, e)
        return []

def fetch_app_details(app_id):
    """Fetch app details to get download count and other metadata."""
    try:
        details = app(app_id)
        return details
    except HTTPError as e:
        logger.error("HTTPError while fetching details for app ID %s: %s", app_id, e)
        return {}

def track_reviews_and_downloads(app_ids, duration_days=7):
    """Track reviews and download counts over a period of time."""
    end_time = datetime.now()
    start_time = end_time - timedelta(days=duration_days)
    
    logger.info("Starting tracking from %s to %s", start_time, end_time)
    
    all_reviews = []
    
    for bank, app_id in app_ids.items():
        logger.info("Fetching data for %s", bank)
        
        # Fetch and log reviews
        reviews = fetch_reviews(app_id)
        if not reviews:
            logger.info("No reviews found for %s", bank)
            continue
        
        logger.info("Fetched %d reviews for %s", len(reviews), bank)
        
        # Filter reviews by time
        filtered_reviews = [review for review in reviews if datetime.fromtimestamp(review['at'].timestamp()) >= start_time]
        logger.info("Filtered down to %d reviews for %s", len(filtered_reviews), bank)
        
        for review in filtered_reviews:
            review_data = {
                'reviewId': review['reviewId'],
                'userName': review['userName'],
                'userImage': review['userImage'],
                '👍': review['thumbsUpCount'],
                'reviewCreatedVersion': review.get('reviewCreatedVersion'),
                'at': review['at'],
                'replyContent': review.get('replyContent', ''),
                'repliedAt': review.get('repliedAt', ''),
                'appVersion': review.get('appVersion', ''),
                'score': review['score'],
                'Comments': review['content'],
                'Keywords': '',  # Placeholder for keywords
                'LDA_Category': '',  # Placeholder for LDA category
                'Sentiment': '',  # Placeholder for sentiment
                'Insight': ''  # Placeholder for insight
            }
            all_reviews.append(review_data)
        
        # Fetch app details
        details = fetch_app_details(app_id)
        if not details:
            logger.info("No app details found for %s", bank)
            continue
        
        download_count = details.get('minInstalls', 0)
        logger.info("Current download count for %s: %d", bank, download_count)
        
        # Log app details
        logger.info("App details for %s: %s", bank, details)
        
        # Sleep to simulate daily tracking (in real scenario, you'd schedule this to run daily)
        time.sleep(1)  # sleep for a second instead of a day for demonstration purposes
    
    # Create a DataFrame from the collected reviews
    df_reviews = pd.DataFrame(all_reviews)
    
    # Print the head of the DataFrame
    print(df_reviews.head())
    
    # Ensure the data directory exists
    os.makedirs('data', exist_ok=True)
    
    # Save the DataFrame to a CSV file
    csv_file_path = '../data/google_play_reviews.csv'
    df_reviews.to_csv(csv_file_path, index=False)
    
    logger.info("Saved reviews data to %s", csv_file_path)

if __name__ == "__main__":
    track_reviews_and_downloads(app_ids)

INFO:__main__:Starting tracking from 2024-05-15 15:14:31.029110 to 2024-05-22 15:14:31.029110
INFO:__main__:Fetching data for Abyssinia Bank
INFO:__main__:Fetched 199 reviews for Abyssinia Bank
INFO:__main__:Filtered down to 22 reviews for Abyssinia Bank
INFO:__main__:Current download count for Abyssinia Bank: 100000
INFO:__main__:App details for Abyssinia Bank: {'title': 'BoA Mobile', 'description': 'BoA Mobile an innovative app from Bank of Abyssinia, empowers customers to effortlessly conduct a wide range of banking transactions right at their fingertips. Experience seamless and convenient banking on the go with BoA Mobile', 'descriptionHTML': 'BoA Mobile an innovative app from Bank of Abyssinia, empowers customers to effortlessly conduct a wide range of banking transactions right at their fingertips. Experience seamless and convenient banking on the go with BoA Mobile', 'summary': 'Mobile Banking Application', 'installs': '100,000+', 'minInstalls': 100000, 'realInstalls': 393375, '

                               reviewId        userName  \
0  322c12e9-1e9a-4609-b4c0-3ef5f78ff589         Yonas A   
1  42e6b999-2d95-4374-bf40-93c60d08c58f  Abdurezak Awol   
2  d8c26573-6298-42ad-b768-d2981e2ec79b    Eyasu Ketema   
3  9a9a538f-0286-4c22-b170-ae47466f92df    Destaw Ngate   
4  fe4f2a61-8e80-47ea-ad5e-eb9c9e4f1dca      Fuad Yasin   

                                           userImage  👍 reviewCreatedVersion  \
0  https://play-lh.googleusercontent.com/a-/ALV-U...  0             24.05.07   
1  https://play-lh.googleusercontent.com/a/ACg8oc...  0             24.05.07   
2  https://play-lh.googleusercontent.com/a-/ALV-U...  0             24.05.07   
3  https://play-lh.googleusercontent.com/a/ACg8oc...  0             24.05.07   
4  https://play-lh.googleusercontent.com/a/ACg8oc...  0                 None   

                   at replyContent repliedAt appVersion  score  \
0 2024-05-21 13:17:07         None      None   24.05.07      1   
1 2024-05-20 23:29:06         No