<a href="https://colab.research.google.com/github/kikidatalabs/kiki_1/blob/main/20241220_IKEA_app_review.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Colab 환경에서 구글 앱 스토어와 애플 앱 스토어 리뷰 크롤링 및 감성 분석 코드

## 1. 필요한 라이브러리 설치 및 로드
# 라이브러리가 설치되지 않은 경우 설치하도록 수정
import os
import subprocess

def install(package):
    subprocess.check_call(["python3", "-m", "pip", "install", package])

try:
    from google_play_scraper import Sort, reviews
except ImportError:
    print("google-play-scraper not found. Installing...")
    install("google-play-scraper")
    from google_play_scraper import Sort, reviews

try:
    from app_store_scraper import AppStore
except ImportError:
    print("app-store-scraper not found. Installing...")
    install("app-store-scraper")
    from app_store_scraper import AppStore

try:
    from transformers import pipeline
except ImportError:
    print("transformers not found. Installing...")
    install("transformers")
    from transformers import pipeline

import pandas as pd

# 감성 분석 모델 로드
sentiment_analyzer = pipeline("sentiment-analysis")

## 2. 구글 앱 스토어 리뷰 크롤링
def fetch_google_reviews(app_id, lang='en', country='us', num_reviews=100):
    google_reviews = reviews(
        app_id,
        lang=lang,
        country=country,
        sort=Sort.MOST_RELEVANT,
        count=num_reviews
    )
    df_google = pd.DataFrame([{
        "review": review["content"],
        "score": review["score"]
    } for review in google_reviews[0]])
    return df_google

## 3. 애플 앱 스토어 리뷰 크롤링
def fetch_apple_reviews(app_name, country='us', num_reviews=100):
    app = AppStore(country=country, app_name=app_name)
    app.review(how_many=num_reviews)
    df_apple = pd.DataFrame([{
        "review": review["review"],
        "score": review["rating"]
    } for review in app.reviews])
    return df_apple

## 4. 감성 분석 수행
def perform_sentiment_analysis(df):
    df['sentiment'] = df['review'].apply(lambda x: sentiment_analyzer(x)[0]['label'])
    return df

## 5. 실행
if __name__ == "__main__":
    # IKEA 앱의 앱 ID 및 이름 설정
    google_app_id = "com.ingka.ikea.app"  # 구글 앱 스토어 ID
    apple_app_name = "ikea"  # 애플 앱 스토어 이름

    try:
        # 구글 리뷰 가져오기
        print("Fetching Google Play Store reviews...")
        google_reviews = fetch_google_reviews(google_app_id, lang='en', country='us', num_reviews=100)
        print("Google reviews fetched.")

        # 애플 리뷰 가져오기
        print("Fetching Apple App Store reviews...")
        apple_reviews = fetch_apple_reviews(apple_app_name, country='us', num_reviews=100)
        print("Apple reviews fetched.")

        # 감성 분석
        print("Performing sentiment analysis...")
        google_reviews = perform_sentiment_analysis(google_reviews)
        apple_reviews = perform_sentiment_analysis(apple_reviews)

        # 데이터 저장
        print("Saving reviews to CSV files...")
        google_reviews.to_csv("google_reviews.csv", index=False)
        apple_reviews.to_csv("apple_reviews.csv", index=False)

        print("All tasks completed. Review data saved.")
    except Exception as e:
        print(f"An error occurred: {e}")


google-play-scraper not found. Installing...
app-store-scraper not found. Installing...


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu


Fetching Google Play Store reviews...
Google reviews fetched.
Fetching Apple App Store reviews...
Apple reviews fetched.
Performing sentiment analysis...
Saving reviews to CSV files...
All tasks completed. Review data saved.


In [3]:
from google.colab import drive
drive.mount('/content/drive')

ValueError: mount failed