<a href="https://colab.research.google.com/github/latifahaljurayyan/insightApp/blob/main/Copy_of_Copy_of_gp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install google_play_scraper



In [None]:
from google_play_scraper import reviews, Sort
import pandas as pd

In [None]:
# Display name -> Google Play package id, in the order the apps are scraped.
apps = dict([
    ("Amazon", "com.amazon.mShop.android.shopping"),
    ("Shein", "com.zzkko"),
    ("AliExpress", "com.alibaba.aliexpresshd"),
    ("Noon", "com.noon.buyerapp"),
    ("Trendyol", "trendyol.com"),
])


In [None]:
def label_sentiment(rating):
    """Map a numeric star rating to a coarse sentiment label.

    Returns "Negative" for ratings of 2 or lower, "Neutral" for a rating
    equal to 3, and "Positive" for every other value.
    """
    if rating <= 2:
        return "Negative"
    return "Neutral" if rating == 3 else "Positive"

In [None]:
# Flat list of review records accumulated across every app in `apps`.
all_reviews = []

for app_name, app_id in apps.items():
    print(f"Fetching reviews for {app_name}...")

    # Request up to 3000 of the newest English/US reviews; the second value
    # returned by reviews() is not needed here and is discarded.
    result, _ = reviews(app_id, lang="en", country="us", sort=Sort.NEWEST, count=3000)

    for r in result:
        # Keep only the fields used downstream, then derive the label from
        # the star rating that was just stored.
        record = {
            "app_name": app_name,
            "review_text": r["content"],
            "rating": r["score"],
        }
        record["sentiment_label"] = label_sentiment(record["rating"])
        all_reviews.append(record)

Fetching reviews for Amazon...
Fetching reviews for Shein...
Fetching reviews for AliExpress...
Fetching reviews for Noon...
Fetching reviews for Trendyol...


In [None]:
# Turn the accumulated review records into a table, one row per review.
df = pd.DataFrame(all_reviews)
# Preview the first rows to sanity-check the columns.
df.head()


Unnamed: 0,app_name,review_text,rating,sentiment_label
0,Amazon,I don't like that it tells you to place anothe...,3,Neutral
1,Amazon,Drop shipped Temu products at department store...,2,Negative
2,Amazon,"Every ""Update"" kinda ruins the experience. now...",3,Neutral
3,Amazon,Amazon is very helpful when you dont live in t...,5,Positive
4,Amazon,"Not available for tablets, ok no problem will ...",1,Negative


In [None]:
import re



def clean_text(text):
    """Normalise a raw review into lowercase ASCII letters and whitespace.

    The input is coerced to ``str``; anything starting with "http" up to the
    next whitespace is removed, then every character that is neither an
    ASCII letter nor whitespace is deleted (not replaced by a space), and the
    result is lowercased. Text with no ASCII letters therefore collapses to
    whitespace or an empty string.
    """
    without_urls = re.sub(r"http\S+", "", str(text))
    letters_only = re.sub(r"[^a-zA-Z\s]", "", without_urls)
    return letters_only.lower()



# Element-wise cleaning of every raw review into a new column.
df["clean_review"] = df["review_text"].map(clean_text)

In [None]:
# Intermediate checkpoint without the index column; the same file name is
# written again by the final export at the end of the notebook.
df.to_csv("google_play_app_reviews.csv", index=False)

In [None]:
# Compare raw and cleaned text side by side for the first ten reviews.
df[["review_text", "clean_review"]].head(10)

Unnamed: 0,review_text,clean_review
0,I don't like that it tells you to place anothe...,i dont like that it tells you to place another...
1,Drop shipped Temu products at department store...,drop shipped temu products at department store...
2,"Every ""Update"" kinda ruins the experience. now...",every update kinda ruins the experience now i ...
3,Amazon is very helpful when you dont live in t...,amazon is very helpful when you dont live in town
4,"Not available for tablets, ok no problem will ...",not available for tablets ok no problem will b...
5,poor customer service,poor customer service
6,حراااميه ونصابين لو في اقل من نجمه كنت عملتها ...,
7,yellow cancel return button has been glitched ...,yellow cancel return button has been glitched ...
8,I'm kinda new to the Amazon game. So .... so f...,im kinda new to the amazon game so so far so ...
9,Filters are not showing when performing a sear...,filters are not showing when performing a sear...


In [None]:
import nltk
# Fetch the "punkt_tab" resource up front; presumably it backs word_tokenize
# used in the next cell — confirm against the NLTK documentation.
nltk.download('punkt_tab')
from nltk.tokenize import word_tokenize

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [None]:
# Split each cleaned review into a list of word tokens.
df["Tokens"] = df["clean_review"].map(word_tokenize)
df[["clean_review", "Tokens"]].head()

Unnamed: 0,clean_review,Tokens
0,i dont like that it tells you to place another...,"[i, dont, like, that, it, tells, you, to, plac..."
1,drop shipped temu products at department store...,"[drop, shipped, temu, products, at, department..."
2,every update kinda ruins the experience now i ...,"[every, update, kinda, ruins, the, experience,..."
3,amazon is very helpful when you dont live in town,"[amazon, is, very, helpful, when, you, dont, l..."
4,not available for tablets ok no problem will b...,"[not, available, for, tablets, ok, no, problem..."


In [None]:
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import nltk


# Fetch the NLTK corpora needed by the stop-word filter and the lemmatizer.
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

# English stop words as a set for fast membership checks, plus one shared
# lemmatizer instance reused for every token.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def remove_stopwords(tokens):
    """Return the tokens not present in the module-level ``stop_words`` set, in order."""
    return [token for token in tokens if token not in stop_words]


# Filter every token list; the original "Tokens" column is left untouched.
df["Tokens_No_Stopwords"] = df["Tokens"].map(remove_stopwords)


def lemmatize_tokens(tokens):
    """Lemmatize each token with the shared ``lemmatizer`` and return a new list."""
    return list(map(lemmatizer.lemmatize, tokens))


df["Tokens_No_Stopwords_Lemmatized"] = df["Tokens_No_Stopwords"].map(lemmatize_tokens)

# Show the three token stages side by side for the first rows.
token_stage_columns = ["Tokens", "Tokens_No_Stopwords", "Tokens_No_Stopwords_Lemmatized"]
df[token_stage_columns].head()


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Unnamed: 0,Tokens,Tokens_No_Stopwords,Tokens_No_Stopwords_Lemmatized
0,"[i, dont, like, that, it, tells, you, to, plac...","[dont, like, tells, place, another, youll, get...","[dont, like, tell, place, another, youll, get,..."
1,"[drop, shipped, temu, products, at, department...","[drop, shipped, temu, products, department, st...","[drop, shipped, temu, product, department, sto..."
2,"[every, update, kinda, ruins, the, experience,...","[every, update, kinda, ruins, experience, open...","[every, update, kinda, ruin, experience, open,..."
3,"[amazon, is, very, helpful, when, you, dont, l...","[amazon, helpful, dont, live, town]","[amazon, helpful, dont, live, town]"
4,"[not, available, for, tablets, ok, no, problem...","[available, tablets, ok, problem, shopping, eb...","[available, tablet, ok, problem, shopping, eba..."


In [None]:
# Token lists are not hashable, so join each one into a single string and use
# that as the de-duplication key; the first occurrence of each key is kept.
df["tokens_str"] = df["Tokens_No_Stopwords_Lemmatized"].map(" ".join)
df = df.drop_duplicates(subset=["tokens_str"], keep="first")

In [None]:
# The joined-token column was only a de-duplication key; remove it again.
df = df.drop(columns=['tokens_str'])

In [None]:
# Final export without the index column; this overwrites the file written
# earlier in the notebook under the same name.
df.to_csv("google_play_app_reviews.csv", index=False)

In [None]:
# NOTE(review): google.colab is presumably only importable inside Colab —
# confirm if this notebook is expected to run elsewhere.
from google.colab import files
# Hand the exported CSV to the Colab download helper.
files.download("google_play_app_reviews.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>