In [24]:
import gensim
import pandas as pd
from gensim import corpora
from gensim import similarities
from pprint import pprint

In [28]:
def calSimilarity(app: str, row: int)->pd.DataFrame:
    name_list = ['high_ratings', 'low_ratings']
    sort_by_rating = get_sort_by_rating(app)
    for i in range(2):
        # reviews_df = pd.read_csv(f'crawler_result/{app}/{app}.csv')
        df = sort_by_rating[i]
        reviews = df['review']
        # list of app reviews
        reviews = reviews.to_list()
        # lemmetation
        texts = [[word for word in review.lower().split()] for review in reviews]
        # create dict
        dictionary = corpora.Dictionary(texts)
        # bag-of-words
        corpus = [dictionary.doc2bow(text) for text in texts]
        # LDA modeling
        lda_model = gensim.models.LdaModel(corpus, num_topics=2, id2word=dictionary, passes=15)
        # topics
        # pprint(lda_model.print_topics())
        kw_df = pd.read_csv('kw_db.csv')
        feature_df = kw_df['functions'][row]
        # list of features
        features = feature_df.split('\n')
        # use gensim similarities module
        index = similarities.MatrixSimilarity(lda_model[corpus])
        # 創建一個空的列表來存儲所有的結果
        results = []
        # compare the similarity
        for feature in features:
            feature_bow = dictionary.doc2bow(feature.lower().split())
            feature_lda = lda_model[feature_bow]
            sims = index[feature_lda]
            for review, score in zip(reviews, sims):
                # 將每次迭代的結果添加到results列表中
                results.append({'feature': feature, 'review': review, 'similarity score': score})
        # 使用results列表來創建一個DataFrame
        df = pd.DataFrame(results)
        df.to_csv(f'crawler_result/{app}/feature_sim_score_{app}_{name_list[i]}.csv')
    return df

def get_sort_by_rating(app: str)->[pd.DataFrame, pd.DataFrame]:
    high_ratings = pd.read_csv(f'crawler_result/{app}/{app}_high_ratings.csv')
    low_ratings = pd.read_csv(f'crawler_result/{app}/{app}_low_ratings.csv')
    return [high_ratings, low_ratings] 

In [29]:
kw_df = pd.read_csv('kw_db.csv')
kw_df

Unnamed: 0.1,Unnamed: 0,functions
0,Duoswim:swim workout app,personal AI coach\nguided workouts\n1000+ swim...
1,swim.com:swim tracker,track your swims and achieve your goals\ndisco...
2,swimup-swimming training,personalized training plans\ndrill&technique v...
3,form swim,"choose own plans or workouts in app,can also l..."
4,myswimpro:#1 swim workout app,MySwimPro Coach unlocks:\nPersonalized swim Tr...
5,garmin connect,記錄距離\n設定泳池大小\n泳姿辨識\n自動休息\n技術訓練日誌
6,fitness,automatically tracks your splits and auto sets...


In [30]:
df = calSimilarity('duoswim', 0)
df

IndexError: index 1 is out of bounds for axis 0 with size 1

In [None]:
df = calSimilarity('swim-com', 1)
df

Unnamed: 0,feature,review,similarity score
0,track your swims and achieve your goals,After using this app for two years on a Pebble...,0.993582
1,track your swims and achieve your goals,"Strictly from an App Design standpoint, it bre...",0.993582
2,track your swims and achieve your goals,They’ve made some good improvements to the app...,0.113116
3,track your swims and achieve your goals,"At 70 yo I sometimes ( no, all the time) need ...",0.993582
4,track your swims and achieve your goals,This app has all the potential of being great ...,0.113116
...,...,...,...
1387,view performace over time,I have used this app with my old Apple Watch S...,0.190601
1388,view performace over time,I want to use this app. I love swim.com and us...,0.987577
1389,view performace over time,***Update - 02-10-2017\nMore updates have been...,0.177296
1390,view performace over time,App.has gone from functional to brick. Says it...,0.991725


In [None]:
df = calSimilarity('swimup', 2)
df

Unnamed: 0,feature,review,similarity score
0,personalized training plans,"Amazing app, amazing coach and superb courses....",0.35473
1,personalized training plans,Tried a few training apps for getting into swi...,0.953606
2,personalized training plans,I have been trying to learn how to freestyle w...,0.35473
3,personalized training plans,"Great app! Simple as it is, works good for beg...",0.375119
4,personalized training plans,They put me on 2 levels before champion but th...,0.365907
5,personalized training plans,Thank you guys. Appreciate your work.,0.435076
6,personalized training plans,I love Swimup’s YouTube channel so I was reall...,0.35473
7,personalized training plans,No voice tutorials makes these videos useless.,0.95848
8,drill&technique video library,"Amazing app, amazing coach and superb courses....",0.92186
9,drill&technique video library,Tried a few training apps for getting into swi...,0.439253


In [None]:
df = calSimilarity('form-swim', 3)
df

Unnamed: 0,feature,review,similarity score
0,"choose own plans or workouts in app,can also l...","I like sport gadgets, but usually 9 out of 10 ...",0.981654
1,"choose own plans or workouts in app,can also l...",My biggest question was if I was going to be a...,0.981654
2,"choose own plans or workouts in app,can also l...",This has been a game changer in how I swim. Fi...,0.981654
3,"choose own plans or workouts in app,can also l...",The googles are fine and the workout generatio...,0.190673
4,"choose own plans or workouts in app,can also l...",This and bone conduction headphones have me sw...,0.981654
...,...,...,...
305,customize your goggles(split times/stroke coun...,As described in title,0.995023
306,customize your goggles(split times/stroke coun...,It recognizes freestyle but all my breast stro...,0.981801
307,customize your goggles(split times/stroke coun...,"Great goggles, but major sync issues to Apple ...",0.974699
308,customize your goggles(split times/stroke coun...,I was excited when I first saw the Form goggle...,0.223523


In [None]:
df = calSimilarity('myswimpro', 4)
df

Unnamed: 0,feature,review,similarity score
0,MySwimPro Coach unlocks:,Got the app 5 weeks ago and I have made more p...,0.255298
1,MySwimPro Coach unlocks:,I am not a competíos swimmer but I enjoy it an...,0.277176
2,MySwimPro Coach unlocks:,Very impressed. As a long time swimmer with a...,0.966862
3,MySwimPro Coach unlocks:,"Let’s be honest, for many of us, our best comp...",0.255298
4,MySwimPro Coach unlocks:,First and foremost SwimPro is a great app with...,0.255298
...,...,...,...
9211,Follow friends,Do not get this app! It will work for a while...,0.219814
9212,Follow friends,I've used this app 3 times and all 3 times I g...,0.978960
9213,Follow friends,I downloaded the app to see if I could use it ...,0.996890
9214,Follow friends,App is useless. Cant even log in using email. ...,0.232212
