In [1]:
import Constants as Const
import ModelLoader
import DataLoader
import random
import pandas as pd
import ExampleMatches
from tqdm import tqdm
from SampeledIds import sampled_ids

# Sampling configuration used throughout the notebook.
SEED = 42  # seeds `random` below and is passed as random_state to DataFrame.sample
NUM_SAMPLES = 100  # number of reviews drawn per app
NUMBER_NEAREST = 3  # number of nearest issues requested per piece of user feedback
random.seed(SEED)

# Load the app-review and tweet data sets identified by the Constants entries.
app_reviews = DataLoader.load_df_compressed(Const.APP_REVIEWS)
tweets = DataLoader.load_df_compressed(Const.TWEETS)

def select_random_reviews(df, app, n):
    """Draw ``n`` bug-report reviews of one app that were not sampled earlier.

    A row qualifies when its ``inSample`` flag is set, its ``app_id`` equals
    ``app``, its ``review_class`` is ``'bug_report'`` and its ``reviewId`` is
    not contained in ``sampled_ids``.

    Args:
        df: review frame with the columns ``inSample``, ``app_id``,
            ``review_class``, ``reviewId`` and ``content``.
        app: value compared against the ``app_id`` column.
        n: number of rows to draw (seeded with ``SEED``).

    Returns:
        A list with one ``[content, reviewId, app_id]`` list per drawn row.

    Raises:
        ValueError: raised by ``DataFrame.sample`` when fewer than ``n`` rows
            qualify, because the draw is made without replacement.
    """
    is_candidate = (
        df['inSample']
        & (df['app_id'] == app)
        & (df['review_class'] == 'bug_report')
        & ~df['reviewId'].isin(sampled_ids)
    )
    candidates = df[is_candidate]

    drawn = candidates.sample(n, random_state=SEED)
    return drawn[['content', 'reviewId', 'app_id']].values.tolist()

def select_random_tweets(df, app, n):
    """Draw ``n`` bug-report tweets that belong to one app.

    A row qualifies when its ``inSample`` flag is set, its ``app`` column
    equals ``app`` and its ``review_class`` is ``'bug_report'``.

    Args:
        df: tweet frame with the columns ``inSample``, ``app``,
            ``review_class``, ``text`` and ``ID``.
        app: value compared against the ``app`` column.
        n: number of rows to draw (seeded with ``SEED``).

    Returns:
        A list with one ``[text, ID, app]`` list per drawn row.

    Raises:
        ValueError: raised by ``DataFrame.sample`` when fewer than ``n`` rows
            qualify, because the draw is made without replacement.
    """
    is_candidate = (
        df['inSample']
        & (df['app'] == app)
        & (df['review_class'] == 'bug_report')
    )
    candidates = df[is_candidate]

    drawn = candidates.sample(n, random_state=SEED)
    return drawn[['text', 'ID', 'app']].values.tolist()
    

# Draw NUM_SAMPLES bug-report reviews for each app; the resulting lists are
# consumed by the matching step in the next cell.
random_firefox_reviews = select_random_reviews(app_reviews, Const.FIREFOX_ID, NUM_SAMPLES)
random_vlc_reviews = select_random_reviews(app_reviews, Const.VLC_ID, NUM_SAMPLES)
random_signal_reviews = select_random_reviews(app_reviews, Const.SIGNAL_ID, NUM_SAMPLES)
random_nextcloud_reviews = select_random_reviews(app_reviews, Const.NEXTCLOUD_ID, NUM_SAMPLES)

# Tweet sampling is currently disabled.
# random_firefox_tweets = select_random_tweets(tweets, '@firefox', NUM_SAMPLES)
# random_vlc_tweets = select_random_tweets(tweets, '@videolan', NUM_SAMPLES)

# Report how many reviews were actually drawn per app.
print(f'firefox_reviews:{len(random_firefox_reviews)}')
print(f'vlc_reviews:{len(random_vlc_reviews)}')
print(f'signal_reviews:{len(random_signal_reviews)}')
print(f'nextcloud_reviews:{len(random_nextcloud_reviews)}')
print()
# print(f'firefox_tweets:{len(random_firefox_tweets)}')
# print(f'vlc_tweets:{len(random_vlc_tweets)}')

firefox_reviews:100
vlc_reviews:100
signal_reviews:100
nextcloud_reviews:100



In [2]:

# Issue embeddings per project; these are passed to
# ExampleMatches.get_nearest_issues further down in this cell.
firefox_issues_android_embeddings = DataLoader.load_embeddings(Const.FIREFOX_ANDROID_EMBEDDINGS)
# firefox_issues_desktop_embeddings = DataLoader.load_embeddings(Const.FIREFOX_EMBEDDINGS)
vlc_issues_embeddings = DataLoader.load_embeddings(Const.VLC_EMBEDDINGS)
signal_issues_embeddings = DataLoader.load_embeddings(Const.SIGNAL_EMBEDDINGS)
nextcloud_issues_embeddings = DataLoader.load_embeddings(Const.NEXTCLOUD_EMBEDDINGS)


# Issue frames per project; used below to look up an issue's title by its 'id'.
firefox_android_issues = DataLoader.load_df_compressed(Const.FIREFOX_ISSUES_ANDROID)
# firefox_desktop_issues = DataLoader.load_df_compressed(Const.FIREFOX_ISSUES_DESKTOP)
vlc_issues = DataLoader.load_df_compressed(Const.VLC_ISSUES)
signal_issues = DataLoader.load_df_compressed(Const.SIGNAL_ISSUES)
nextcloud_issues = DataLoader.load_df_compressed(Const.NEXTCLOUD_ISSUES)

def create_coding(
    app,
    platform,
    source,
    user_feedback_text,
    issue_text,
    user_feedback_id,
    issue_id,
    distance):
    """Assemble one coding record pairing a piece of user feedback with an issue.

    Args:
        app: stored under ``'app'``.
        platform: stored under ``'platform'``.
        source: stored under ``'source'``.
        user_feedback_text: stored under ``'user_feedback'``.
        issue_text: stored under ``'issue_text'``.
        user_feedback_id: stored under ``'user_feedback_id'`` after conversion
            with ``str``.
        issue_id: stored unchanged under ``'issue_id'``.
        distance: stored unchanged under ``'distance'``.

    Returns:
        A dict holding the eight fields above, in that order.
    """
    columns = (
        'app',
        'platform',
        'source',
        'user_feedback',
        'issue_text',
        'user_feedback_id',
        'issue_id',
        'distance',
    )
    values = (
        app,
        platform,
        source,
        user_feedback_text,
        issue_text,
        str(user_feedback_id),  # the feedback id is always stored as text
        issue_id,
        distance,
    )
    return dict(zip(columns, values))

def append_codings(user_feedback, embeddings, issues, platform, source, title_column, codings):
    """Match every feedback item to its nearest issues and record the pairs.

    For each ``(text, id, app)`` entry of ``user_feedback`` the function asks
    ``ExampleMatches.get_nearest_issues`` for ``NUMBER_NEAREST`` issues, looks
    up each returned issue's title in ``issues`` and appends one record built
    by ``create_coding`` to ``codings``.

    Args:
        user_feedback: iterable of ``(text, id, app)`` triples.
        embeddings: issue embeddings handed to ``get_nearest_issues``.
        issues: frame with an ``'id'`` column and the column ``title_column``.
        platform: value stored in every record's ``'platform'`` field.
        source: value stored in every record's ``'source'`` field.
        title_column: name of the ``issues`` column holding the issue title.
        codings: list that is extended in place; nothing is returned.

    Raises:
        IndexError: if a returned issue id has no matching row in ``issues``.
    """
    for feedback_text, feedback_id, app in tqdm(user_feedback):
        nearest = ExampleMatches.get_nearest_issues(feedback_text, embeddings, n=NUMBER_NEAREST)
        for issue_id, distance in nearest:
            # First title among the rows whose 'id' equals the returned issue id.
            matching_titles = issues.loc[issues['id'] == issue_id, title_column]
            issue_text = matching_titles.values[0]
            # distance.item() unwraps the value to a plain Python scalar
            # (assumes a numpy/torch-style scalar - TODO confirm).
            record = create_coding(
                app, platform, source, feedback_text, issue_text,
                feedback_id, issue_id, distance.item(),
            )
            codings.append(record)
    

# Accumulates one record per (feedback item, nearest issue) pair across all apps.
codings = []

# The Firefox and VLC issue frames keep the issue title in 'summary'.
append_codings(random_firefox_reviews, firefox_issues_android_embeddings, firefox_android_issues, 'app', 'app_review', 'summary', codings)

append_codings(random_vlc_reviews, vlc_issues_embeddings, vlc_issues, 'app', 'app_review', 'summary', codings)

# NOTE(review): the two disabled tweet calls below pass six arguments, but
# append_codings takes seven (title_column is missing); add it before re-enabling.
# append_codings(random_firefox_tweets, firefox_issues_desktop_embeddings, firefox_desktop_issues, 'desktop', 'twitter', codings)

# append_codings(random_vlc_tweets, vlc_issues_embeddings, vlc_issues, 'desktop', 'twitter', codings)

# The Signal and Nextcloud issue frames keep the issue title in 'title'.
append_codings(random_signal_reviews, signal_issues_embeddings, signal_issues, 'app', 'app_review', 'title', codings)

append_codings(random_nextcloud_reviews, nextcloud_issues_embeddings, nextcloud_issues, 'app', 'app_review', 'title', codings)




# NOTE(review): disabled inline variant of the tweet matching; it refers to
# random_tweets_bug_reports and desktop_issues_embeddings, which are not
# defined in the visible cells.
# for tweet_text, tweet_id in tqdm(random_tweets_bug_reports):
#     distances = ExampleMatches.get_nearest_issues(tweet_text, desktop_issues_embeddings, n=NUMBER_NEAREST)
#     for issue_id, distance in distances:
#         issue_text = firefox_desktop_issues[firefox_desktop_issues['id']==issue_id]['summary'].values[0]
#         coding = create_coding('firefox','desktop', 'twitter',tweet_text, issue_text, tweet_id, issue_id, distance.item())
#         codings.append(coding)


# Persist all records without the index column, then read the file back so the
# cell displays what was written to Const.CODINGS.
df_codings = pd.DataFrame(codings)
df_codings.to_csv(Const.CODINGS, index=False)
pd.read_csv(Const.CODINGS)


INFO:c:\Users\MH\git\icse21\src\DataLoader.py:embeddings loaded from: c:\Users\MH\git\icse21\data\firefox_android_embeddings_nouns.pkl
INFO:c:\Users\MH\git\icse21\src\DataLoader.py:embeddings loaded from: c:\Users\MH\git\icse21\data\vlc_embeddings_nouns.pkl
INFO:c:\Users\MH\git\icse21\src\DataLoader.py:embeddings loaded from: c:\Users\MH\git\icse21\data\signal_embeddings_nouns.pkl
INFO:c:\Users\MH\git\icse21\src\DataLoader.py:embeddings loaded from: c:\Users\MH\git\icse21\data\nextcloud_embeddings_nouns.pkl
100%|██████████| 100/100 [05:27<00:00,  3.28s/it]
100%|██████████| 100/100 [00:11<00:00,  8.62it/s]
100%|██████████| 100/100 [01:23<00:00,  1.19it/s]
100%|██████████| 100/100 [00:30<00:00,  3.26it/s]


Unnamed: 0,app,platform,source,user_feedback,issue_text,user_feedback_id,issue_id,distance
0,org.mozilla.firefox,app,app_review,Mozilla's new add-on policy ruined everything....,Firefox Beta version 65.0b5 gives the false & ...,gp:AOqpTOHjRnXA91fybp8lvHqEBHkBmcmOcToaOAmZRuo...,1515842,0.870129
1,org.mozilla.firefox,app,app_review,Mozilla's new add-on policy ruined everything....,"during upgrade of open webapps, error: ""an exi...",gp:AOqpTOHjRnXA91fybp8lvHqEBHkBmcmOcToaOAmZRuo...,988980,0.864170
2,org.mozilla.firefox,app,app_review,Mozilla's new add-on policy ruined everything....,<html>'s clientHeight is incorrect and inconsi...,gp:AOqpTOHjRnXA91fybp8lvHqEBHkBmcmOcToaOAmZRuo...,1105326,0.861604
3,org.mozilla.firefox,app,app_review,Videos or songs don't stop playing after you s...,Sound of the webm video keeps playing even if ...,gp:AOqpTOEZBygtMIIfRZ6MNA7SDeVl6FQEILngeYA9P70...,1265284,0.875324
4,org.mozilla.firefox,app,app_review,Videos or songs don't stop playing after you s...,Nothing happens when pressing on volume icon w...,gp:AOqpTOEZBygtMIIfRZ6MNA7SDeVl6FQEILngeYA9P70...,1133683,0.871545
...,...,...,...,...,...,...,...,...
1195,com.nextcloud.client,app,app_review,The photo upload function doesn't care if you ...,Autoupload of images does not work when SD car...,gp:AOqpTOFJtPG2KpTet7IPG5ET-vHVeLkSSjY40rAkSmL...,198345245,0.864989
1196,com.nextcloud.client,app,app_review,The photo upload function doesn't care if you ...,Auto-Upload issue: Photos taken are not upload...,gp:AOqpTOFJtPG2KpTet7IPG5ET-vHVeLkSSjY40rAkSmL...,363781464,0.858782
1197,com.nextcloud.client,app,app_review,Auto uploads no longer work. App just freezes ...,Retrying failed uploads stops the app,gp:AOqpTOGrSwjxvHu5Yfs_Z2vedhy40sxpUHcem0Ar33P...,289791893,0.930025
1198,com.nextcloud.client,app,app_review,Auto uploads no longer work. App just freezes ...,Auto upload on 1.4.2 and Android 7.1.2 still b...,gp:AOqpTOGrSwjxvHu5Yfs_Z2vedhy40sxpUHcem0Ar33P...,221706645,0.921641
