# Importing Required Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import parse

### Creating a function to get all the app names

In [2]:
# Directory (relative to the notebook's working directory) that holds the
# per-app review exports named submission_<app>.csv.
path = r'./data'
def getAppNames(path):
    """Return the app name encoded in each entry of the directory `path`.

    Files are expected to be named ``submission_<app>.csv``; the ``<app>``
    part is returned for each of them. The prefix and extension are matched
    case-insensitively and ``<app>`` must be non-empty.

    The result has exactly one element per ``os.listdir(path)`` entry, in
    listing order. Entries that do not follow the naming scheme yield an
    empty string, so the list stays positionally aligned with the directory
    listing.

    Parameters
    ----------
    path : str
        Directory to scan.

    Returns
    -------
    list of str
        App names, with ``''`` for entries that do not match the pattern.
    """
    prefix, suffix = 'submission_', '.csv'
    app_names = []
    for file in os.listdir(path):
        lowered = file.lower()
        # Take the name straight from the filename instead of slicing the
        # textual repr of a parse result, which breaks as soon as the repr
        # escapes a character of the name.
        middle = file[len(prefix):len(file) - len(suffix)]
        if lowered.startswith(prefix) and lowered.endswith(suffix) and middle:
            app_names.append(middle)
        else:
            app_names.append('')

    return app_names

# Collect the app names found in the data directory and display the list.
appNames = getAppNames(path)
appNames

['candidate', 'jobget', 'jobseeker', 'jobswipe', 'mobile']

##### To create the final_submission.csv file, we build a data pipeline function that takes the data path and the app names as input and returns the final DataFrame.

In [3]:
def creating_df(path, appNames):
    """Load every per-app review CSV in `path` and stack them into one frame.

    Only files named ``submission_<app>.csv`` whose ``<app>`` part appears in
    `appNames` are read. Anything else in the directory (for example a
    previously written ``final_submission.csv``) is skipped, so re-running
    the pipeline does not ingest its own output. Each loaded frame gets an
    ``appName`` column holding the ``<app>`` part of its filename.

    Parameters
    ----------
    path : str
        Directory containing the CSV files.
    appNames : iterable of str
        App names to load. Empty strings are ignored.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the loaded files, in sorted filename order.
        Each file keeps its own row index, so index labels repeat across
        apps. An empty DataFrame is returned when no file qualifies.
    """
    prefix, suffix = 'submission_', '.csv'
    wanted = set(appNames)
    dfs = []
    # List the directory once, sorted, so the row order is deterministic and
    # labelling does not depend on the order of a second directory listing.
    for file in sorted(os.listdir(path)):
        lowered = file.lower()
        if not (lowered.startswith(prefix) and lowered.endswith(suffix)):
            continue
        # The label comes from the filename itself, not from a list position.
        app_name = file[len(prefix):-len(suffix)]
        if not app_name or app_name not in wanted:
            continue
        print("Reading "+file)
        df = pd.read_csv(os.path.join(path, file))
        print("Current Data Frame shape ")
        print(df.shape)
        df['appName'] = app_name
        dfs.append(df)
    print("Total Files found: ", len(dfs))
    # pd.concat raises on an empty list, so keep the empty-frame result.
    if not dfs:
        return pd.DataFrame()
    # A single concat instead of re-copying the growing frame per file.
    return pd.concat(dfs, axis=0)
    
# Run the pipeline on the data directory and display the combined frame.
final_df = creating_df(path, appNames)
final_df

Reading submission_candidate.csv
Current Data Frame shape 
(143, 10)
Reading submission_jobget.csv
Current Data Frame shape 
(3374, 10)
Reading submission_jobseeker.csv
Current Data Frame shape 
(8909, 10)
Reading submission_jobswipe.csv
Current Data Frame shape 
(703, 10)
Reading submission_mobile.csv
Current Data Frame shape 
(105, 10)
Total Files found:  5


Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appName
0,d505016e-3b37-4ba2-97cf-16c9ef54f84c,David Webster,https://play-lh.googleusercontent.com/a-/ACB-R...,Found great bar jobs on here in the past but n...,1,0,3.2.11,2023-02-08 19:41:20,,,candidate
1,eae5efd4-26eb-469d-b793-f9689510ebbf,Joshua Espinoza,https://play-lh.googleusercontent.com/a/AGNmyx...,Try to upload resume and it just stays uploading!,2,0,3.2.6,2022-09-26 04:18:21,,,candidate
2,c7156674-d0bf-497e-924e-07802e9cda9d,Robert Basom,https://play-lh.googleusercontent.com/a-/ACB-R...,"This is one of the worst apps I've ever used,w...",1,0,,2022-07-20 08:11:02,,,candidate
3,520a4e2f-d352-4fe4-a63b-ceea058663bf,Daniel DLS,https://play-lh.googleusercontent.com/a-/ACB-R...,If I cant upload a resume because for whatever...,1,1,3.2.5,2022-06-27 23:48:52,,,candidate
4,254d1260-7a30-4ae1-8c68-53f597c3c84c,Morris Kagunya,https://play-lh.googleusercontent.com/a-/ACB-R...,It's not logging in,1,0,3.2.3,2022-05-16 22:59:26,,,candidate
...,...,...,...,...,...,...,...,...,...,...,...
100,45d71edd-c573-4bc2-a219-ac7e31cb90ad,Alan J,https://play-lh.googleusercontent.com/a-/ACB-R...,Rather tasty,5,7,1.0.1.364,2021-11-14 16:15:49,,,mobile
101,6d98a697-0814-46f1-b8b8-fbbcb3f9f3b3,Tracy Nguyen,https://play-lh.googleusercontent.com/a-/ACB-R...,I am about to do it myself to make it happen a...,5,9,,2021-11-13 03:00:17,,,mobile
102,8bbd9f45-73b4-42e0-8057-d4fc362c121a,Jeremy Yatchmenoff,https://play-lh.googleusercontent.com/a/AGNmyx...,The best,5,5,1.0.1.364,2021-11-03 15:02:20,,,mobile
103,6a62c0a3-c52d-4900-9fd4-32a10c277afe,David Shaw,https://play-lh.googleusercontent.com/a-/ACB-R...,Nice app,5,8,1.0.1.364,2021-10-22 06:44:09,,,mobile


In [4]:
# Write the combined reviews to disk without the row index, then report the
# frame's shape and preview its first rows.
# Note: the file is written into the same directory that holds the input CSVs,
# so it shows up among that directory's entries on any later listing.
output_file = path + '/' + 'final_submission.csv'
final_df.to_csv(output_file, index=False)
print("Final Submission File SAVED!")
print("Final File Shape: ", final_df.shape)
final_df.head()

Final Submission File SAVED!
Final File Shape:  (13234, 11)


Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appName
0,d505016e-3b37-4ba2-97cf-16c9ef54f84c,David Webster,https://play-lh.googleusercontent.com/a-/ACB-R...,Found great bar jobs on here in the past but n...,1,0,3.2.11,2023-02-08 19:41:20,,,candidate
1,eae5efd4-26eb-469d-b793-f9689510ebbf,Joshua Espinoza,https://play-lh.googleusercontent.com/a/AGNmyx...,Try to upload resume and it just stays uploading!,2,0,3.2.6,2022-09-26 04:18:21,,,candidate
2,c7156674-d0bf-497e-924e-07802e9cda9d,Robert Basom,https://play-lh.googleusercontent.com/a-/ACB-R...,"This is one of the worst apps I've ever used,w...",1,0,,2022-07-20 08:11:02,,,candidate
3,520a4e2f-d352-4fe4-a63b-ceea058663bf,Daniel DLS,https://play-lh.googleusercontent.com/a-/ACB-R...,If I cant upload a resume because for whatever...,1,1,3.2.5,2022-06-27 23:48:52,,,candidate
4,254d1260-7a30-4ae1-8c68-53f597c3c84c,Morris Kagunya,https://play-lh.googleusercontent.com/a-/ACB-R...,It's not logging in,1,0,3.2.3,2022-05-16 22:59:26,,,candidate


In [14]:
# Derive a sentiment label from the review score:
#   score < 3  -> 'negative'
#   score == 3 -> 'neutral'
#   score > 3  -> 'positive'
# Rows whose score satisfies none of the conditions (e.g. a missing score)
# keep a real NaN. The labels are built in an object array because mixing
# np.nan with string choices inside np.where coerces the fallback to the
# text 'nan', which a later isnull() check cannot detect.
score_col = final_df['score']
sentiment_labels = np.full(len(final_df), np.nan, dtype=object)
sentiment_labels[(score_col < 3).to_numpy()] = 'negative'
sentiment_labels[(score_col == 3).to_numpy()] = 'neutral'
sentiment_labels[(score_col > 3).to_numpy()] = 'positive'
final_df['sentiment'] = sentiment_labels
final_df

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appName,sentiment
0,d505016e-3b37-4ba2-97cf-16c9ef54f84c,David Webster,https://play-lh.googleusercontent.com/a-/ACB-R...,Found great bar jobs on here in the past but n...,1,0,3.2.11,2023-02-08 19:41:20,,,candidate,negative
1,eae5efd4-26eb-469d-b793-f9689510ebbf,Joshua Espinoza,https://play-lh.googleusercontent.com/a/AGNmyx...,Try to upload resume and it just stays uploading!,2,0,3.2.6,2022-09-26 04:18:21,,,candidate,negative
2,c7156674-d0bf-497e-924e-07802e9cda9d,Robert Basom,https://play-lh.googleusercontent.com/a-/ACB-R...,"This is one of the worst apps I've ever used,w...",1,0,,2022-07-20 08:11:02,,,candidate,negative
3,520a4e2f-d352-4fe4-a63b-ceea058663bf,Daniel DLS,https://play-lh.googleusercontent.com/a-/ACB-R...,If I cant upload a resume because for whatever...,1,1,3.2.5,2022-06-27 23:48:52,,,candidate,negative
4,254d1260-7a30-4ae1-8c68-53f597c3c84c,Morris Kagunya,https://play-lh.googleusercontent.com/a-/ACB-R...,It's not logging in,1,0,3.2.3,2022-05-16 22:59:26,,,candidate,negative
...,...,...,...,...,...,...,...,...,...,...,...,...
100,45d71edd-c573-4bc2-a219-ac7e31cb90ad,Alan J,https://play-lh.googleusercontent.com/a-/ACB-R...,Rather tasty,5,7,1.0.1.364,2021-11-14 16:15:49,,,mobile,positive
101,6d98a697-0814-46f1-b8b8-fbbcb3f9f3b3,Tracy Nguyen,https://play-lh.googleusercontent.com/a-/ACB-R...,I am about to do it myself to make it happen a...,5,9,,2021-11-13 03:00:17,,,mobile,positive
102,8bbd9f45-73b4-42e0-8057-d4fc362c121a,Jeremy Yatchmenoff,https://play-lh.googleusercontent.com/a/AGNmyx...,The best,5,5,1.0.1.364,2021-11-03 15:02:20,,,mobile,positive
103,6a62c0a3-c52d-4900-9fd4-32a10c277afe,David Shaw,https://play-lh.googleusercontent.com/a-/ACB-R...,Nice app,5,8,1.0.1.364,2021-10-22 06:44:09,,,mobile,positive


In [15]:
# Count how many entries of the sentiment column are null.
final_df['sentiment'].isna().to_numpy().sum()

0

In [17]:
# Keep only the reviews whose score is above the 3.99 cut-off and show them.
is_high_rated = final_df['score'].gt(3.99)
high_rated_df = final_df[is_high_rated]
high_rated_df

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appName,sentiment
5,b0fad2f1-e43b-42a2-8271-4ceb396bbd55,AnnaLiza Coelho,https://play-lh.googleusercontent.com/a-/ACB-R...,Helpful!,5,0,3.2.2,2022-04-09 20:07:46,,,candidate,positive
6,098e9eef-cd64-41d4-b9e4-a46fc1356dda,Kadar Harris,https://play-lh.googleusercontent.com/a/AGNmyx...,Great,4,0,3.0.1,2022-03-25 23:27:33,,,candidate,positive
21,0037392a-4fae-4033-9831-c1a5d5f0847d,Adam Febre,https://play-lh.googleusercontent.com/a-/ACB-R...,Love.,5,0,3.1.15,2021-06-14 19:30:11,,,candidate,positive
27,6d93065d-a75d-4566-8ca1-3a5afbbb0d1d,Janessa Jordan,https://play-lh.googleusercontent.com/a-/ACB-R...,Good app to sign up and fill out job applicati...,5,2,3.1.15,2020-08-18 00:31:11,,,candidate,positive
31,0b3d424c-edd3-4d76-bce1-d3238ed7d35d,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,It ok.,4,0,3.1.13,2020-01-31 22:57:34,,,candidate,positive
...,...,...,...,...,...,...,...,...,...,...,...,...
100,45d71edd-c573-4bc2-a219-ac7e31cb90ad,Alan J,https://play-lh.googleusercontent.com/a-/ACB-R...,Rather tasty,5,7,1.0.1.364,2021-11-14 16:15:49,,,mobile,positive
101,6d98a697-0814-46f1-b8b8-fbbcb3f9f3b3,Tracy Nguyen,https://play-lh.googleusercontent.com/a-/ACB-R...,I am about to do it myself to make it happen a...,5,9,,2021-11-13 03:00:17,,,mobile,positive
102,8bbd9f45-73b4-42e0-8057-d4fc362c121a,Jeremy Yatchmenoff,https://play-lh.googleusercontent.com/a/AGNmyx...,The best,5,5,1.0.1.364,2021-11-03 15:02:20,,,mobile,positive
103,6a62c0a3-c52d-4900-9fd4-32a10c277afe,David Shaw,https://play-lh.googleusercontent.com/a-/ACB-R...,Nice app,5,8,1.0.1.364,2021-10-22 06:44:09,,,mobile,positive
