In [7]:
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
# Raise the column display limit to 100 so wide frames are shown without column truncation.
pd.options.display.max_columns = 100

## Load data

Read the CSV files. These are unmodified files from Outscraper's scraped results.

The files contain Google reviews for leasing companies, apartment complexes, and student housing for three different campus towns.

- Brigham Young University (Provo, UT)
- Penn State University (University Park, PA)
- University of Illinois at Urbana-Champaign (Champaign, IL)

In [3]:
# Columns of reviews.csv that should be parsed as datetimes while loading.
review_date_cols = ['review_datetime_utc', 'owner_answer_timestamp_datetime_utc']

# df_b holds the business listings; df_r holds the individual reviews.
df_b = pd.read_csv('businesses.csv')
df_r = pd.read_csv('reviews.csv', parse_dates=review_date_cols)

In [4]:
# Preview the first two rows of each frame to check which columns were loaded.
display(df_b.head(2), df_r.head(2))

Unnamed: 0,campus,place_id,name,site,category,borough,street,city,postal_code,state,latitude,longitude,verified
0,Brigham Young University,ChIJqyXyo6GQTYcRXGfgeIpqc_I,Alpine Village,https://myalpinevillage.com/,Student housing center,Carterville,1378 Freedom Blvd 200 W,Provo,84604,Utah,40.252607,-111.661247,True
1,Brigham Young University,ChIJGbnTbamQTYcRtZfBrL52jhs,Palladium Apartments,https://www.palladiumprovo.com/,Housing complex,North Park,538 N Freedom Blvd Unit,Provo,84601,Utah,40.240917,-111.661714,True


Unnamed: 0,place_id,review_id,author_id,author_title,review_text,review_rating,review_img_url,review_datetime_utc,owner_answer,owner_answer_timestamp_datetime_utc,review_likes
0,ChIJqyXyo6GQTYcRXGfgeIpqc_I,ChdDSUhNMG9nS0VJQ0FnSUQ1OXRHZnNnRRAB,100512067005706825621,Laura Brown,Service requests take a very long time to be r...,3,,2023-10-24 12:46:43+00:00,"Dear Laura, \n\n We hear you loud and clear, t...",2023-10-24 14:56:01+00:00,0
1,ChIJqyXyo6GQTYcRXGfgeIpqc_I,ChZDSUhNMG9nS0VJQ0FnSUNwOUstOVpBEAE,102936107836751613731,Pete Haraguchi,I was thinking about giving two stars but I le...,1,https://lh5.googleusercontent.com/p/AF1QipPeIy...,2023-07-29 20:30:26+00:00,"Pete,\n\n Thank you for letting us know about ...",2023-07-29 22:46:48+00:00,12


In [5]:
# Count reviews per author and show the ten authors with the most reviews.
reviews_per_author = df_r['author_id'].value_counts()
reviews_per_author.head(10)

author_id
102348612710861589506    7
110198993908295025426    5
103654026925684314708    5
112488306677535389042    4
100521333138974362578    4
103342850229446137347    4
106641809643720930584    4
104920915517744748637    4
106195800689495927190    4
108798830676796182621    4
Name: count, dtype: int64

In [6]:
# Show every review written by the ten most frequent reviewers, sorted by author_id.
top_author_ids = df_r['author_id'].value_counts().head(10).index
is_top_author = df_r['author_id'].isin(top_author_ids)

# Identifier and image-URL columns are dropped to keep the displayed table narrow.
hidden_cols = ['place_id', 'review_id', 'review_img_url']

df_r[is_top_author].drop(columns=hidden_cols).sort_values('author_id')

Unnamed: 0,author_id,author_title,review_text,review_rating,review_datetime_utc,owner_answer,owner_answer_timestamp_datetime_utc,review_likes
10582,100521333138974362578,Jashwant Raj,far off from campus for students. nearby walma...,3,2017-03-28 15:24:18+00:00,"Hi Jashwant, thank you for your review! We app...",2017-03-28 16:07:11+00:00,0
7606,100521333138974362578,Jashwant Raj,best place in state College. economical rent r...,5,2017-07-19 04:42:06+00:00,,NaT,0
10248,100521333138974362578,Jashwant Raj,good for grad students as its not expensive.,4,2017-03-28 15:22:30+00:00,,NaT,0
7162,100521333138974362578,Jashwant Raj,best place for a student who wants to live alo...,5,2017-03-15 20:41:27+00:00,,NaT,1
5302,102348612710861589506,Kyle Gill,One hundred years ago the United States entere...,5,2017-02-17 22:44:42+00:00,,NaT,6
1524,102348612710861589506,Kyle Gill,5 stars for the simple fact I passed every cle...,5,2017-08-18 17:00:10+00:00,,NaT,4
1864,102348612710861589506,Kyle Gill,"Unbeknownst to most Americans, liberty was act...",5,2017-02-17 22:50:57+00:00,,NaT,8
4710,102348612710861589506,Kyle Gill,Most visible once every 29 days but also quite...,5,2017-02-07 23:43:55+00:00,,NaT,8
4632,102348612710861589506,Kyle Gill,They changed the name from The Archipelago to ...,5,2017-03-01 23:42:34+00:00,,NaT,3
2953,102348612710861589506,Kyle Gill,Not kidding with the name! You can see the bro...,5,2017-03-01 23:39:31+00:00,HA! We definitely have water near by (4 blocks...,2017-09-27 01:39:09+00:00,2


In [10]:
# Derive calendar year and month columns from the review timestamp.
review_dt = df_r['review_datetime_utc'].dt
df_r['year'] = review_dt.year
df_r['month'] = review_dt.month

# Confirm the two new columns were appended.
df_r.head(2)

Unnamed: 0,place_id,review_id,author_id,author_title,review_text,review_rating,review_img_url,review_datetime_utc,owner_answer,owner_answer_timestamp_datetime_utc,review_likes,year,month
0,ChIJqyXyo6GQTYcRXGfgeIpqc_I,ChdDSUhNMG9nS0VJQ0FnSUQ1OXRHZnNnRRAB,100512067005706825621,Laura Brown,Service requests take a very long time to be r...,3,,2023-10-24 12:46:43+00:00,"Dear Laura, \n\n We hear you loud and clear, t...",2023-10-24 14:56:01+00:00,0,2023,10
1,ChIJqyXyo6GQTYcRXGfgeIpqc_I,ChZDSUhNMG9nS0VJQ0FnSUNwOUstOVpBEAE,102936107836751613731,Pete Haraguchi,I was thinking about giving two stars but I le...,1,https://lh5.googleusercontent.com/p/AF1QipPeIy...,2023-07-29 20:30:26+00:00,"Pete,\n\n Thank you for letting us know about ...",2023-07-29 22:46:48+00:00,12,2023,7
