In [9]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd
import re
from datetime import date

import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import csv
sentiment= SentimentIntensityAnalyzer()

In [10]:


def clean(s):
    try:
        X=re.sub('<br/>'," ",s)
        X=re.sub("\n","",X)
        return X
    except:
        print(s)
    return s

def getSentiment(s):
    sent = sentiment.polarity_scores(s)
    return float(sent['compound'])

def splitNameCol(s):
    cats=[item.strip() for item in s.split('·')]
    name=cats[0]
    
    rating,bedrooms,beds,baths="No Rating",None,None,None
    for item in cats[1:]:
        if 'room' in item:
            bedrooms=item.split(' ')[0]
        elif 'Studio' in item:
            bedrooms=0.5
        elif 'bed' in item and 'room' not in item:
            beds=item.split(' ')[0]
        elif 'bath' in item:
            baths=item.split(' ')[0]
        elif '★' in item:
            rating=item.split('★')[1]
   
    return pd.Series([name, rating,bedrooms, beds, baths])
                
def removeTags():
    L=[]
    with open('reviews.csv','r') as f:
        reader=csv.reader(f)
        c=0
        for row in reader:
            X=clean(row[5])
            L.append([row[0],row[1],row[2],row[3],row[4],X])
    with open('reviews_clean.csv','w') as f2:
        writer=csv.writer(f2)
        for row in L:
            writer.writerow(row)    
    
    
    

In [11]:
removeTags()

In [12]:
        
listings=pd.read_csv('listings.csv')
listings[['name', 'rating','bedrooms','beds','bathrooms']] = listings['name'].apply(splitNameCol)
reviews=pd.read_csv('reviews_clean.csv').dropna(subset=['comments'])
neighborhoods=pd.read_csv('neighbourhoods(1).csv')
calendar=pd.read_csv('calendar.csv')
#reviews=pd.read_csv('reviews.csv').dropna(subset=['comments'])
#reviews['comments']=reviews['comments'].apply(clean)
reviews['sentiment']=reviews['comments'].apply(getSentiment)
high_sentiment=reviews.loc[reviews['sentiment']>0.90]
print(high_sentiment[['comments', 'sentiment']])

                                                 comments  sentiment
0       Recommended! Very good value for a spacious, a...     0.9220
1       Our ten days visiting in Portland were enormou...     0.9081
2       We had a wonderful time staying in the area of...     0.9715
4       Clean, comfortable, quiet rooms; easygoing gen...     0.9583
5       Beautiful neighborhood, convenient to the airp...     0.9412
...                                                   ...        ...
470427  Great place! Kay was friendly and helpful. She...     0.9722
470428  PDX Retreat was immaculately renovated. Everyt...     0.9957
470430  Great host, very friendly and helpful!  The ho...     0.9446
470431  The space was well appointed for a gathering a...     0.9382
470434   Beautiful! Meg is great. Everything was perfect.     0.9184

[283576 rows x 2 columns]


In [13]:

merged=pd.merge(reviews,listings,left_on='listing_id', right_on='id',how='inner')
merged=merged[['listing_id','host_id','reviewer_id','comments','sentiment','rating','neighbourhood','number_of_reviews','price','bedrooms','bathrooms','reviews_per']]
merged=merged.rename(columns={'rating':'host rating'})
merged.head()


Unnamed: 0,listing_id,host_id,reviewer_id,comments,sentiment,host rating,neighbourhood,number_of_reviews,price,bedrooms,bathrooms
0,12899,49682,69327,"Recommended! Very good value for a spacious, a...",0.922,4.93,Concordia,616,80.0,2,1
1,12899,49682,72846,Our ten days visiting in Portland were enormou...,0.9081,4.93,Concordia,616,80.0,2,1
2,12899,49682,84196,We had a wonderful time staying in the area of...,0.9715,4.93,Concordia,616,80.0,2,1
3,12899,49682,89114,I stayed at Ali and David's place when I first...,0.7778,4.93,Concordia,616,80.0,2,1
4,12899,49682,100318,"Clean, comfortable, quiet rooms; easygoing gen...",0.9583,4.93,Concordia,616,80.0,2,1


In [None]:
#%pip install textblob

In [14]:
from textblob import TextBlob
import nltk
nltk.download('all')

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to
[nltk_data]    |     /Users/johnprichard/nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to
[nltk_data]    |     /Users/johnprichard/nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     /Users/johnprichard/nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     /Users/johnprichard/nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_ru is already
[nltk_data]    |       up-to-date!
[nltk_data]    | Downloading package basque_grammars to
[nltk_data]    |     /Users/johnprichard/nltk_data...
[nltk_data]    |   Package basque_grammars is already up-to-date!
[nltk_

True

In [15]:
def PartsOfSpeech(s):
    nouns=""
    adjectives=""
    comma=""
    comma2=""
    pos=TextBlob(s).tags
    for item in pos:
        if 'NN' in item[1]:
            nouns+=f'{comma}{item[0]}'
            comma=','
        if 'JJ' in item[1]:
            adjectives+=f'{comma2}{item[0]}'
            comma2=','
    return pd.Series([nouns,adjectives])
            

In [16]:
merged[['nouns','adjectives']]=merged['comments'].apply(PartsOfSpeech)
merged.head()

Unnamed: 0,listing_id,host_id,reviewer_id,comments,sentiment,host rating,neighbourhood,number_of_reviews,price,bedrooms,bathrooms,nouns,adjectives
0,12899,49682,69327,"Recommended! Very good value for a spacious, a...",0.922,4.93,Concordia,616,80.0,2,1,"value,floor,Alison,David","good,spacious,airy,upper,welcoming,helpful"
1,12899,49682,72846,Our ten days visiting in Portland were enormou...,0.9081,4.93,Concordia,616,80.0,2,1,"days,Portland,Alison,David,warmth,accomodation...","ten,most,economical,lucky"
2,12899,49682,84196,We had a wonderful time staying in the area of...,0.9715,4.93,Concordia,616,80.0,2,1,"time,area,Alberta,Arts,beds,comfy,upstairs,wal...","wonderful,spacious,great,amazing,great"
3,12899,49682,89114,I stayed at Ali and David's place when I first...,0.7778,4.93,Concordia,616,80.0,2,1,"Ali,David,place,Portland,space,lots,room,Ali,D...","comfortable,private,knowledgeable"
4,12899,49682,100318,"Clean, comfortable, quiet rooms; easygoing gen...",0.9583,4.93,Concordia,616,80.0,2,1,"Clean,rooms,hosts,neighborhood,parks,restauran...","comfortable,quiet,generous,great,unlimited,able"


In [17]:
merged.to_csv('Reviews_Joined.csv')

In [None]:
print(merged.loc[23,'comments'])

for ind,row in merged.iterrows():
    
    if ind==24:
        print(ind)
        print(row)
        break

In [None]:
def clean(s):
    try:
        X=re.sub('<br/>'," ",s)
        X=re.sub("\n","",X)
        return X
    except:
        print(s)
    return s

s="I loved it it was fantastic<br/>Great place to stay\nFantastic food in the neighborhood"
s2="Huge space and friendly owners. just what we needed. Very nicely located close to Alberta street with bars and food and Portland hippies. \n<br/>\n<br/>Will definitely stay again if in PoOr"

print(clean(s2))

In [18]:
merged.head()

Unnamed: 0,listing_id,host_id,reviewer_id,comments,sentiment,host rating,neighbourhood,number_of_reviews,price,bedrooms,bathrooms,nouns,adjectives
0,12899,49682,69327,"Recommended! Very good value for a spacious, a...",0.922,4.93,Concordia,616,80.0,2,1,"value,floor,Alison,David","good,spacious,airy,upper,welcoming,helpful"
1,12899,49682,72846,Our ten days visiting in Portland were enormou...,0.9081,4.93,Concordia,616,80.0,2,1,"days,Portland,Alison,David,warmth,accomodation...","ten,most,economical,lucky"
2,12899,49682,84196,We had a wonderful time staying in the area of...,0.9715,4.93,Concordia,616,80.0,2,1,"time,area,Alberta,Arts,beds,comfy,upstairs,wal...","wonderful,spacious,great,amazing,great"
3,12899,49682,89114,I stayed at Ali and David's place when I first...,0.7778,4.93,Concordia,616,80.0,2,1,"Ali,David,place,Portland,space,lots,room,Ali,D...","comfortable,private,knowledgeable"
4,12899,49682,100318,"Clean, comfortable, quiet rooms; easygoing gen...",0.9583,4.93,Concordia,616,80.0,2,1,"Clean,rooms,hosts,neighborhood,parks,restauran...","comfortable,quiet,generous,great,unlimited,able"


In [31]:


calendar=pd.read_csv('calendar.csv')
calendar.head()

take_one=calendar.loc[calendar['listing_id']==1804923,]


print(len(take_one.loc[take_one['available']=='t',]))


0


In [28]:
take_one.dtypes


listing_id          int64
date               object
available          object
price              object
adjusted_price    float64
minimum_nights      int64
maximum_nights      int64
dtype: object

In [26]:
calendar

Unnamed: 0,listing_id,date,available,price,adjusted_price,minimum_nights,maximum_nights
0,12899,2023-12-20,f,$80.00,,3,1125
1,12899,2023-12-21,f,$80.00,,3,1125
2,12899,2023-12-22,f,$80.00,,3,1125
3,12899,2023-12-23,f,$80.00,,3,1125
4,12899,2023-12-24,f,$80.00,,3,1125
...,...,...,...,...,...,...,...
1804920,1050118656923959922,2024-12-14,t,$361.00,,30,1125
1804921,1050118656923959922,2024-12-15,t,$361.00,,30,1125
1804922,1050118656923959922,2024-12-16,t,$361.00,,30,1125
1804923,1050118656923959922,2024-12-17,t,$361.00,,30,1125


In [32]:
reviews.columns

Index(['listing_id', 'id', 'date', 'reviewer_id', 'reviewer_name', 'comments',
       'sentiment'],
      dtype='object')

In [None]:
review