In [1]:
# Importing necessary library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [39]:
# Load the CSV file
places = pd.read_csv('data/places.csv', delimiter=',')

# Inspect the data
print(places.head())
print(places.columns)

# If the columns are not split correctly, split them manually
if len(places.columns) == 1:
    # Split the single column into separate columns
    places = places['PlaceId,PlaceName,Country,Description,ImageURL'].str.split(',', expand=True)
    places.columns = ['PlaceId', 'PlaceName', 'Country', 'Description', 'ImageURL']

# Convert PlaceId to integer if necessary
places['PlaceId'] = places['PlaceId'].astype(int)

# Print the column names to confirm
print(places.columns)

# Display the first few rows of the DataFrame to verify the data
print(places.head())

   PlaceId           PlaceName    Country  \
0        1        Eiffel Tower     France   
1        2          Great Wall      China   
2        3        Machu Picchu       Peru   
3        4        Grand Canyon        USA   
4        5  Sydney Opera House  Australia   

                                      Description  \
0                         Iconic symbol of Paris.   
1  Ancient wall stretching across northern China.   
2   Historic Inca citadel in the Andes Mountains.   
3                       Famous canyon in Arizona.   
4        Iconic performing arts center in Sydney.   

                                            ImageURL  
0  https://images.pexels.com/photos/1530259/pexel...  
1  https://images.pexels.com/photos/2412603/pexel...  
2  https://images.pexels.com/photos/2929906/pexel...  
3  https://images.pexels.com/photos/63553/pexels-...  
4  https://images.pexels.com/photos/1878293/pexel...  
Index(['PlaceId', 'PlaceName', 'Country', 'Description', 'ImageURL'], dtype='obj

In [40]:
places.columns

Index(['PlaceId', 'PlaceName', 'Country', 'Description', 'ImageURL'], dtype='object')

In [4]:
places.head(
    5
)

Unnamed: 0,"PlaceId,PlaceName,Country,Description,ImageURL"
0,"1,Eiffel Tower,France,Iconic symbol of Paris.,..."
1,"2,Great Wall,China,Ancient wall stretching acr..."
2,"3,Machu Picchu,Peru,Historic Inca citadel in t..."
3,"4,Grand Canyon,USA,Famous canyon in Arizona.,h..."
4,"5,Sydney Opera House,Australia,Iconic performi..."


In [6]:
places.columns

Index(['PlaceId,PlaceName,Country,Description,ImageURL'], dtype='object')

In [7]:
users = pd.read_csv('data/users.csv', sep=";", on_bad_lines='skip', encoding='latin-1')

In [8]:
users

Unnamed: 0,"UserId,UserName,Age"
0,"201,Alice,67"
1,"202,Bob,47"
2,"203,Charlie,45"
3,"204,David,20"
4,"205,Eva,39"
5,"206,Frank,68"
6,"207,Grace,27"
7,"208,Hannah,34"
8,"209,Ivy,30"
9,"210,Jack,40"


In [19]:
ratings = pd.read_csv('data/ratings.csv', sep=";", on_bad_lines='skip', encoding='latin-1')

In [21]:
ratings[['UserId', 'PlaceId', 'Ratings']] = ratings['UserId,PlaceId,Ratings'].str.split(',', expand=True)

In [22]:
ratings.drop(columns=['UserId,PlaceId,Ratings'], inplace=True)

In [23]:
ratings['UserId'] = ratings['UserId'].astype(int)
ratings['PlaceId'] = ratings['PlaceId'].astype(int)
ratings['Ratings'] = ratings['Ratings'].astype(int)

In [24]:
print(ratings.columns)


Index(['UserId', 'PlaceId', 'Ratings'], dtype='object')


In [25]:
ratings['UserId'].value_counts()

UserId
208    10
246     8
206     8
223     6
243     6
222     6
245     6
237     6
211     6
219     6
228     6
215     6
212     5
220     5
203     5
229     5
231     5
247     4
232     4
239     4
224     4
234     4
214     4
241     4
209     4
235     4
230     4
207     4
221     3
240     3
216     3
227     3
218     3
242     3
248     3
202     3
236     3
226     3
225     3
204     3
233     2
238     2
205     2
201     2
210     2
217     2
249     1
244     1
213     1
Name: count, dtype: int64

In [26]:
x = ratings['UserId']

In [27]:
x


0      233
1      208
2      211
3      223
4      227
      ... 
195    210
196    224
197    229
198    203
199    206
Name: UserId, Length: 200, dtype: int64

In [29]:
x.shape

(200,)

In [41]:
ratings_with_places = ratings.merge(places, on='PlaceId')

In [42]:
ratings_with_places

Unnamed: 0,UserId,PlaceId,Ratings,PlaceName,Country,Description,ImageURL
0,233,89,9,Bryce Canyon,USA,Famous for its unique rock formations called h...,https://images.pexels.com/photos/545960/pexels...
1,208,27,9,Alhambra,Spain,Palace and fortress complex in Granada.,https://images.pexels.com/photos/2431436/pexel...
2,211,21,8,Burj Khalifa,UAE,Tallest building in the world.,https://images.pexels.com/photos/162031/dubai-...
3,223,82,3,Monteverde Cloud Forest,Costa Rica,Famous for its biodiversity and cloud forest.,https://images.pexels.com/photos/27200252/pexe...
4,227,75,2,Wulingyuan,China,Scenic area known for its quartzite sandstone ...,https://images.pexels.com/photos/8138726/pexel...
...,...,...,...,...,...,...,...
195,210,86,3,Bryggen,Norway,Historic harbour district in Bergen.,https://images.pexels.com/photos/18450266/pexe...
196,224,51,4,Masada,Israel,Ancient fortress on a plateau in the Judaean D...,https://images.pexels.com/photos/6638788/pexel...
197,229,45,8,Napa Valley,USA,Region in California known for its vineyards.,https://images.pexels.com/photos/39511/purple-...
198,203,56,10,Alcatraz Island,USA,Historic island and former prison in San Franc...,https://images.pexels.com/photos/23910628/pexe...


In [44]:
number_rating = ratings_with_places.groupby('PlaceName')['Ratings'].count().reset_index()

In [45]:
number_rating

Unnamed: 0,PlaceName,Ratings
0,Acropolis,1
1,Alcatraz Island,3
2,Alhambra,5
3,Angkor Wat,3
4,Arashiyama Bamboo Grove,2
...,...,...
65,Taj Mahal,3
66,Torres del Paine,4
67,Venice,1
68,Wulingyuan,2


In [46]:
places_pivot = ratings_with_places.pivot_table(columns='UserId', index='PlaceName', values= 'Ratings')

In [47]:
places_pivot

UserId,201,202,203,204,205,206,207,208,209,210,...,240,241,242,243,244,245,246,247,248,249
PlaceName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Acropolis,,,,,,1.0,,,,,...,,,,,,,,,,
Alcatraz Island,,,10.0,8.0,,,,,,,...,,,,,,,,,,
Alhambra,,,,,,,,9.0,,,...,,,,,,,,,10.0,
Angkor Wat,,,,,,,,8.0,,,...,,,,,,,,,,
Arashiyama Bamboo Grove,,,,,,,,,,,...,,5.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Taj Mahal,,,,,,,,,,,...,,,,,,,,,,
Torres del Paine,,,,,,2.0,,,,,...,,,,,,,,,,
Venice,,,,,,,,,,,...,,,,,,,,,,
Wulingyuan,,,,,,,,,,,...,,,,,,,3.0,,,


In [48]:
places_pivot.fillna(0, inplace=True)

In [49]:
places_pivot

UserId,201,202,203,204,205,206,207,208,209,210,...,240,241,242,243,244,245,246,247,248,249
PlaceName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Acropolis,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Alcatraz Island,0.0,0.0,10.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Alhambra,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0
Angkor Wat,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Arashiyama Bamboo Grove,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Taj Mahal,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Torres del Paine,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Venice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Wulingyuan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0


In [50]:
from scipy.sparse import csr_matrix

In [51]:
places_sparse = csr_matrix(places_pivot)

In [52]:
type(places_sparse)

scipy.sparse._csr.csr_matrix

In [53]:
from sklearn.neighbors import NearestNeighbors
model = NearestNeighbors(algorithm= 'brute')

In [54]:
model.fit(places_sparse)