In [4]:
#import required libraries 

from sklearn.impute import SimpleImputer 
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import sys


In [5]:
#load the attractions data from 'attractions_of_ktm.csv' (path is relative to the working directory) into a DataFrame
dataset=pd.read_csv('attractions_of_ktm.csv')

In [6]:
#preview the first five rows to check which columns were loaded
print(dataset.head())

                  title avg_rating  voted_by            genre
0      Boudhanath Stupa        4.5      8897  Religious Sites
1  Swayambhunath Temple        4.5      6203  Religious Sites
2  Pashupatinath Temple        4.5      4937  Religious Sites
3     Chandragiri Hills        4.5       399        Mountains
4       Kopan Monastery        4.5       787  Religious Sites


In [14]:
#titles found in the first 30 rows of the file
#NOTE(review): calling these the "top 30" assumes the CSV is already ordered by rank - confirm
top_30_dest=dataset['title'].iloc[:30]
print(top_30_dest.tolist())

['Boudhanath Stupa', 'Swayambhunath Temple', 'Pashupatinath Temple', 'Chandragiri Hills', 'Kopan Monastery', 'Thamel', 'Garden of Dreams', 'Namo Buddha (Stupa)', 'Kathmandu Durbar Square', 'Asan', 'Shivapuri Nagarjun National Park', 'Kailashnath Mahadev', 'Dakshinkali Temple', 'Budhanilkantha', 'Hanuman Dhoka', 'The Crematoria', 'Kumari Chowk', 'Narayanhiti Palace Museum', 'Babar Mahal Revisted', 'Basantapur Tower', 'Phulchoki', 'Budhanilakantha Temple', 'Pullahari Monastery', 'National Botanical Gardens', 'Jaganath (Krishna) Temple', 'Indra Chowk', 'Kathesimbu Stupa', 'Akash Bhairav Temple', 'Mandala Street', 'Sankhu Village']


In [154]:
#replace every cell that holds the literal string 'N' with np.nan, modifying dataset in place
#NOTE(review): presumably 'N' is how the source file marks a missing value - confirm against the CSV
dataset.replace('N',np.nan,inplace=True)

In [155]:
#print the total number of missing (NaN) cells, summed over every column of the dataset
print(pd.isna(dataset).sum().sum())

551


In [156]:
#convert the avg_rating column to a numeric dtype
#pd.to_numeric is called with its defaults, so a value it cannot parse raises instead of being coerced
dataset['avg_rating']=pd.to_numeric(dataset['avg_rating'])

In [157]:
#mean avg_rating over the locations around Kathmandu (Series.mean skips NaN by default)
print(dataset['avg_rating'].mean())

4.834908282379099


In [158]:
#highest avg rating given to a location in Ktm
highest_rating=dataset['avg_rating'].max()
print(highest_rating)
#every row whose avg_rating equals that maximum
is_top_rated=dataset['avg_rating']==highest_rating
print(dataset[is_top_rated])

5.0
                                                  title  avg_rating  voted_by  \
41    Cathedral of the Assumption of the Blessed Vir...         5.0        12   
49                 Boudha Farmers Market at Utpala Cafe         5.0        15   
50                 Chhango Adventure Canyoning In Nepal         5.0        20   
66                                           Shanti Spa         5.0        15   
71                                Music Museum of Nepal         5.0         7   
...                                                 ...         ...       ...   
1820                                     NTB Adventures         5.0        22   
1821                            S Nepal Tours & Travels         5.0         1   
1822                             Best Trekking in Nepal         5.0        15   
1824                        Beyond Himalaya - Day Tours         5.0         6   
1825            Khumbu Nangpala Trekking and Expedition         5.0         3   

                       

In [159]:
#lowest avg rating given to a location in Ktm
lowest_rating=dataset['avg_rating'].min()
print(lowest_rating)
#every row whose avg_rating equals that minimum
is_lowest_rated=dataset['avg_rating']==lowest_rating
print(dataset[is_lowest_rated])

1.0
                                             title  avg_rating  voted_by  \
138                              Lokta Paper Craft         1.0         1   
154                      Department of Immigration         1.0         3   
1753                          Bajrayogini Pashmina         1.0         1   
1754                 Travel In Nepal Treks Pvt Ltd         1.0         1   
1759                                    Bunk Tribe         1.0         1   
1762                            Talixo - Kathmandu         1.0         1   
1803  Himalayan Little Buddha Treks And Expedition         1.0         1   

                                            genre  
138                Points of Interest & Landmarks  
154                          Government Buildings  
1753                      Speciality & Gift Shops  
1754  Multi-day Tours • 4WD, ATV & Off-Road Tours  
1759           Multi-day Tours • Helicopter Tours  
1762                             Taxis & Shuttles  
1803   Adrenaline & Ext

In [160]:
#fill the missing avg_rating values with the constant 3 (strategy='constant', so the column mean is NOT used)
rating_column=dataset['avg_rating'].values.reshape(-1,1) #one column, one row per location, as the imputer is fed a 2-D array
imputer=SimpleImputer(missing_values=np.nan,strategy='constant',fill_value=3)
new_avg_rating=imputer.fit_transform(rating_column)

In [161]:
#show the imputed ratings; the array is named new_avg_rating (the name `new` is never defined and raised NameError on a fresh kernel)
print(new_avg_rating)

[[4.5]
 [4.5]
 [4.5]
 ...
 [3. ]
 [3. ]
 [3. ]]


In [162]:
#overwrite the avg_rating column with the imputed values held in new_avg_rating
dataset['avg_rating']=new_avg_rating

In [163]:
#print the dataset to inspect it after the avg_rating column was overwritten
print(dataset)

                                    title  avg_rating  voted_by  \
0                        Boudhanath Stupa         4.5      8897   
1                    Swayambhunath Temple         4.5      6203   
2                    Pashupatinath Temple         4.5      4937   
3                       Chandragiri Hills         4.5       399   
4                         Kopan Monastery         4.5       787   
...                                   ...         ...       ...   
2345         Zeal Nepal Trek & Expedition         3.0         0   
2346                        Mohit Sunuwar         3.0         0   
2347  SITA (Travel Corporation India Ltd)         3.0         0   
2348                       Himalayan Hike         3.0         0   
2349                            River Fun         3.0         0   

                                            genre  
0                                 Religious Sites  
1                                 Religious Sites  
2                                 Relig

In [164]:
#list the distinct raw genre strings (a single string may hold several genres joined by '•')
print(dataset['genre'].unique())

['Religious Sites' 'Mountains' 'Neighborhoods' 'Gardens'
 'Points of Interest & Landmarks • Religious Sites'
 'Historic Sites • Points of Interest & Landmarks' 'Flea & Street Markets'
 'National Parks' 'Monuments & Statues'
 'Points of Interest & Landmarks • Architectural Buildings'
 'Speciality Museums' 'Architectural Buildings' 'Lookouts'
 'Historic Walking Areas' 'Neighborhoods • Points of Interest & Landmarks'
 'Points of Interest & Landmarks • Historic Walking Areas' 'Parks'
 'Religious Sites • Monuments & Statues' 'Sports Complexes'
 'Points of Interest & Landmarks' 'Bodies of Water'
 'Art Museums • History Museums' 'Nature & Wildlife Areas • Forests'
 'Religious Sites • Churches & Cathedrals' 'Casinos' 'History Museums'
 'Farmers Markets' 'Sports Camps & Clinics' 'Art Galleries' 'Libraries'
 'Natural History Museums' 'Shopping Malls'
 'Points of Interest & Landmarks • Monuments & Statues'
 'Military Museums • History Museums'
 'Architectural Buildings • Religious Sites' 'Amuseme

In [165]:
#display the genre column as the cell output
dataset['genre']

0                                   Religious Sites
1                                   Religious Sites
2                                   Religious Sites
3                                         Mountains
4                                   Religious Sites
                           ...                     
2345       Multi-day Tours • River Rafting & Tubing
2346    Parasailing & Paragliding • Multi-day Tours
2347                Multi-day Tours • Balloon Rides
2348                   City Tours • Multi-day Tours
2349      River Rafting & Tubing • Taxis & Shuttles
Name: genre, Length: 2350, dtype: object

In [166]:
#split every '•'-separated genre string into a list of genre names with surrounding whitespace removed
#the values are iterated directly instead of looking up dataset['genre'][i] by label,
#so the result stays row-aligned even when the index is not the default 0..n-1
#NOTE: this cell expects genre to still hold strings; running it a second time fails because lists have no .split
genre_arr=[[part.strip() for part in genre_str.split('•')] for genre_str in dataset['genre']]

dataset['genre']=genre_arr

In [167]:
#display the dataset as the cell output
dataset

Unnamed: 0,title,avg_rating,voted_by,genre
0,Boudhanath Stupa,4.5,8897,[Religious Sites]
1,Swayambhunath Temple,4.5,6203,[Religious Sites]
2,Pashupatinath Temple,4.5,4937,[Religious Sites]
3,Chandragiri Hills,4.5,399,[Mountains]
4,Kopan Monastery,4.5,787,[Religious Sites]
...,...,...,...,...
2345,Zeal Nepal Trek & Expedition,3.0,0,"[Multi-day Tours, River Rafting & Tubing]"
2346,Mohit Sunuwar,3.0,0,"[Parasailing & Paragliding, Multi-day Tours]"
2347,SITA (Travel Corporation India Ltd),3.0,0,"[Multi-day Tours, Balloon Rides]"
2348,Himalayan Hike,3.0,0,"[City Tours, Multi-day Tours]"


In [168]:
#print the genre column to inspect its current contents
print(dataset['genre'])

0                                  [Religious Sites]
1                                  [Religious Sites]
2                                  [Religious Sites]
3                                        [Mountains]
4                                  [Religious Sites]
                            ...                     
2345       [Multi-day Tours, River Rafting & Tubing]
2346    [Parasailing & Paragliding, Multi-day Tours]
2347                [Multi-day Tours, Balloon Rides]
2348                   [City Tours, Multi-day Tours]
2349      [River Rafting & Tubing, Taxis & Shuttles]
Name: genre, Length: 2350, dtype: object


In [172]:
#flatten the per-row genre lists into one long list of genre names
super_genre_list=[genre_name for row_genres in dataset['genre'] for genre_name in row_genres]

#a set keeps a single copy of each genre name
superset=set(super_genre_list)

In [171]:
#print the set of unique genre names
print(superset)

{'Art Galleries', 'Architectural Buildings', 'Horse-Drawn Carriage Tours', 'Vespa, Scooter & Moped Tours', 'Amusement & Theme Parks', 'Archaeology Tours', 'Nature & Wildlife Tours', 'Antique Shops', 'Convention Centers', 'Neighborhoods', 'Motorcycle Tours', 'Government Buildings', 'Shopping Tours', 'Theaters', 'Dance Clubs & Discos', 'Motorcycle Trails', 'Distilleries', 'Visitor Centers', 'Kayaking & Canoeing', 'Ski & Snow Tours', 'Factory Tours', 'Other Outdoor Activities', 'National Parks', 'Distillery Tours', 'Bars & Clubs', 'Horseback Riding Tours', 'Canyoning & Rappelling Tours', 'Coffee & Tea Tours', 'Multi-day Tours', 'Historic Sites', 'Submarine Tours', 'Walking Tours', 'Waterfalls', 'Military Museums', '4WD, ATV & Off-Road Tours', 'Fishing Charters & Tours', 'Factory Outlets', 'Playgrounds', 'Game & Entertainment Centers', 'Forests', 'Mountains', 'Private Tours', 'Scenic Walking Areas', 'Safaris', 'Taxis & Shuttles', 'Food Tours', 'Dams', 'Zipline & Aerial Adventure Parks', 'B

In [87]:
#number of unique genre names
print(len(superset))

108


In [234]:
#checklists: for every broad category, the genre names that count as belonging to it
#(a genre name may appear in more than one checklist)
industries_checklist=[
    'Distilleries','Factory Tours','Factory Outlets',
    'Lessons & Workshops','Wineries & Vineyards'
]
health_and_lifestyle_checklist=['Sports Camps & Clinics','Spas','Yoga & Pilates']
food_checklist=[
    'Coffee & Tea Tours','Food Tours','Wineries & Vineyards',
    'Beer Tastings & Tours','Cooking Classes','Farmers Markets'
]

entertainment_checklist=[
    'Horse-Drawn Carriage Tours','Vespa, Scooter & Moped Tours','Amusement & Theme Parks',
    'Shopping Tours','Theaters','Dance Clubs & Discos','Other Outdoor Activities',
    'Bars & Clubs','Coffee & Tea Tours','Fishing Charters & Tours','Playgrounds',
    'Game & Entertainment Centers','Food Tours','Movie Theaters','Karaoke Bars',
    'Theatre & Performances','Shopping Malls','Sports Complexes','City Tours',
    'Self-Guided Tours & Rentals','Farmers Markets','Flea & Street Markets',
    'Bar, Club & Pub Tours','Casinos'
]

religious_checklist=[
    'Art Galleries','Archaeology Tours','Antique Shops',
    'Cultural Tours','Religious Sites','Monuments & Statues'
]

adventure_checklist=[
    'Archaeology Tours','Nature & Wildlife Tours','Kayaking & Canoeing','Ski & Snow Tours',
    'Motorcycle Trails','Horseback Riding Tours','Canyoning & Rappelling Tours',
    'Safaris','Zipline & Aerial Adventure Parks','Biking Trails','Gear Rentals','Bike Tours',
    'Climbing Tours','Eco Tours','River Rafting & Tubing','Sports Camps & Clinics',
    'Parasailing & Paragliding','Balloon Rides','Photography Tours','4WD, ATV & Off-Road Tours',
    'Hiking & Camping Tours','Bus Tours','Hiking Trails','Adrenaline & Extreme Tours',
    'Helicopter Tours','Sightseeing Tours','Nature & Wildlife Areas','Submarine Tours'
]
nature_checklist=[
    'Nature & Wildlife Tours','Dams','National Parks','Fishing Charters & Tours',
    'Forests','Mountains','Private Tours','Scenic Walking Areas','Safaris',
    'Climbing Tours','Points of Interest & Landmarks','Wineries & Vineyards',
    'Water Sports','Valleys','Hiking Trails','Adrenaline & Extreme Tours',
    'Lookouts','Helicopter Tours','Sightseeing Tours','Air Tours',
    'Bodies of Water','Nature & Wildlife Areas'
]

art_and_architecture_checklist=[
    'Art Galleries','Architectural Buildings','Archaeology Tours','Antique Shops',
    'Government Buildings','Historic Sites','Military Museums','Speciality & Gift Shops',
    'Theaters','Cultural Tours','Natural History Museums','Points of Interest & Landmarks',
    'Historical & Heritage Tours',"Children's Museums",'City Tours','Religious Sites',
    'Paint & Pottery Studios','Churches & Cathedrals','Art Museums','Libraries',
    'Monuments & Statues','Lookouts','History Museums','Convention Centers',
    'Movie Theaters','Sightseeing Tours','Historic Walking Areas'
]

history_checklist=[
    'Art Galleries','Architectural Buildings','Archaeology Tours','Antique Shops',
    'Government Buildings','Multi-day Tours','Historic Sites','Military Museums',
    'Speciality & Gift Shops','Natural History Museums','Points of Interest & Landmarks',
    'Historical & Heritage Tours',"Children's Museums",'City Tours','Religious Sites',
    'Paint & Pottery Studios','Churches & Cathedrals','Art Museums','Libraries',
    'Monuments & Statues','Lookouts','History Museums','Convention Centers',
    'Factory Tours','Historic Walking Areas'
]

## Adding more columns to genre to make it easier for recommendation 

Columns to add: <b>History, Art and Architecture, Religious, Nature, Adventure, Entertainment, Food, Health and Lifestyle, Industries</b>

Columns will be given value for each record as per the description in the original 'genre' column

In [293]:
def assign_history_col(dataset, checklist=None):
    '''Flags every row whose genre list contains at least one history genre.

    dataset   : DataFrame (or mapping) whose 'genre' entry yields one list of genre names per row
    checklist : optional iterable of genre names that count as history;
                when omitted, the module-level history_checklist is used
    returns   : list of bool with exactly one entry per row, in row order
    '''
    if checklist is None:
        checklist=history_checklist
    #lower-case the checklist once so the comparison is case-insensitive
    wanted={check_item.lower() for check_item in checklist}
    #every genre of the row is tested (previously only the first one was),
    #and a row with an empty genre list still contributes a single False
    return [any(item.lower() in wanted for item in each_list) for each_list in dataset['genre']]

In [294]:
#build the history flags with assign_history_col and store them as the 'history' column
history_arr=assign_history_col(dataset)
dataset['history']=history_arr

In [295]:
def assign_art_and_architecture_col(dataset, checklist=None):
    '''Flags every row whose genre list contains at least one art/architecture genre.

    dataset   : DataFrame (or mapping) whose 'genre' entry yields one list of genre names per row
    checklist : optional iterable of genre names that count as art and architecture;
                when omitted, the module-level art_and_architecture_checklist is used
    returns   : list of bool with exactly one entry per row, in row order
    '''
    if checklist is None:
        checklist=art_and_architecture_checklist
    #lower-case the checklist once so the comparison is case-insensitive
    wanted={check_item.lower() for check_item in checklist}
    #every genre of the row is tested (previously only the first one was),
    #and a row with an empty genre list still contributes a single False
    return [any(item.lower() in wanted for item in each_list) for each_list in dataset['genre']]

In [296]:
#build the art/architecture flags with assign_art_and_architecture_col and store them as the 'art_and_architecture' column
art_and_architecture_arr=assign_art_and_architecture_col(dataset)
dataset['art_and_architecture']=art_and_architecture_arr

In [297]:
def assign_nature_col(dataset, checklist=None):
    '''Flags every row whose genre list contains at least one nature genre.

    dataset   : DataFrame (or mapping) whose 'genre' entry yields one list of genre names per row
    checklist : optional iterable of genre names that count as nature;
                when omitted, the module-level nature_checklist is used
    returns   : list of bool with exactly one entry per row, in row order
    '''
    if checklist is None:
        checklist=nature_checklist
    #lower-case the checklist once so the comparison is case-insensitive
    wanted={check_item.lower() for check_item in checklist}
    #every genre of the row is tested (previously only the first one was),
    #and a row with an empty genre list still contributes a single False
    return [any(item.lower() in wanted for item in each_list) for each_list in dataset['genre']]

In [298]:
#build the nature flags with assign_nature_col and store them as the 'nature' column
nature_arr=assign_nature_col(dataset)
dataset['nature']=nature_arr

In [299]:
def assign_adventure_col(dataset, checklist=None):
    '''Flags every row whose genre list contains at least one adventure genre.

    dataset   : DataFrame (or mapping) whose 'genre' entry yields one list of genre names per row
    checklist : optional iterable of genre names that count as adventure;
                when omitted, the module-level adventure_checklist is used
    returns   : list of bool with exactly one entry per row, in row order
    '''
    if checklist is None:
        checklist=adventure_checklist
    #lower-case the checklist once so the comparison is case-insensitive
    wanted={check_item.lower() for check_item in checklist}
    #every genre of the row is tested (previously only the first one was),
    #and a row with an empty genre list still contributes a single False
    return [any(item.lower() in wanted for item in each_list) for each_list in dataset['genre']]

In [300]:
#build the adventure flags with assign_adventure_col and store them as the 'adventure' column
adventure_arr=assign_adventure_col(dataset)
dataset['adventure']=adventure_arr

In [301]:
def assign_entertainment_col(dataset, checklist=None):
    '''Flags every row whose genre list contains at least one entertainment genre.

    dataset   : DataFrame (or mapping) whose 'genre' entry yields one list of genre names per row
    checklist : optional iterable of genre names that count as entertainment;
                when omitted, the module-level entertainment_checklist is used
    returns   : list of bool with exactly one entry per row, in row order
    '''
    if checklist is None:
        checklist=entertainment_checklist
    #lower-case the checklist once so the comparison is case-insensitive
    wanted={check_item.lower() for check_item in checklist}
    #every genre of the row is tested (previously only the first one was),
    #and a row with an empty genre list still contributes a single False
    return [any(item.lower() in wanted for item in each_list) for each_list in dataset['genre']]

In [302]:
#build the entertainment flags with assign_entertainment_col and store them as the 'entertainment' column
entertainment_arr=assign_entertainment_col(dataset)
dataset['entertainment']=entertainment_arr

In [303]:
def assign_health_and_lifestyle_col(dataset, checklist=None):
    '''Flags every row whose genre list contains at least one health/lifestyle genre.

    dataset   : DataFrame (or mapping) whose 'genre' entry yields one list of genre names per row
    checklist : optional iterable of genre names that count as health and lifestyle;
                when omitted, the module-level health_and_lifestyle_checklist is used
    returns   : list of bool with exactly one entry per row, in row order
    '''
    if checklist is None:
        checklist=health_and_lifestyle_checklist
    #lower-case the checklist once so the comparison is case-insensitive
    wanted={check_item.lower() for check_item in checklist}
    #every genre of the row is tested (previously only the first one was),
    #and a row with an empty genre list still contributes a single False
    return [any(item.lower() in wanted for item in each_list) for each_list in dataset['genre']]

In [304]:
#build the health/lifestyle flags with assign_health_and_lifestyle_col and store them as the 'health_and_lifestyle' column
health_and_lifestyle_arr=assign_health_and_lifestyle_col(dataset)
dataset['health_and_lifestyle']=health_and_lifestyle_arr

In [305]:
def assign_food_col(dataset, checklist=None):
    '''Flags every row whose genre list contains at least one food genre.

    dataset   : DataFrame (or mapping) whose 'genre' entry yields one list of genre names per row
    checklist : optional iterable of genre names that count as food;
                when omitted, the module-level food_checklist is used
    returns   : list of bool with exactly one entry per row, in row order
    '''
    if checklist is None:
        checklist=food_checklist
    #lower-case the checklist once so the comparison is case-insensitive
    wanted={check_item.lower() for check_item in checklist}
    #every genre of the row is tested (previously only the first one was),
    #and a row with an empty genre list still contributes a single False
    return [any(item.lower() in wanted for item in each_list) for each_list in dataset['genre']]

In [306]:
#build the food flags with assign_food_col and store them as the 'food' column
food_arr=assign_food_col(dataset)
dataset['food']=food_arr

In [307]:
def assign_industries_col(dataset, checklist=None):
    '''Flags every row whose genre list contains at least one industries genre.

    dataset   : DataFrame (or mapping) whose 'genre' entry yields one list of genre names per row
    checklist : optional iterable of genre names that count as industries;
                when omitted, the module-level industries_checklist is used
    returns   : list of bool with exactly one entry per row, in row order
    '''
    if checklist is None:
        checklist=industries_checklist
    #lower-case the checklist once so the comparison is case-insensitive
    wanted={check_item.lower() for check_item in checklist}
    #every genre of the row is tested (previously only the first one was),
    #and a row with an empty genre list still contributes a single False
    return [any(item.lower() in wanted for item in each_list) for each_list in dataset['genre']]

In [308]:
#build the industries flags with assign_industries_col and store them as the 'industries' column
industries_arr=assign_industries_col(dataset)
dataset['industries']=industries_arr

In [309]:
def assign_religious_col(dataset, checklist=None):
    '''Flags every row whose genre list contains at least one religious genre.

    dataset   : DataFrame (or mapping) whose 'genre' entry yields one list of genre names per row
    checklist : optional iterable of genre names that count as religious;
                when omitted, the module-level religious_checklist is used
    returns   : list of bool with exactly one entry per row, in row order
    '''
    if checklist is None:
        checklist=religious_checklist
    #lower-case the checklist once so the comparison is case-insensitive
    wanted={check_item.lower() for check_item in checklist}
    #every genre of the row is tested (previously only the first one was),
    #and a row with an empty genre list still contributes a single False
    return [any(item.lower() in wanted for item in each_list) for each_list in dataset['genre']]

In [310]:
#build the religious flags with assign_religious_col and store them as the 'religious' column
religious_arr=assign_religious_col(dataset)
dataset['religious']=religious_arr
#preview the first five rows, including the flag columns present at this point
print(dataset.head())

                  title  avg_rating  voted_by              genre  history  \
0      Boudhanath Stupa         4.5      8897  [Religious Sites]     True   
1  Swayambhunath Temple         4.5      6203  [Religious Sites]     True   
2  Pashupatinath Temple         4.5      4937  [Religious Sites]     True   
3     Chandragiri Hills         4.5       399        [Mountains]    False   
4       Kopan Monastery         4.5       787  [Religious Sites]     True   

   art_and_architecture  nature  adventure  entertainment  \
0                  True   False      False          False   
1                  True   False      False          False   
2                  True   False      False          False   
3                 False    True      False          False   
4                  True   False      False          False   

   health_and_lifestyle   food  industries  religious  
0                 False  False       False       True  
1                 False  False       False       True  
2 

In [312]:
#shape (rows, columns) of the dataset after the additional columns were added, followed by the dataset itself
print(dataset.shape)
print(dataset)

(2350, 13)
                                    title  avg_rating  voted_by  \
0                        Boudhanath Stupa         4.5      8897   
1                    Swayambhunath Temple         4.5      6203   
2                    Pashupatinath Temple         4.5      4937   
3                       Chandragiri Hills         4.5       399   
4                         Kopan Monastery         4.5       787   
...                                   ...         ...       ...   
2345         Zeal Nepal Trek & Expedition         3.0         0   
2346                        Mohit Sunuwar         3.0         0   
2347  SITA (Travel Corporation India Ltd)         3.0         0   
2348                       Himalayan Hike         3.0         0   
2349                            River Fun         3.0         0   

                                             genre  history  \
0                                [Religious Sites]     True   
1                                [Religious Sites]     Tru

In [313]:
#write the updated dataset to 'attractions_of_ktm_updated_columns.csv' in the working directory
#NOTE(review): index=None is falsy, so presumably the row index is left out exactly as with index=False - confirm
dataset.to_csv('attractions_of_ktm_updated_columns.csv',index=None)