In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive; the CSV paths read later in this
# notebook live under that mount point.
drive.mount('/content/drive')


Mounted at /content/drive


In [19]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
# Set the column-width display option to None so long text cells (such as the
# recommendation lists shown later) are not truncated when frames are rendered.
pd.set_option('display.max_colwidth', None)



In [3]:

# Locations of the two source tables on the mounted Drive.
csv_file_path_1, csv_file_path_2 = (
    '/content/drive/MyDrive/Major_Project/City.csv',
    '/content/drive/MyDrive/Major_Project/Places.csv',
)

# Read each CSV into its own DataFrame (city table first, then places).
df_city, df_places = map(pd.read_csv, (csv_file_path_1, csv_file_path_2))

# Peek at the leading rows of the city table.
print(df_city.head())

          City  Ratings Ideal_duration  Best_time_to_visit  \
0       Manali      4.5         02-Apr        October-June   
1   Leh Ladakh      4.6         05-Jul         JulyOctober   
2        Coorg      4.2         02-Mar      September-June   
3      Andaman      4.5         04-Jun       October-March   
4  Lakshadweep      4.0         04-Jun  September-February   

                                           City_desc  
0  [' One of the most popular hill stations in Hi...  
1  [" Ladakh is a union territory in the Kashmir ...  
2  [' Located amidst imposing mountains in Karnat...  
3  [' Replete with turquoise blue water beaches a...  
4  [" Formerly known as Laccadive Islands, Laksha...  


In [4]:
# Dimensions of the city table as (rows, columns).
df_city.shape

(100, 5)

In [5]:
# Count the missing values in each column of the city table.
df_city.isna().sum()

City                   0
Ratings                0
Ideal_duration         0
Best_time_to_visit    72
City_desc              0
dtype: int64

In [6]:
# Keep a handle on the visiting-season column before it is dropped below.
btv = df_city['Best_time_to_visit']

# Working copy of the city table without the duration, season and rating columns.
df_city_mod = df_city.drop(
    columns=['Ideal_duration', 'Best_time_to_visit', 'Ratings'],
)
df_city_mod

Unnamed: 0,City,City_desc
0,Manali,[' One of the most popular hill stations in Hi...
1,Leh Ladakh,"["" Ladakh is a union territory in the Kashmir ..."
2,Coorg,[' Located amidst imposing mountains in Karnat...
3,Andaman,[' Replete with turquoise blue water beaches a...
4,Lakshadweep,"["" Formerly known as Laccadive Islands, Laksha..."
...,...,...
95,Pushkar,[' Pushka is a small temple town located aroun...
96,Chittorgarh,"[' Located in South-Eastern Rajasthan, Chittor..."
97,Nahan,"[' Away from the buzzing crowd of the city, Na..."
98,Lavasa,"["" Known as India's newest hill station, the L..."


In [7]:
# Baseline TF-IDF model over the city descriptions, with English stop words
# removed and the vectorizer's default tokenization.
tfidf = TfidfVectorizer(stop_words="english")

# One row per city description, one column per vocabulary term.
tfidf_matrix = tfidf.fit_transform(df_city_mod['City_desc'])

print(tfidf_matrix.shape)

(100, 3721)


In [8]:
# Print the vocabulary learned by the baseline vectorizer; the recorded output
# shows it includes purely numeric tokens such as '000'.
print(tfidf.get_feature_names_out())

['000' '050' '067' ... 'zones' 'zoo' 'zorbing']


In [9]:
# Only accept tokens made up entirely of ASCII letters, which keeps numeric
# tokens out of the vocabulary.
custom_token_pattern = r'\b[a-zA-Z]+\b'
tfidf_vectorizer = TfidfVectorizer(token_pattern=custom_token_pattern,stop_words="english")

# Fit and transform the data
# NOTE: this rebinds tfidf_matrix, replacing the matrix produced by the
# baseline `tfidf` vectorizer; later cells use this letters-only version.
tfidf_matrix = tfidf_vectorizer.fit_transform(df_city_mod['City_desc'])
print(tfidf_matrix.shape)

(100, 3612)


In [10]:
# Show the vocabulary learned by the letters-only vectorizer.
tfidf_vectorizer.get_feature_names_out()

array(['aap', 'aarti', 'aati', ..., 'zones', 'zoo', 'zorbing'],
      dtype=object)

In [11]:
# Densify the sparse TF-IDF matrix and label every column with its vocabulary term.
df_tfidf = pd.DataFrame(
    data=tfidf_matrix.toarray(),
    columns=tfidf_vectorizer.get_feature_names_out(),
)
print(df_tfidf)

    aap  aarti  aati  abandon  abdandoned  abounds  abroad  absolute  \
0   0.0    0.0   0.0      0.0         0.0      0.0     0.0       0.0   
1   0.0    0.0   0.0      0.0         0.0      0.0     0.0       0.0   
2   0.0    0.0   0.0      0.0         0.0      0.0     0.0       0.0   
3   0.0    0.0   0.0      0.0         0.0      0.0     0.0       0.0   
4   0.0    0.0   0.0      0.0         0.0      0.0     0.0       0.0   
..  ...    ...   ...      ...         ...      ...     ...       ...   
95  0.0    0.0   0.0      0.0         0.0      0.0     0.0       0.0   
96  0.0    0.0   0.0      0.0         0.0      0.0     0.0       0.0   
97  0.0    0.0   0.0      0.0         0.0      0.0     0.0       0.0   
98  0.0    0.0   0.0      0.0         0.0      0.0     0.0       0.0   
99  0.0    0.0   0.0      0.0         0.0      0.0     0.0       0.0   

    absolutely  abu  ...  yoga  yogi  yojana   young  younger   zanskar  \
0     0.000000  0.0  ...   0.0   0.0     0.0  0.0849      0.

In [12]:
# Pairwise linear kernel between every pair of rows of the TF-IDF matrix.
# NOTE(review): named cosine_sim on the presumption that the TF-IDF rows are
# unit-normalised; the 1.0 diagonal in the recorded output is consistent with
# that. Confirm if the vectorizer's normalisation is ever changed.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
cosine_sim

array([[1.        , 0.05385025, 0.03610766, ..., 0.02407961, 0.03974564,
        0.04391447],
       [0.05385025, 1.        , 0.02208131, ..., 0.01031907, 0.012415  ,
        0.0146601 ],
       [0.03610766, 0.02208131, 1.        , ..., 0.0487978 , 0.07127444,
        0.05815319],
       ...,
       [0.02407961, 0.01031907, 0.0487978 , ..., 1.        , 0.06655636,
        0.03223319],
       [0.03974564, 0.012415  , 0.07127444, ..., 0.06655636, 1.        ,
        0.04096921],
       [0.04391447, 0.0146601 , 0.05815319, ..., 0.03223319, 0.04096921,
        1.        ]])

In [13]:
# Dimensions of the similarity matrix (one row and one column per city).
cosine_sim.shape

(100, 100)

In [79]:
def recommend_destination(inp_dest, cosine_sim=cosine_sim):
    """Return the cities whose descriptions are most similar to ``inp_dest``.

    Parameters
    ----------
    inp_dest : str
        City name to look up in ``df_city_mod['City']``. Matching ignores
        letter case and surrounding whitespace.
    cosine_sim : array-like, optional
        Square similarity matrix whose row/column positions line up with the
        row order of ``df_city_mod``. Defaults to the module-level
        ``cosine_sim`` as it existed when this function was defined.

    Returns
    -------
    list of str or None
        Up to five city names ordered from most to least similar, or ``None``
        when ``inp_dest`` is not a string or does not match any city.
    """
    if not isinstance(inp_dest, str):
        return None

    wanted = inp_dest.strip().lower()
    city_names = df_city_mod['City']

    # Find the row *position* of the first matching city. A position (not an
    # index label) is needed because it is used to index into cosine_sim.
    idx = next(
        (
            pos
            for pos, name in enumerate(city_names)
            if isinstance(name, str) and name.strip().lower() == wanted
        ),
        None,
    )
    if idx is None:
        return None

    # Rank every other city by similarity. The query city is excluded by
    # position instead of assuming it sorts first: a tie at the top score
    # (e.g. a duplicated description) could otherwise leave the query city
    # inside its own recommendations.
    sim_scores = [
        (pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx
    ]
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    return [city_names.iloc[pos] for pos, _ in sim_scores[:5]]

In [16]:
# Empty table that accumulates one row per queried city together with its
# list of recommendations. Re-running this cell discards rows added so far.
pref_pd = pd.DataFrame(columns = ['Prefered Place','Recommendations'])
pref_pd

Unnamed: 0,Prefered Place,Recommendations


In [89]:
# City to look up; matching against the dataset and against the rows already
# stored in pref_pd is case-insensitive.
prefered_location = 'kolkata'
recommend = recommend_destination(prefered_location)

pref_dict = {'Prefered Place':prefered_location,'Recommendations':recommend}
already_listed = prefered_location.lower() in pref_pd['Prefered Place'].str.lower().values

if already_listed:
  print('{} already in recommendations list. Please choose another city'.format(prefered_location))
elif recommend is None:
  # recommend_destination returns None when the city is not in the dataset.
  print("Preferred Location is not in the dataset")
else:
  # DataFrame.append was removed in pandas 2.0; concatenate a one-row frame
  # instead. Wrapping the dict in a list keeps the recommendation list in a
  # single cell rather than expanding it into several rows.
  pref_pd = pd.concat([pref_pd, pd.DataFrame([pref_dict])], ignore_index=True)
  print('Recommendations added for {}!!!!'.format(prefered_location))


kolkata already in recommendations list. Please choose another city


In [87]:
# Show the recommendations collected so far.
pref_pd

Unnamed: 0,Prefered Place,Recommendations
0,Jammu,"[Vaishno Devi, Amarnath, Chandigarh, Almora, Gulmarg]"
1,Puri,"[Digha, Mathura, Alibaug, Bhubaneswar, Lavasa]"
2,Manali,"[Kasol, Gulmarg, Shimla, Nainital, Jaisalmer]"
3,Jodhpur,"[Jaipur, Bikaner, Udaipur, Mathura, Kolkata]"
4,Jaipur,"[Agra, Kolkata, Delhi, Gwalior, Jodhpur]"
5,Kolkata,"[Jaipur, Bangalore, Delhi, Mumbai, Hyderabad]"
6,Delhi,"[Jaipur, Bhubaneswar, Agra, Kolkata, Chandigarh]"
7,Mumbai,"[Kolkata, Mahabaleshwar, Lavasa, Lonavala, Gwalior]"
8,Lonavala,"[Khandala, Mahabaleshwar, Pune, Matheran, Lavasa]"
9,Pune,"[Ahmedabad, Alibaug, Lavasa, Lonavala, Mahabaleshwar]"


In [84]:
# Remove the row at position 13 of the preference table, if it exists.
# Slicing the index (instead of pref_pd.index[13]) yields an empty selection
# when the table has fewer than 14 rows, so re-running this cell is a no-op
# rather than an IndexError.
pref_pd = pref_pd.drop(pref_pd.index[13:14])