In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Expanded dataset with more popular places in Delhi
places_data = pd.DataFrame({
    'place': [
        'India Gate', 'Qutub Minar', 'Hauz Khas Village', 'Lotus Temple', 'Adventure Island',
        'Connaught Place', 'Red Fort', 'Humayun’s Tomb', 'Jama Masjid', 'Akshardham Temple',
        'Chandni Chowk', 'Sarojini Nagar Market', 'Lodhi Gardens', 'Garden of Five Senses', 'Dilli Haat',
        'National Rail Museum', 'ISKCON Temple', 'Nehru Planetarium', 'Jantar Mantar', 'Purana Qila',
        'Khan Market', 'Rajpath', 'Safdarjung Tomb', 'Tughlaqabad Fort', 'National Museum'
    ],
    'type': [
        'romantic', 'historical', 'romantic', 'spiritual', 'adventurous',
        'shopping', 'historical', 'historical', 'spiritual', 'spiritual',
        'shopping', 'shopping', 'romantic', 'romantic', 'cultural',
        'educational', 'spiritual', 'educational', 'historical', 'historical',
        'shopping', 'romantic', 'historical', 'historical', 'educational'
    ],
    'best_time': [
        'day', 'day', 'night', 'day', 'day',
        'night', 'day', 'day', 'day', 'day',
        'night', 'day', 'day', 'day', 'day',
        'day', 'day', 'day', 'day', 'day',
        'night', 'day', 'day', 'day', 'day'
    ],
    'rating': [
        4.5, 4.7, 4.8, 4.6, 4.4,
        4.3, 4.9, 4.7, 4.6, 4.8,
        4.2, 4.3, 4.7, 4.5, 4.4,
        4.5, 4.7, 4.3, 4.4, 4.6,
        4.4, 4.5, 4.5, 4.6, 4.8
    ],
    'budget': [
        'low', 'medium', 'high', 'low', 'medium',
        'medium', 'medium', 'medium', 'low', 'medium',
        'low', 'low', 'low', 'low', 'low',
        'low', 'low', 'low', 'low', 'low',
        'high', 'low', 'low', 'low', 'medium'
    ]
})

# Simulate user preference input
user_preferences = {
    'type': 'romantic',
    'best_time': 'night',
    'rating': 4.5,
    'budget': 'low'
}

# Step 1: Combine features to create a content descriptor
places_data['content'] = (places_data['type'] + " " +
                          places_data['best_time'] + " " +
                          places_data['budget'])

# Step 2: Vectorize content
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(places_data['content'])

# Combine user preferences to a single string for vectorization
user_pref_text = user_preferences['type'] + " " + user_preferences['best_time'] + " " + user_preferences['budget']
user_pref_vector = tfidf.transform([user_pref_text])

# Step 4: Calculate similarity scores
similarity_scores = cosine_similarity(user_pref_vector, tfidf_matrix).flatten()

# Add similarity scores to the dataset
places_data['similarity_score'] = similarity_scores

# Additional filtering based on other preferences
filtered_places = places_data[places_data['rating'] >= user_preferences['rating']]

# Step 5: Recommend top 3 places
recommendations = filtered_places.sort_values('similarity_score', ascending=False).head(3)

print("Top 3 recommended places:")
print(recommendations[['place', 'similarity_score', 'rating', 'budget','best_time']])


Top 3 recommended places:
                    place  similarity_score  rating budget best_time
2       Hauz Khas Village          0.700956     4.8   high     night
0              India Gate          0.677637     4.5    low       day
13  Garden of Five Senses          0.677637     4.5    low       day


In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from geopy.distance import geodesic  # Library to calculate geographical distances


# Step 1: Create a Larger Dataset with Popular Places in Delhi
places_data = pd.DataFrame({
    'place': [
        'India Gate', 'Qutub Minar', 'Hauz Khas Village', 'Lotus Temple', 'Adventure Island',
        'Red Fort', 'Humayun Tomb', 'Jama Masjid', 'Lodhi Garden', 'Connaught Place',
        'Akshardham Temple', 'Dilli Haat', 'Garden of Five Senses', 'National Rail Museum',
        'Kingdom of Dreams', 'Chandni Chowk', 'DLF CyberHub', 'Rashtrapati Bhavan',
        'Agrasen ki Baoli', 'National Museum', 'Sarojini Nagar Market', 'Select Citywalk Mall',
        'Nehru Planetarium', 'Raj Ghat', 'Jantar Mantar', 'ISKCON Temple', 'National Zoological Park',
        'Delhi Ridge', 'Bangla Sahib Gurudwara', 'Safdarjung Tomb'
    ],
    #iski value ko dynamic karne ka fayeda hai?
    'type': [
        'romantic', 'historical', 'romantic', 'spiritual', 'adventurous',
        'historical', 'historical', 'historical', 'romantic', 'romantic',
        'spiritual', 'cultural', 'romantic', 'educational', 'entertainment',
        'historical', 'entertainment', 'historical', 'historical', 'educational',
        'shopping', 'shopping', 'educational', 'historical', 'historical', 'spiritual',
        'wildlife', 'adventurous', 'spiritual', 'historical'
    ],
    'best_time': [
        'day', 'day', 'night', 'day', 'day',
        'day', 'day', 'day', 'morning', 'night',
        'day', 'night', 'evening', 'morning', 'evening',
        'day', 'evening', 'day', 'day', 'day',
        'day', 'evening', 'day', 'morning', 'day', 'day',
        'day', 'morning', 'morning', 'day'
    ],
    'rating': [
        4.5, 4.7, 4.3, 4.8, 4.1,
        4.6, 4.5, 4.4, 4.5, 4.7,
        4.9, 4.4, 4.6, 4.2, 4.8,
        4.6, 4.9, 4.7, 4.5, 4.2,
        4.3, 4.6, 4.4, 4.1, 4.7, 4.5,
        4.3, 4.4, 4.8, 4.5
    ],
    'budget': [
        'low', 'low', 'medium', 'medium', 'high',
        'low', 'medium', 'low', 'low', 'medium',
        'medium', 'medium', 'medium', 'low', 'high',
        'low', 'high', 'medium', 'low', 'low',
        'low', 'high', 'medium', 'low', 'low', 'medium',
        'medium', 'medium', 'medium', 'medium'
    ],
    'location': [
        (28.6129, 77.2295), (28.5245, 77.1855), (28.5494, 77.2001), (28.5535, 77.2588), (28.7373, 77.0689),
        (28.6562, 77.2410), (28.5933, 77.2507), (28.6507, 77.2334), (28.5930, 77.2196), (28.6315, 77.2167),
        (28.6127, 77.2773), (28.5686, 77.2097), (28.5168, 77.1874), (28.5850, 77.1734), (28.4691, 77.0305),
        (28.6565, 77.2303), (28.4941, 77.0891), (28.6143, 77.1995), (28.6260, 77.2240), (28.6111, 77.2197),
        (28.5650, 77.1996), (28.5296, 77.2193), (28.6063, 77.2036), (28.6421, 77.2507), (28.6271, 77.2166),
        (28.5244, 77.2057), (28.6076, 77.2448), (28.5961, 77.1673), (28.6266, 77.2090), (28.5881, 77.2053)
    ]
})

# Function to calculate the distance between user and each place
def calculate_distance(user_location, place_location):
    return geodesic(user_location, place_location).kilometers

# Example: Assume user's current location is Connaught Place, Delhi (latitude, longitude)
user_location = (28.6139, 77.2090)  # Coordinates for Connaught Place, Delhi

# Calculate distances and add to the DataFrame
places_data['distance'] = places_data['location'].apply(lambda loc: calculate_distance(user_location, loc))

# Combine multiple features into a single content descriptor
places_data['content'] = places_data['type'] + " " + places_data['best_time'] + " " + places_data['budget']

# Step 2: Vectorize content using TF-IDF
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(places_data['content'])

# Simulate user preference input
user_preferences = "romantic night low"
user_pref_vector = tfidf.transform([user_preferences])

# Calculate similarity scores using cosine similarity
similarity_scores = cosine_similarity(user_pref_vector, tfidf_matrix).flatten()

# Normalize distance scores and integrate into the similarity score
places_data['distance_score'] = 1 / (places_data['distance'] + 1)  # +1 to avoid division by zero
places_data['similarity_score'] = similarity_scores * 0.7 + places_data['distance_score'] * 0.3  # Weighted combination

# Step 3: Recommend top 3 places based on the combined similarity score
recommendations = places_data.sort_values('similarity_score', ascending=False).head(3)

print("Top 3 recommended places:")
print(recommendations[['place', 'similarity_score', 'rating', 'budget','best_time','distance']])


Top 3 recommended places:
               place  similarity_score  rating  budget best_time  distance
9    Connaught Place          0.680978     4.7  medium     night  2.090893
2  Hauz Khas Village          0.620498     4.3  medium     night  7.201292
0         India Gate          0.562672     4.5     low       day  2.007946


In [None]:

#final iteration work on this only please done by Adit


import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from geopy.distance import geodesic

# Step 1: Create a Larger Dataset with Popular Places in Delhi
places_data = pd.DataFrame({
    'place': [
        'India Gate', 'Qutub Minar', 'Hauz Khas Village', 'Lotus Temple', 'Adventure Island',
        'Red Fort', 'Humayun Tomb', 'Jama Masjid', 'Lodhi Garden', 'Connaught Place',
        'Akshardham Temple', 'Dilli Haat', 'Garden of Five Senses', 'National Rail Museum',
        'Kingdom of Dreams', 'Chandni Chowk', 'DLF CyberHub', 'Rashtrapati Bhavan',
        'Agrasen ki Baoli', 'National Museum', 'Sarojini Nagar Market', 'Select Citywalk Mall',
        'Nehru Planetarium', 'Raj Ghat', 'Jantar Mantar', 'ISKCON Temple', 'National Zoological Park',
        'Delhi Ridge', 'Bangla Sahib Gurudwara', 'Safdarjung Tomb'
    ],
    'type': [
        'romantic', 'historical', 'romantic', 'spiritual', 'adventurous',
        'historical', 'historical', 'historical', 'romantic', 'romantic',
        'spiritual', 'cultural', 'romantic', 'educational', 'entertainment',
        'historical', 'entertainment', 'historical', 'historical', 'educational',
        'shopping', 'shopping', 'educational', 'historical', 'historical', 'spiritual',
        'wildlife', 'adventurous', 'spiritual', 'historical'
    ],
    'best_time': [
        'day', 'day', 'night', 'day', 'day',
        'day', 'day', 'day', 'morning', 'night',
        'day', 'night', 'evening', 'morning', 'evening',
        'day', 'evening', 'day', 'day', 'day',
        'day', 'evening', 'day', 'morning', 'day', 'day',
        'day', 'morning', 'morning', 'day'
    ],
    ''
    #avg value update karni hai
    #udapte user feedback through form avg se update kardenge
    'rating': [
        4.5, 4.7, 4.3, 4.8, 4.1,
        4.6, 4.5, 4.4, 4.5, 4.7,
        4.9, 4.4, 4.6, 4.2, 4.8,
        4.6, 4.9, 4.7, 4.5, 4.2,
        4.3, 4.6, 4.4, 4.1, 4.7, 4.5,
        4.3, 4.4, 4.8, 4.5
    ],

    'budget': [
        'low', 'low', 'medium', 'medium', 'high',
        'low', 'medium', 'low', 'low', 'medium',
        'medium', 'medium', 'medium', 'low', 'high',
        'low', 'high', 'medium', 'low', 'low',
        'low', 'high', 'medium', 'low', 'low', 'medium',
        'medium', 'medium', 'medium', 'medium'
    ],

    'location': [
        (28.6129, 77.2295), (28.5245, 77.1855), (28.5494, 77.2001), (28.5535, 77.2588), (28.7373, 77.0689),
        (28.6562, 77.2410), (28.5933, 77.2507), (28.6507, 77.2334), (28.5930, 77.2196), (28.6315, 77.2167),
        (28.6127, 77.2773), (28.5686, 77.2097), (28.5168, 77.1874), (28.5850, 77.1734), (28.4691, 77.0305),
        (28.6565, 77.2303), (28.4941, 77.0891), (28.6143, 77.1995), (28.6260, 77.2240), (28.6111, 77.2197),
        (28.5650, 77.1996), (28.5296, 77.2193), (28.6063, 77.2036), (28.6421, 77.2507), (28.6271, 77.2166),
        (28.5244, 77.2057), (28.6076, 77.2448), (28.5961, 77.1673), (28.6266, 77.2090), (28.5881, 77.2053)
    ],
    #dynamically change value from high->low ya low->high
    #api se solved, value--> low,med,high

    #0,1,2 avg leke crowded update
    'crowded': [
        'medium', 'low', 'high', 'low', 'high',
        'medium', 'medium', 'medium', 'low', 'high',
        'medium', 'medium', 'low', 'low', 'high',
        'high', 'high', 'low', 'low', 'low',
        'high', 'medium', 'low', 'low', 'low', 'low',
        'medium', 'low', 'medium', 'low'
    ],
    'crime_rate': [
        2, 3, 5, 2, 4,
        3, 2, 4, 1, 6,
        1, 3, 2, 2, 5,
        5, 3, 2, 2, 2,
        4, 3, 1, 3, 3, 1,
        2, 3, 2, 3
    ],
    # New feature representing weather suitability

    #does wheter and political outage allow
    # baarish horahi hai? kya dange horahe hai?
    #weather api if else, api of news
    'doesSituationAllow': [
        1, 1, 1, 1, 1,
        1, 1, 1, 1, 1,
        1, 1, 1, 1, 1,
        1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1,
        1, 1, 1, 1
    ]
})

# Function to calculate the distance between user and each place
def calculate_distance(user_location, place_location):
    return geodesic(user_location, place_location).kilometers

# Example: Assume user's current location is Connaught Place, Delhi (latitude, longitude)
user_location = (28.6139, 77.2090)  # Coordinates for Connaught Place, Delhi

# Calculate distances and add to the DataFrame
places_data['distance'] = places_data['location'].apply(lambda loc: calculate_distance(user_location, loc))

# Combine multiple features into a single content descriptor
places_data['content'] = places_data['type'] + " " + places_data['best_time'] + " " + places_data['budget'] + " " + places_data['crowded']

# Step 2: Vectorize content using TF-IDF
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(places_data['content'])


# dry run

# Simulate user preference input
user_preferences = "spiritual night medium low"
user_pref_vector = tfidf.transform([user_preferences])


# Calculate similarity scores using cosine similarity
similarity_scores = cosine_similarity(user_pref_vector, tfidf_matrix).flatten()

# Normalize distance and crime rate scores
places_data['distance_score'] = 1 / (places_data['distance'] + 1)  # +1 to avoid division by zero
places_data['crime_rate_score'] = (10 - places_data['crime_rate']) / 10  # Normalize crime rate to 0-1 scale

# Integrate multiple features into the similarity score
places_data['similarity_score'] = (
    similarity_scores * 0.4 + #isme cosine dekh rahe hai
    places_data['distance_score'] * 0.05 +
    places_data['crime_rate_score'] * 0.05 +
    places_data['doesSituationAllow'] * 1  # Give a large weight to weather suitability
)
# Reduce the score by 0.5 and divide by 0.5
places_data['adjusted_similarity_score'] = (places_data['similarity_score'] - 1)/0.5
# Get top recommendations
top_recommendations = places_data.sort_values(by='similarity_score', ascending=False).head(5)

print("Top Recommendations for the User:")
print(top_recommendations[['place', 'adjusted_similarity_score','distance','doesSituationAllow','budget','crowded']])


Top Recommendations for the User:
                     place  adjusted_similarity_score  distance  \
25           ISKCON Temple                   0.654499  9.924421   
3             Lotus Temple                   0.646122  8.279173   
10       Akshardham Temple                   0.592909  6.681019   
28  Bangla Sahib Gurudwara                   0.559756  1.407535   
11              Dilli Haat                   0.503698  5.021023   

    doesSituationAllow  budget crowded  
25                   1  medium     low  
3                    1  medium     low  
10                   1  medium  medium  
28                   1  medium  medium  
11                   1  medium  medium  


In [None]:
#final iteration work on this only please done by apoorv and prawns


import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from geopy.distance import geodesic

# Step 1: Create a Larger Dataset with Popular Places in Delhi
places_data = pd.DataFrame({
    'place': [
        'India Gate', 'Qutub Minar', 'Hauz Khas Village', 'Lotus Temple', 'Adventure Island',
        'Red Fort', 'Humayun Tomb', 'Jama Masjid', 'Lodhi Garden', 'Connaught Place',
        'Akshardham Temple', 'Dilli Haat', 'Garden of Five Senses', 'National Rail Museum',
        'Kingdom of Dreams', 'Chandni Chowk', 'DLF CyberHub', 'Rashtrapati Bhavan',
        'Agrasen ki Baoli', 'National Museum', 'Sarojini Nagar Market', 'Select Citywalk Mall',
        'Nehru Planetarium', 'Raj Ghat', 'Jantar Mantar', 'ISKCON Temple', 'National Zoological Park',
        'Delhi Ridge', 'Bangla Sahib Gurudwara', 'Safdarjung Tomb'
    ],
    'type': [
        'romantic', 'historical', 'romantic', 'spiritual', 'adventurous',
        'historical', 'historical', 'historical', 'romantic', 'romantic',
        'spiritual', 'cultural', 'romantic', 'educational', 'entertainment',
        'historical', 'entertainment', 'historical', 'historical', 'educational',
        'shopping', 'shopping', 'educational', 'historical', 'historical', 'spiritual',
        'wildlife', 'adventurous', 'spiritual', 'historical'
    ],
    'best_time': [
        'day', 'day', 'night', 'day', 'day',
        'day', 'day', 'day', 'morning', 'night',
        'day', 'night', 'evening', 'morning', 'evening',
        'day', 'evening', 'day', 'day', 'day',
        'day', 'evening', 'day', 'morning', 'day', 'day',
        'day', 'morning', 'morning', 'day'
    ],
    ''
    #avg value update karni hai
    #udapte user feedback through form avg se update kardenge
    'rating': [
        4.5, 4.7, 4.3, 4.8, 4.1,
        4.6, 4.5, 4.4, 4.5, 4.7,
        4.9, 4.4, 4.6, 4.2, 4.8,
        4.6, 4.9, 4.7, 4.5, 4.2,
        4.3, 4.6, 4.4, 4.1, 4.7, 4.5,
        4.3, 4.4, 4.8, 4.5
    ],

    'budget': [
        'low', 'low', 'medium', 'medium', 'high',
        'low', 'medium', 'low', 'low', 'medium',
        'medium', 'medium', 'medium', 'low', 'high',
        'low', 'high', 'medium', 'low', 'low',
        'low', 'high', 'medium', 'low', 'low', 'medium',
        'medium', 'medium', 'medium', 'medium'
    ],

    'location': [
        (28.6129, 77.2294), (28.5245, 77.1855), (28.5495, 77.2036), (28.5535, 77.2591), (28.7238, 77.1135),
        (28.6562, 77.2410), (28.5933, 77.2507), (28.6507, 77.2334), (28.5931, 77.2197), (28.6304, 77.2177),
        (28.6127, 77.2773), (28.5733, 77.2075), (28.5134, 77.1975), (28.5850, 77.1814), (28.4679, 77.0689),
        (28.6505, 77.2303), (28.4950, 77.0895), (28.6143, 77.1994), (28.6261, 77.2250), (28.6119, 77.2193),
        (28.5773, 77.1963), (28.5283, 77.2190), (28.6038, 77.1981), (28.6406, 77.2495), (28.6271, 77.2166),
        (28.7293, 77.0970), (28.6067, 77.2454), (28.6162, 77.1687), (28.6259, 77.2090), (28.5893, 77.2106)
    ],
    #dynamically change value from high->low ya low->high
    #api se solved, value--> low,med,high

    #0,1,2 avg leke crowded update
    'crowded': [
        'medium', 'low', 'high', 'low', 'high',
        'medium', 'medium', 'medium', 'low', 'high',
        'medium', 'medium', 'low', 'low', 'high',
        'high', 'high', 'low', 'low', 'low',
        'high', 'medium', 'low', 'low', 'low', 'low',
        'medium', 'low', 'medium', 'low'
    ],
    'crime_rate': [
        2, 3, 5, 2, 4,
        3, 2, 4, 1, 6,
        1, 3, 2, 2, 5,
        5, 3, 2, 2, 2,
        4, 3, 1, 3, 3, 1,
        2, 3, 2, 3
    ],
    # New feature representing weather suitability

    #does wheter and political outage allow
    # baarish horahi hai? kya dange horahe hai?
    #weather api if else, api of news
    'doesSituationAllow': [
        1, 1, 1, 1, 1,
        1, 1, 1, 1, 1,
        1, 1, 1, 1, 1,
        1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1,
        1, 1, 1, 1
    ]
})

# Function to calculate the distance between user and each place
def calculate_distance(user_location, place_location):
    return geodesic(user_location, place_location).kilometers

# Example: Assume user's current location is Connaught Place, Delhi (latitude, longitude)
user_location = (28.6139, 77.2090)  # Coordinates for Connaught Place, Delhi

# Calculate distances and add to the DataFrame
places_data['distance'] = places_data['location'].apply(lambda loc: calculate_distance(user_location, loc))

# Combine multiple features into a single content descriptor
places_data['content'] = places_data['type'] + " " + places_data['best_time'] + " " + places_data['budget'] + " " + places_data['crowded']

# Step 2: Vectorize content using TF-IDF
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(places_data['content'])

# dry run

# Simulate user preference input
user_preferences = "adventurous day high medium"
user_pref_vector = tfidf.transform([user_preferences])


# Calculate similarity scores using cosine similarity
similarity_scores = cosine_similarity(user_pref_vector, tfidf_matrix).flatten()

# Normalize distance and crime rate scores
places_data['distance_score'] = 1 / (places_data['distance'] + 1)  # +1 to avoid division by zero
places_data['crime_rate_score'] = (10 - places_data['crime_rate']) / 10  # Normalize crime rate to 0-1 scale

# Integrate multiple features into the similarity score
places_data['similarity_score'] = (
    similarity_scores * 0.38 + #isme cosine dekh rahe hai
    places_data['distance_score'] * 0.07 +
    places_data['crime_rate_score'] * 0.05 +
    places_data['doesSituationAllow'] * 1  # Give a large weight to weather suitability
)
# Reduce the score by 0.5 and divide by 0.5
places_data['adjusted_similarity_score'] = (places_data['similarity_score'] - 1)/0.5
# Get top recommendations
top_recommendations = places_data.sort_values(by='similarity_score', ascending=False).head(5)

print("Top Recommendations for the User:")
print(top_recommendations[['place', 'adjusted_similarity_score','distance','doesSituationAllow','budget','crowded']])

Top Recommendations for the User:
                 place  adjusted_similarity_score   distance  \
4     Adventure Island                   0.746113  15.345983   
27         Delhi Ridge                   0.570843   3.949483   
15       Chandni Chowk                   0.405366   4.559822   
6         Humayun Tomb                   0.390427   4.674131   
17  Rashtrapati Bhavan                   0.380031   0.939913   

    doesSituationAllow  budget crowded  
4                    1    high    high  
27                   1  medium     low  
15                   1     low    high  
6                    1  medium  medium  
17                   1  medium     low  


In [None]:
#more fine tuned model by Adit
#5th iteration

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from geopy.distance import geodesic

# Step 1: Create a Larger Dataset with Popular Places in Delhi
places_data = pd.DataFrame({
    'place': [
        'India Gate', 'Qutub Minar', 'Hauz Khas Village', 'Lotus Temple', 'Adventure Island',
        'Red Fort', 'Humayun Tomb', 'Jama Masjid', 'Lodhi Garden', 'Connaught Place',
        'Akshardham Temple', 'Dilli Haat', 'Garden of Five Senses', 'National Rail Museum',
        'Kingdom of Dreams', 'Chandni Chowk', 'DLF CyberHub', 'Rashtrapati Bhavan',
        'Agrasen ki Baoli', 'National Museum', 'Sarojini Nagar Market', 'Select Citywalk Mall',
        'Nehru Planetarium', 'Raj Ghat', 'Jantar Mantar', 'ISKCON Temple', 'National Zoological Park',
        'Delhi Ridge', 'Bangla Sahib Gurudwara', 'Safdarjung Tomb'
    ],
    'type': [
        'romantic', 'historical', 'romantic', 'spiritual', 'adventurous',
        'historical', 'historical', 'historical', 'romantic', 'romantic',
        'spiritual', 'cultural', 'romantic', 'educational', 'entertainment',
        'historical', 'entertainment', 'historical', 'historical', 'educational',
        'shopping', 'shopping', 'educational', 'historical', 'historical', 'spiritual',
        'wildlife', 'adventurous', 'spiritual', 'historical'
    ],
    'best_time': [
        'day', 'day', 'night', 'day', 'day',
        'day', 'day', 'day', 'morning', 'night',
        'day', 'night', 'evening', 'morning', 'evening',
        'day', 'evening', 'day', 'day', 'day',
        'day', 'evening', 'day', 'morning', 'day', 'day',
        'day', 'morning', 'morning', 'day'
    ],
    'rating': [
        4.5, 4.7, 4.3, 4.8, 4.1,
        4.6, 4.5, 4.4, 4.5, 4.7,
        4.9, 4.4, 4.6, 4.2, 4.8,
        4.6, 4.9, 4.7, 4.5, 4.2,
        4.3, 4.6, 4.4, 4.1, 4.7, 4.5,
        4.3, 4.4, 4.8, 4.5
    ],
    'budget': [
        'low', 'low', 'medium', 'medium', 'high',
        'low', 'medium', 'low', 'low', 'medium',
        'medium', 'medium', 'medium', 'low', 'high',
        'low', 'high', 'medium', 'low', 'low',
        'low', 'high', 'medium', 'low', 'low', 'medium',
        'medium', 'medium', 'medium', 'medium'
    ],
    'location': [
        (28.6129, 77.2294), (28.5245, 77.1855), (28.5495, 77.2036), (28.5535, 77.2591), (28.7238, 77.1135),
        (28.6562, 77.2410), (28.5933, 77.2507), (28.6507, 77.2334), (28.5931, 77.2197), (28.6304, 77.2177),
        (28.6127, 77.2773), (28.5733, 77.2075), (28.5134, 77.1975), (28.5850, 77.1814), (28.4679, 77.0689),
        (28.6505, 77.2303), (28.4950, 77.0895), (28.6143, 77.1994), (28.6261, 77.2250), (28.6119, 77.2193),
        (28.5773, 77.1963), (28.5283, 77.2190), (28.6038, 77.1981), (28.6406, 77.2495), (28.6271, 77.2166),
        (28.7293, 77.0970), (28.6067, 77.2454), (28.6162, 77.1687), (28.6259, 77.2090), (28.5893, 77.2106)
    ],
    'crowded': [
        'medium', 'low', 'high', 'low', 'high',
        'medium', 'medium', 'medium', 'low', 'high',
        'medium', 'medium', 'low', 'low', 'high',
        'high', 'high', 'low', 'low', 'low',
        'high', 'medium', 'low', 'low', 'low', 'low',
        'medium', 'low', 'medium', 'low'
    ],
    'crime_rate': [
        2, 3, 5, 2, 4,
        3, 2, 4, 1, 6,
        1, 3, 2, 2, 5,
        5, 3, 2, 2, 2,
        4, 3, 1, 3, 3, 1,
        2, 3, 2, 3
    ],
    'doesSituationAllow': [
        1, 1, 1, 1, 1,
        1, 1, 1, 1, 1,
        1, 1, 1, 1, 1,
        1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1,
        1, 1, 1, 1
    ]
})

# Filter out places where doesSituationAllow == 0
filtered_places_data = places_data[places_data['doesSituationAllow'] == 1].copy()

# Function to calculate the distance between user and each place
def calculate_distance(user_location, place_location):
    return geodesic(user_location, place_location).kilometers

# User's current location (example: Connaught Place, Delhi)
user_location = (28.6139, 77.2090)

# Calculate distances and add to the DataFrame
filtered_places_data['distance'] = filtered_places_data['location'].apply(lambda loc: calculate_distance(user_location, loc))

# Combine multiple features into a single content descriptor with higher weight for type
filtered_places_data['content'] = (filtered_places_data['type'] + " ") * 3 + filtered_places_data['best_time'] + " " + filtered_places_data['budget'] + " " + filtered_places_data['crowded']

# Step 2: Vectorize content using TF-IDF
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(filtered_places_data['content'])

# Simulate user preference input
user_preferences = "historical day medium medium"
user_pref_vector = tfidf.transform([user_preferences])

# Calculate similarity scores using cosine similarity
similarity_scores = cosine_similarity(user_pref_vector, tfidf_matrix).flatten()

# Normalize distance and crime rate scores
filtered_places_data['distance_score'] = (1 / (filtered_places_data['distance'] + 1))  # Normalize by adding 1
filtered_places_data['crime_rate_score'] = (10 - filtered_places_data['crime_rate']) / 10  # Normalize crime rate to 0-1 scale

# Adjust weights for different features
filtered_places_data['similarity_score'] = (
    similarity_scores * 0.35 +  # Increased weight for cosine similarity
    filtered_places_data['distance_score'] * 0.20 +  # Adjusted distance weight
    filtered_places_data['rating'] / 5 * 0.20 +  # Normalized rating weight
    filtered_places_data['crime_rate_score'] * 0.15 +  # Crime rate weight
    filtered_places_data['doesSituationAllow'] * 0.10  # Current situation weight
)

# Step 4: Sort places by final score
filtered_places_data = filtered_places_data.sort_values(by='similarity_score', ascending=False)

# Display top 5 recommendations
top_recommendations = filtered_places_data[['place','type', 'similarity_score','best_time','budget','crowded']]
print(top_recommendations.head(5))


                 place        type  similarity_score best_time  budget crowded
17  Rashtrapati Bhavan  historical          0.758536       day  medium     low
6         Humayun Tomb  historical          0.703973       day  medium  medium
29     Safdarjung Tomb  historical          0.673672       day  medium     low
24       Jantar Mantar  historical          0.650561       day     low     low
5             Red Fort  historical          0.648335       day     low  medium


In [None]:
#more fine tuned model by Adit and dataset increased by responses of random people
#6th iteration
#DS updation
#6th iteration

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from geopy.distance import geodesic

# Step 1: Create a Larger Dataset with Popular Places in Delhi
places_data = pd.DataFrame({
    'place': [
        'India Gate', 'Qutub Minar', 'Hauz Khas Village', 'Lotus Temple', 'Adventure Island',
        'Red Fort', 'Humayun Tomb', 'Jama Masjid', 'Lodhi Garden', 'Connaught Place',
        'Akshardham Temple', 'Dilli Haat', 'Garden of Five Senses', 'National Rail Museum',
        'Kingdom of Dreams', 'Chandni Chowk', 'DLF CyberHub', 'Rashtrapati Bhavan',
        'Agrasen ki Baoli', 'National Museum', 'Sarojini Nagar Market', 'Select Citywalk Mall',
        'Nehru Planetarium', 'Raj Ghat', 'Jantar Mantar', 'ISKCON Temple', 'National Zoological Park',
        'Delhi Ridge', 'Bangla Sahib Gurudwara', 'Safdarjung Tomb', 'Majnu Ka Tila', 'Jhandewalan',
        'Sunder Nursery','Khan Market', 'Ski India'
    ],
    'type': [
        'romantic', 'historical', 'romantic', 'spiritual', 'adventurous',
        'historical', 'historical', 'historical', 'romantic', 'romantic',
        'spiritual', 'cultural', 'romantic', 'educational', 'entertainment',
        'historical', 'entertainment', 'historical', 'historical', 'educational',
        'shopping', 'shopping', 'educational', 'historical', 'historical', 'spiritual',
        'wildlife', 'adventurous', 'spiritual', 'historical','romantic', 'spiritual',
        'romantic', 'shopping' , 'entertainment'
    ],
    'best_time': [
        'day', 'day', 'night', 'day', 'day',
        'day', 'day', 'day', 'morning', 'night',
        'day', 'night', 'evening', 'morning', 'evening',
        'day', 'evening', 'day', 'day', 'day',
        'day', 'evening', 'day', 'morning', 'day', 'day',
        'day', 'morning', 'morning', 'day','night','morning',
        'morning' , 'night' , 'day'
    ],
    'rating': [
        4.5, 4.7, 4.3, 4.8, 4.1,
        4.6, 4.5, 4.4, 4.5, 4.7,
        4.9, 4.4, 4.6, 4.2, 4.8,
        4.6, 4.9, 4.7, 4.5, 4.2,
        4.3, 4.6, 4.4, 4.1, 4.7, 4.5,
        4.3, 4.4, 4.8, 4.5, 4.7,
        4.5, 4.6, 4.7, 4.7
    ],
    'budget': [
        'low', 'low', 'medium', 'medium', 'high',
        'low', 'medium', 'low', 'low', 'medium',
        'medium', 'medium', 'medium', 'low', 'high',
        'low', 'high', 'medium', 'low', 'low',
        'low', 'high', 'medium', 'low', 'low', 'medium',
        'medium', 'medium', 'medium', 'medium', 'medium',
        'low', 'low' , 'medium' , 'high'
    ],
    'location': [
        (28.6129, 77.2294), (28.5245, 77.1855), (28.5495, 77.2036), (28.5535, 77.2591), (28.7238, 77.1135),
        (28.6562, 77.2410), (28.5933, 77.2507), (28.6507, 77.2334), (28.5931, 77.2197), (28.6304, 77.2177),
        (28.6127, 77.2773), (28.5733, 77.2075), (28.5134, 77.1975), (28.5850, 77.1814), (28.4679, 77.0689),
        (28.6505, 77.2303), (28.4950, 77.0895), (28.6143, 77.1994), (28.6261, 77.2250), (28.6119, 77.2193),
        (28.5773, 77.1963), (28.5283, 77.2190), (28.6038, 77.1981), (28.6406, 77.2495), (28.6271, 77.2166),
        (28.7293, 77.0970), (28.6067, 77.2454), (28.6162, 77.1687), (28.6259, 77.2090), (28.5893, 77.2106),
        (28.7014, 77.2282), (28.6481, 77.2042), (28.5969, 77.2453), (28.6002, 77.2268), (28.5672, 77.3209)

    ],
    'crowded': [
        'medium', 'low', 'high', 'low', 'high',
        'medium', 'medium', 'medium', 'low', 'high',
        'medium', 'medium', 'low', 'low', 'high',
        'high', 'high', 'low', 'low', 'low',
        'high', 'medium', 'low', 'low', 'low', 'low',
        'medium', 'low', 'medium', 'low', 'high', 'medium',
        'medium', 'medium', 'low'
    ],
    'crime_rate': [
        2, 3, 5, 2, 4,
        3, 2, 4, 1, 6,
        1, 3, 2, 2, 5,
        5, 3, 2, 2, 2,
        4, 3, 1, 3, 3, 1,
        2, 3, 2, 3, 2, 2,
        2, 2, 1
    ],
    'doesSituationAllow': [
        1, 1, 1, 1, 1,
        1, 1, 1, 1, 1,
        1, 1, 1, 1, 1,
        1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1,
        1, 1, 1
    ]
})

# Filter out places where doesSituationAllow == 0
filtered_places_data = places_data[places_data['doesSituationAllow'] == 1].copy()

# Function to calculate the distance between user and each place
def calculate_distance(user_location, place_location):
    return geodesic(user_location, place_location).kilometers

# User's current location (example: Connaught Place, Delhi)
# user_location = (28.6139, 77.2090)

#user location is dtu

user_location=(28.7500,77.1175)

# Calculate distances and add to the DataFrame
filtered_places_data['distance'] = filtered_places_data['location'].apply(lambda loc: calculate_distance(user_location, loc))

# Combine multiple features into a single content descriptor with higher weight for type
filtered_places_data['content'] = (filtered_places_data['type'] + " ") * 3 + filtered_places_data['best_time'] + " " + filtered_places_data['budget'] + " " + filtered_places_data['crowded']

# Step 2: Vectorize content using TF-IDF
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(filtered_places_data['content'])

# Simulate user preference input
user_preferences = "romantic evening low medium"
user_pref_vector = tfidf.transform([user_preferences])

# Calculate similarity scores using cosine similarity
similarity_scores = cosine_similarity(user_pref_vector, tfidf_matrix).flatten()
# print(similarity_scores)
# Normalize distance and crime rate scores
filtered_places_data['distance_score'] = (1 / (filtered_places_data['distance'] + 1))  # Normalize by adding 1
filtered_places_data['crime_rate_score'] = (10 - filtered_places_data['crime_rate']) / 10  # Normalize crime rate to 0-1 scale

# Adjust weights for different features
filtered_places_data['similarity_score'] = (
    similarity_scores * 0.35 +  # Increased weight for cosine similarity
    filtered_places_data['distance_score'] * 0.20 +  # Adjusted distance weight
    filtered_places_data['rating'] / 5 * 0.20 +  # Normalized rating weight
    filtered_places_data['crime_rate_score'] * 0.15 +  # Crime rate weight
    filtered_places_data['doesSituationAllow'] * 0.10  # Current situation weight
)

# Step 4: Sort places by final score
filtered_places_data = filtered_places_data.sort_values(by='similarity_score', ascending=False)

# Display top 5 recommendations
top_recommendations = filtered_places_data[['place','type', 'similarity_score','best_time','budget','crowded','distance']]
print(top_recommendations.head(5))

[0.65880354 0.12998807 0.56427699 0.10885562 0.         0.13536028
 0.12998807 0.13536028 0.62091028 0.56427699 0.10600223 0.07730615
 0.86960338 0.09240696 0.1837886  0.06592613 0.1837886  0.13536028
 0.12998807 0.09407526 0.04738736 0.23992007 0.09605271 0.12569005
 0.12998807 0.10885562 0.07867928 0.08720384 0.10363244 0.13536028
 0.56427699 0.10629391 0.64007573 0.09175396 0.04738736]
                    place      type  similarity_score best_time  budget  \
12  Garden of Five Senses  romantic          0.715412   evening  medium   
8            Lodhi Garden  romantic          0.641818   morning     low   
0              India Gate  romantic          0.640722       day     low   
32         Sunder Nursery  romantic          0.637088   morning     low   
30          Majnu Ka Tila  romantic          0.620785     night  medium   

   crowded   distance  
12     low  27.364251  
8      low  20.054393  
0   medium  18.721503  
32  medium  21.070315  
30    high  12.081972  
