In [21]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# For handling warnings
import warnings
warnings.filterwarnings('ignore')

# Load the Excel file
file_path = 'Events (1).xlsx'
events_data = pd.read_excel(file_path)

# Fill missing descriptions and sub categories with empty strings.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on a column selection: the chained in-place form is
# deprecated in pandas 2.x and does not update the parent frame once
# Copy-on-Write is enabled.
events_data['Description'] = events_data['Description'].fillna('')
events_data['Sub Category'] = events_data['Sub Category'].fillna('')

# Combine relevant text columns to create a single feature for the event.
# Missing names / primary categories are treated as '' for this concatenation
# only (the source columns are left untouched); a single NaN operand would
# otherwise turn the whole combined string into NaN, which TfidfVectorizer
# refuses to fit on.
events_data['combined_features'] = (
    events_data['Event Names'].fillna('') + ' '
    + events_data['Description'] + ' '
    + events_data['Primary Category'].fillna('') + ' '
    + events_data['Sub Category']
)

# Initialize TF-IDF Vectorizer (English stop words are dropped)
vectorizer = TfidfVectorizer(stop_words='english')

# Fit the vocabulary on the event text and build one TF-IDF row per event
event_tfidf_matrix = vectorizer.fit_transform(events_data['combined_features'])

def preprocess_bios(bios):
    """
    Normalise user bios by lower-casing each one.

    bios is expected to be an iterable of strings; a new list with the
    lower-cased text is returned, in the same order as the input.
    """
    lowered = []
    for text in bios:
        lowered.append(text.lower())
    return lowered

def recommend_events(user_bio, event_tfidf_matrix, events_data, top_n=10):
    """
    Recommend events based on the user's bio.

    The bio is transformed with the module-level ``vectorizer`` (the one that
    was fitted on the event text), compared against every row of
    ``event_tfidf_matrix`` with cosine similarity, and the ``top_n``
    highest-scoring rows of ``events_data`` are returned, best match first.

    Parameters
    ----------
    user_bio : str
        Preprocessed bio text for a single user.
    event_tfidf_matrix : matrix
        TF-IDF matrix of the events; its rows are assumed to be in the same
        order as the rows of ``events_data``.
    events_data : pandas.DataFrame
        Event table to select the recommendations from.
    top_n : int, default 10
        Maximum number of events to return. Values <= 0 yield an empty frame.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the selected rows, so callers may add columns
        without touching ``events_data``. Rows are picked purely by rank, so
        events with a similarity of 0 can appear when fewer than ``top_n``
        events share any term with the bio.
    """
    # A slice of the form [-0:] would select *every* event, so handle
    # non-positive requests explicitly.
    if top_n <= 0:
        return events_data.iloc[0:0].copy()

    # Transform the user bio using the same vectorizer
    user_bio_tfidf = vectorizer.transform([user_bio])

    # Calculate the cosine similarity between user bio and event features
    cosine_similarities = cosine_similarity(user_bio_tfidf, event_tfidf_matrix).flatten()

    # argsort is ascending: take the last top_n positions and reverse them so
    # the most similar event comes first
    similar_event_indices = cosine_similarities.argsort()[-top_n:][::-1]

    # Return a copy rather than a selection tied to events_data
    return events_data.iloc[similar_event_indices].copy()

# Sample user bios
user_bios = [
    "I love photography and enjoy attending music concerts.",
    "Interested in historical walks and cultural events.",
]

# Preprocess user bios
processed_bios = preprocess_bios(user_bios)

# Recommend events for each user bio. Every batch is tagged with the original
# (unprocessed) bio via .assign, which returns a new frame instead of writing
# into the selection returned by recommend_events. The batches are collected
# in a list and concatenated once, rather than re-concatenating a growing
# DataFrame on every iteration.
# NOTE: the 'User Bio' tag is not part of the column selection displayed below.
per_user_recommendations = [
    recommend_events(bio, event_tfidf_matrix, events_data).assign(**{'User Bio': original_bio})
    for original_bio, bio in zip(user_bios, processed_bios)
]
recommendations_df = pd.concat(per_user_recommendations, ignore_index=True)

# Display the recommendations DataFrame
recommendations_df = recommendations_df[['Event ID', 'Event Names', 'Description', 'Primary Category', 'Sub Category']]
recommendations_df

#import ace_tools as tools; tools.display_dataframe_to_user(name="Event Recommendations", dataframe=recommendations_df)


Unnamed: 0,Event ID,Event Names,Description,Primary Category,Sub Category
0,EVT0074,Wildlife Photography,UPC Expeditions introducing its wildlife photo...,Photography,Travel Photography
1,EVT0187,Photo Walk,Embark on guided photography walk,Photography,Street Photography
2,EVT0086,Nature Photography Workshop,We are giving practical assignments in all out...,Photography,Nature Photography
3,EVT0002,Street Photography Workshop,Ten days of shooting includes two street photo...,Photography,Travel Photography
4,EVT0054,Portrait Photography Workshop,Capture captivating portraits that tell storie...,Photography,Model Photography
5,EVT0001,Food Photography Workshop,Calling all food lovers and aspiring photograp...,Photography,Food Photography
6,EVT0115,Music at the Rotunda,,Music and Entertainment,Concert
7,EVT0003,Ajay Atul Live,Let's enjoy the magic of Ajay-Atul’s music lik...,Music and Entertainment,Concert
8,EVT0059,Visitng Book Cafe,Love reading...? Like talking about books...? ...,Books and Education,Reading
9,EVT0142,Games Night at Mauji,Gather your friends and family for an evening ...,Music and Entertainment,Group Activity


In [32]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import spacy

# For handling warnings
import warnings
warnings.filterwarnings('ignore')

# Load the Excel file
file_path = 'Events (1).xlsx'
events_data = pd.read_excel(file_path)

# Fill missing descriptions and sub categories with empty strings.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on a column selection: the chained in-place form is
# deprecated in pandas 2.x and does not update the parent frame once
# Copy-on-Write is enabled.
events_data['Description'] = events_data['Description'].fillna('')
events_data['Sub Category'] = events_data['Sub Category'].fillna('')

# Combine relevant text columns to create a single feature for the event.
# Missing names / primary categories are treated as '' for this concatenation
# only (the source columns are left untouched); a single NaN operand would
# otherwise turn the whole combined string into NaN, which TfidfVectorizer
# refuses to fit on.
events_data['combined_features'] = (
    events_data['Event Names'].fillna('') + ' '
    + events_data['Description'] + ' '
    + events_data['Primary Category'].fillna('') + ' '
    + events_data['Sub Category']
)

# Initialize TF-IDF Vectorizer (English stop words are dropped)
vectorizer = TfidfVectorizer(stop_words='english')

# Fit the vocabulary on the event text and build one TF-IDF row per event
event_tfidf_matrix = vectorizer.fit_transform(events_data['combined_features'])

# Load SpaCy model for NLP tasks
import subprocess
import sys

# Helper that downloads the spaCy English model
def install_spacy_model():
    """
    Download the ``en_core_web_sm`` spaCy model with the current interpreter.

    Runs ``<sys.executable> -m spacy download en_core_web_sm`` as a child
    process; ``subprocess.check_call`` raises ``CalledProcessError`` if the
    command exits with a non-zero status.
    """
    download_command = [sys.executable, "-m", "spacy", "download", "en_core_web_sm"]
    subprocess.check_call(download_command)

# Load the en_core_web_sm pipeline into the module-level `nlp` object.
# An OSError from spacy.load is treated as "model not installed": in that case
# the model is downloaded via install_spacy_model() and the load is retried once.
# A failure of the download or of the second load is not caught here.
try:
    nlp = spacy.load('en_core_web_sm')
except OSError:
    install_spacy_model()
    nlp = spacy.load('en_core_web_sm')

def preprocess_bios(bios):
    """
    Reduce each user bio to the lemmas of its nouns and adjectives.

    Every bio is lower-cased and parsed with the module-level spaCy pipeline
    ``nlp``; tokens whose coarse part-of-speech tag is NOUN or ADJ are kept,
    replaced by their lemma, and joined with single spaces. Returns one
    processed string per input bio, in input order.
    """
    kept_pos = ['NOUN', 'ADJ']
    return [
        ' '.join(tok.lemma_ for tok in nlp(raw_bio.lower()) if tok.pos_ in kept_pos)
        for raw_bio in bios
    ]

def recommend_events(user_bio, event_tfidf_matrix, events_data, top_n=5):
    """
    Recommend events based on the user's bio.

    The bio is transformed with the module-level ``vectorizer`` (the one that
    was fitted on the event text), compared against every row of
    ``event_tfidf_matrix`` with cosine similarity, and the ``top_n``
    highest-scoring rows of ``events_data`` are returned, best match first.

    Parameters
    ----------
    user_bio : str
        Preprocessed bio text for a single user.
    event_tfidf_matrix : matrix
        TF-IDF matrix of the events; its rows are assumed to be in the same
        order as the rows of ``events_data``.
    events_data : pandas.DataFrame
        Event table to select the recommendations from.
    top_n : int, default 5
        Maximum number of events to return. Values <= 0 yield an empty frame.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the selected rows, so callers may add columns
        without touching ``events_data``. Rows are picked purely by rank, so
        events with a similarity of 0 can appear when fewer than ``top_n``
        events share any term with the bio.
    """
    # A slice of the form [-0:] would select *every* event, so handle
    # non-positive requests explicitly.
    if top_n <= 0:
        return events_data.iloc[0:0].copy()

    # Transform the user bio using the same vectorizer
    user_bio_tfidf = vectorizer.transform([user_bio])

    # Calculate the cosine similarity between user bio and event features
    cosine_similarities = cosine_similarity(user_bio_tfidf, event_tfidf_matrix).flatten()

    # argsort is ascending: take the last top_n positions and reverse them so
    # the most similar event comes first
    similar_event_indices = cosine_similarities.argsort()[-top_n:][::-1]

    # Return a copy rather than a selection tied to events_data
    return events_data.iloc[similar_event_indices].copy()

# Sample user bios
user_bios = [
    "I love photography and enjoy attending music concerts.",
    "Interested in historical walks and cultural events.",
]

# Preprocess user bios
processed_bios = preprocess_bios(user_bios)

# Recommend events for each user bio. Every batch is tagged with the original
# (unprocessed) bio via .assign, which returns a new frame instead of writing
# into the selection returned by recommend_events. The batches are collected
# in a list and concatenated once, rather than re-concatenating a growing
# DataFrame on every iteration.
# NOTE: the 'User Bio' tag is not part of the column selection displayed below.
per_user_recommendations = [
    recommend_events(bio, event_tfidf_matrix, events_data).assign(**{'User Bio': original_bio})
    for original_bio, bio in zip(user_bios, processed_bios)
]
recommendations_df = pd.concat(per_user_recommendations, ignore_index=True)

# Display the recommendations DataFrame
recommendations_df = recommendations_df[['Event ID', 'Event Names', 'Description', 'Primary Category', 'Sub Category']]
recommendations_df

#import ace_tools as tools; tools.display_dataframe_to_user(name="Event Recommendations", dataframe=recommendations_df)


Unnamed: 0,Event ID,Event Names,Description,Primary Category,Sub Category
0,EVT0187,Photo Walk,Embark on guided photography walk,Photography,Street Photography
1,EVT0086,Nature Photography Workshop,We are giving practical assignments in all out...,Photography,Nature Photography
2,EVT0115,Music at the Rotunda,,Music and Entertainment,Concert
3,EVT0002,Street Photography Workshop,Ten days of shooting includes two street photo...,Photography,Travel Photography
4,EVT0074,Wildlife Photography,UPC Expeditions introducing its wildlife photo...,Photography,Travel Photography
5,EVT0187,Photo Walk,Embark on guided photography walk,Photography,Street Photography
6,EVT0004,Pune Heritage Walk,Heritage walk through old lanes and Potters co...,Travel,One Day Outing
7,EVT0060,Electronic Expo for you,The Electronics For You (EFY) Expo Pune is a p...,Tech and Innovation,Exhibition
8,EVT0057,Royal Temples Heritage Walk,With the rise of the imperial power in the cit...,Travel,One Day Outing
9,EVT0127,Old Pune Food Walk - explore authentic food,We will explore and try out these authentic fo...,Food and Drinks,Food Blogging


In [41]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import spacy

# For handling warnings
import warnings
warnings.filterwarnings('ignore')

# Load the Excel file
file_path = 'Events (1).xlsx'
events_data = pd.read_excel(file_path)

# Fill missing descriptions and sub categories with empty strings.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on a column selection: the chained in-place form is
# deprecated in pandas 2.x and does not update the parent frame once
# Copy-on-Write is enabled.
events_data['Description'] = events_data['Description'].fillna('')
events_data['Sub Category'] = events_data['Sub Category'].fillna('')

# Combine relevant text columns to create a single feature for the event.
# Missing names / primary categories are treated as '' for this concatenation
# only (the source columns are left untouched); a single NaN operand would
# otherwise turn the whole combined string into NaN, which TfidfVectorizer
# refuses to fit on.
events_data['combined_features'] = (
    events_data['Event Names'].fillna('') + ' '
    + events_data['Description'] + ' '
    + events_data['Primary Category'].fillna('') + ' '
    + events_data['Sub Category']
)

# Initialize TF-IDF Vectorizer (English stop words are dropped)
vectorizer = TfidfVectorizer(stop_words='english')

# Fit the vocabulary on the event text and build one TF-IDF row per event
event_tfidf_matrix = vectorizer.fit_transform(events_data['combined_features'])

# Load SpaCy model for NLP tasks
import subprocess
import sys

# Helper that downloads the spaCy English model
def install_spacy_model():
    """
    Download the ``en_core_web_sm`` spaCy model with the current interpreter.

    Runs ``<sys.executable> -m spacy download en_core_web_sm`` as a child
    process; ``subprocess.check_call`` raises ``CalledProcessError`` if the
    command exits with a non-zero status.
    """
    spacy_download = [sys.executable, "-m", "spacy", "download", "en_core_web_sm"]
    subprocess.check_call(spacy_download)

# Load the en_core_web_sm pipeline into the module-level `nlp` object.
# An OSError from spacy.load is treated as "model not installed": in that case
# the model is downloaded via install_spacy_model() and the load is retried once.
# A failure of the download or of the second load is not caught here.
try:
    nlp = spacy.load('en_core_web_sm')
except OSError:
    install_spacy_model()
    nlp = spacy.load('en_core_web_sm')

def preprocess_bios(bios):
    """
    Reduce each user bio to the lemmas of its nouns and adjectives.

    Each bio is lower-cased and run through the module-level spaCy pipeline
    ``nlp``. Only tokens tagged NOUN or ADJ survive; their lemmas are joined
    with single spaces. The returned list has one string per input bio, in
    the original order.
    """
    wanted_tags = ['NOUN', 'ADJ']
    cleaned = []
    for raw_bio in bios:
        parsed = nlp(raw_bio.lower())
        # Keep the lemma of every noun / adjective token
        lemmas = (tok.lemma_ for tok in parsed if tok.pos_ in wanted_tags)
        cleaned.append(' '.join(lemmas))
    return cleaned

def recommend_events(user_bio, event_tfidf_matrix, events_data, top_n=5):
    """
    Recommend events based on the user's bio.

    The bio is transformed with the module-level ``vectorizer`` (the one that
    was fitted on the event text), compared against every row of
    ``event_tfidf_matrix`` with cosine similarity, and the ``top_n``
    highest-scoring rows of ``events_data`` are returned, best match first.

    Parameters
    ----------
    user_bio : str
        Preprocessed bio text for a single user.
    event_tfidf_matrix : matrix
        TF-IDF matrix of the events; its rows are assumed to be in the same
        order as the rows of ``events_data``.
    events_data : pandas.DataFrame
        Event table to select the recommendations from.
    top_n : int, default 5
        Maximum number of events to return. Values <= 0 yield an empty frame.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the selected rows, so callers may add columns
        without touching ``events_data``. Rows are picked purely by rank, so
        events with a similarity of 0 can appear when fewer than ``top_n``
        events share any term with the bio.
    """
    # A slice of the form [-0:] would select *every* event, so handle
    # non-positive requests explicitly.
    if top_n <= 0:
        return events_data.iloc[0:0].copy()

    # Transform the user bio using the same vectorizer
    user_bio_tfidf = vectorizer.transform([user_bio])

    # Calculate the cosine similarity between user bio and event features
    cosine_similarities = cosine_similarity(user_bio_tfidf, event_tfidf_matrix).flatten()

    # argsort is ascending: take the last top_n positions and reverse them so
    # the most similar event comes first
    similar_event_indices = cosine_similarities.argsort()[-top_n:][::-1]

    # Return a copy rather than a selection tied to events_data
    return events_data.iloc[similar_event_indices].copy()

# Sample user bios
user_bios = [
    "I love photography and enjoy attending music concerts.",
    "Interested in historical walks and cultural events.",
]

# Preprocess user bios
processed_bios = preprocess_bios(user_bios)

# Recommend events for each user bio. Every batch is tagged with the original
# (unprocessed) bio via .assign, which returns a new frame instead of writing
# into the selection returned by recommend_events. The batches are collected
# in a list and concatenated once, rather than re-concatenating a growing
# DataFrame on every iteration.
per_user_recommendations = [
    recommend_events(bio, event_tfidf_matrix, events_data).assign(**{'User Bio': original_bio})
    for original_bio, bio in zip(user_bios, processed_bios)
]
recommendations_df = pd.concat(per_user_recommendations, ignore_index=True)

# Display the recommendations DataFrame
recommendations_df = recommendations_df[['User Bio', 'Event Names', 'Description', 'Primary Category', 'Sub Category']]
recommendations_df

#import ace_tools as tools; tools.display_dataframe_to_user(name="Event Recommendations", dataframe=recommendations_df)


Unnamed: 0,User Bio,Event Names,Description,Primary Category,Sub Category
0,I love photography and enjoy attending music c...,Photo Walk,Embark on guided photography walk,Photography,Street Photography
1,I love photography and enjoy attending music c...,Nature Photography Workshop,We are giving practical assignments in all out...,Photography,Nature Photography
2,I love photography and enjoy attending music c...,Music at the Rotunda,,Music and Entertainment,Concert
3,I love photography and enjoy attending music c...,Street Photography Workshop,Ten days of shooting includes two street photo...,Photography,Travel Photography
4,I love photography and enjoy attending music c...,Wildlife Photography,UPC Expeditions introducing its wildlife photo...,Photography,Travel Photography
5,Interested in historical walks and cultural ev...,Photo Walk,Embark on guided photography walk,Photography,Street Photography
6,Interested in historical walks and cultural ev...,Pune Heritage Walk,Heritage walk through old lanes and Potters co...,Travel,One Day Outing
7,Interested in historical walks and cultural ev...,Electronic Expo for you,The Electronics For You (EFY) Expo Pune is a p...,Tech and Innovation,Exhibition
8,Interested in historical walks and cultural ev...,Royal Temples Heritage Walk,With the rise of the imperial power in the cit...,Travel,One Day Outing
9,Interested in historical walks and cultural ev...,Old Pune Food Walk - explore authentic food,We will explore and try out these authentic fo...,Food and Drinks,Food Blogging


In [3]:
import pandas as pd
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import spacy
import subprocess
import sys
import nltk

# Download the NLTK 'punkt' resource (a no-op apart from a log message when it
# is already present locally).
# NOTE(review): no other statement in this cell references nltk, so this
# download looks unnecessary for the fuzzy-matching flow — confirm before removing.
nltk.download('punkt')

# For handling warnings
import warnings
warnings.filterwarnings('ignore')

# Load the Excel file
file_path = 'Events (1).xlsx'
events_data = pd.read_excel(file_path)

# Fill missing descriptions and sub categories with empty strings.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on a column selection: the chained in-place form is
# deprecated in pandas 2.x and does not update the parent frame once
# Copy-on-Write is enabled.
events_data['Description'] = events_data['Description'].fillna('')
events_data['Sub Category'] = events_data['Sub Category'].fillna('')

# Combine relevant text columns to create a single feature for the event.
# Missing names / primary categories are treated as '' for this concatenation
# only (the source columns are left untouched); a single NaN operand would
# otherwise turn the whole combined value into NaN instead of text, leaving
# the fuzzy matcher with a non-string candidate.
events_data['combined_features'] = (
    events_data['Event Names'].fillna('') + ' '
    + events_data['Description'] + ' '
    + events_data['Primary Category'].fillna('') + ' '
    + events_data['Sub Category']
)

# Helper used when loading the SpaCy model for NLP tasks
def install_spacy_model():
    """
    Download the ``en_core_web_sm`` spaCy model with the current interpreter.

    Runs ``<sys.executable> -m spacy download en_core_web_sm`` as a child
    process; ``subprocess.check_call`` raises ``CalledProcessError`` if the
    command exits with a non-zero status.
    """
    model_download_cmd = [sys.executable, "-m", "spacy", "download", "en_core_web_sm"]
    subprocess.check_call(model_download_cmd)

# Load the en_core_web_sm pipeline into the module-level `nlp` object.
# An OSError from spacy.load is treated as "model not installed": in that case
# the model is downloaded via install_spacy_model() and the load is retried once.
# A failure of the download or of the second load is not caught here.
try:
    nlp = spacy.load('en_core_web_sm')
except OSError:
    install_spacy_model()
    nlp = spacy.load('en_core_web_sm')

def preprocess_bios(bios):
    """
    Reduce each user bio to its nouns and adjectives.

    Each bio is lower-cased and parsed with the module-level spaCy pipeline
    ``nlp``. Tokens tagged NOUN or ADJ are kept in their surface form (no
    lemmatisation) and joined with single spaces. Returns one processed
    string per input bio, in input order.
    """
    kept_pos = ['NOUN', 'ADJ']
    return [
        ' '.join(tok.text for tok in nlp(raw_bio.lower()) if tok.pos_ in kept_pos)
        for raw_bio in bios
    ]

def recommend_events(user_bio, events_data, top_n=5):
    """
    Recommend events based on the user's bio using fuzzy matching.

    The bio is scored against the ``combined_features`` text of every event
    with ``fuzzywuzzy.process.extract`` and the ``top_n`` best-scoring rows of
    ``events_data`` are returned, best match first.

    Parameters
    ----------
    user_bio : str
        Preprocessed bio text for a single user.
    events_data : pandas.DataFrame
        Event table; must contain a ``combined_features`` column.
    top_n : int, default 5
        Maximum number of events to return.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the matched rows, so callers may add columns
        without touching ``events_data``.
    """
    # Key every candidate text by its row position. Looking a matched string
    # up again with list.index() would always resolve to the *first* row with
    # that text, so events sharing identical combined text would collapse onto
    # one row (and each lookup would be a linear scan).
    choices = dict(enumerate(events_data['combined_features'].tolist()))

    # For dict-like choices, process.extract returns (text, score, key)
    # triples, so the row position comes back with each match.
    matches = process.extract(user_bio, choices, limit=top_n)

    # Extract the row positions of the best matches
    match_positions = [position for _text, _score, position in matches]

    # Return a copy rather than a selection tied to events_data
    return events_data.iloc[match_positions].copy()

# Sample user bios
user_bios = [
    "I love photography and enjoy attending music concerts.",
    "Interested in historical walks and cultural events.",
]

# Preprocess user bios
processed_bios = preprocess_bios(user_bios)

# Recommend events for each user bio. Every batch is tagged with the original
# (unprocessed) bio via .assign, which returns a new frame instead of writing
# into the selection returned by recommend_events. The batches are collected
# in a list and concatenated once, rather than re-concatenating a growing
# DataFrame on every iteration.
per_user_recommendations = [
    recommend_events(bio, events_data).assign(**{'User Bio': original_bio})
    for original_bio, bio in zip(user_bios, processed_bios)
]
recommendations_df = pd.concat(per_user_recommendations, ignore_index=True)

# Display the recommendations DataFrame
recommendations_df = recommendations_df[['User Bio', 'Event Names', 'Description', 'Primary Category', 'Sub Category']]
recommendations_df

#import ace_tools as tools; tools.display_dataframe_to_user(name="Event Recommendations", dataframe=recommendations_df)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\pratik\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Unnamed: 0,User Bio,Event Names,Description,Primary Category,Sub Category
0,I love photography and enjoy attending music c...,Food Photography Workshop,Calling all food lovers and aspiring photograp...,Photography,Food Photography
1,I love photography and enjoy attending music c...,Ajay Atul Live,Let's enjoy the magic of Ajay-Atul’s music lik...,Music and Entertainment,Concert
2,I love photography and enjoy attending music c...,Poetry and Comedy,"GOTU LIT WEDNESDAY'S, A COMEDY AND POETRY OPEN...",Music and Entertainment,"Poetry,Comedy"
3,I love photography and enjoy attending music c...,Theatre Shows,,Music and Entertainment,Drama
4,I love photography and enjoy attending music c...,Shopping Festival,,Music and Entertainment,Exhibition
5,Interested in historical walks and cultural ev...,Fluid Art on Canvas,"Also check out other Arts Events in Pune, Fine...",Art and Culture,Drawing and Painting
6,Interested in historical walks and cultural ev...,Resin Art Tray n Coasters Workshop,"Also check out other Arts Events in Pune, Work...",Art and Culture,Drawing and Painting
7,Interested in historical walks and cultural ev...,Come and Learn - Dance Choreography,,Dance,Dance for Specific Events
8,Interested in historical walks and cultural ev...,Comedy and Poetry Special Open Mic,An open mic event is an opportunity for indivi...,Art and Culture,"Comedy, Poetry"
9,Interested in historical walks and cultural ev...,Dairy Industry Expo 2024,In a short span of 6 years Dairy Industry Expo...,Tech and Innovation,Exhibition


### Code Flow and Explanation:

1. **Import Libraries**:
   - Import the libraries used for data manipulation (`pandas`), fuzzy matching (`fuzzywuzzy`), and NLP (`spacy`, `nltk`), plus the standard-library modules `subprocess`, `sys`, and `warnings`.

2. **Download NLTK Data**:
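   - Use `nltk.download('punkt')` to download the Punkt tokenizer data for NLTK. Note that no later step in this cell calls an NLTK function, so the recommendation flow itself does not depend on this download.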

3. **Load Excel File**:
   - Load event data from the Excel file (`Events (1).xlsx`).
   - Fill missing descriptions and subcategories with empty strings.

4. **Create Combined Features**:
   - Combine event names, descriptions, primary category, and subcategory into a single feature column (`combined_features`) for each event.

5. **Load SpaCy Model**:
   - A helper function `install_spacy_model()` installs the SpaCy model if it is not available. The `nlp` model is used for natural language processing tasks.

6. **Preprocess User Bios**:
   - Function `preprocess_bios(bios)` converts user bios to lowercase and extracts nouns and adjectives using SpaCy, which are then stored as processed bios.

7. **Event Recommendation Function**:
   - The `recommend_events(user_bio, events_data, top_n=5)` function performs fuzzy matching (via `fuzzywuzzy.process.extract`) between a user's preprocessed bio and the `combined_features` text of each event.
   - It returns the `top_n` events with the highest fuzzy-match scores.

8. **Sample User Bios**:
   - Two sample user bios are defined, representing interests like photography, music concerts, historical walks, and cultural events.

9. **Preprocess and Recommend Events**:
   - Each user bio is preprocessed, and recommendations are generated using fuzzy matching.
   - Recommendations are stored in a DataFrame (`recommendations_df`) and displayed.

