In [72]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from sklearn.metrics import accuracy_score

# Gather data
events = pd.read_csv('decoration.csv')



# Define features
features = [ 'Name of Service Provider','Contact Number','Location','Price Range (INR)','Email Address']

# Preprocess data
events = events.drop_duplicates(subset=['Email Address']) # Remove duplicates
events = events[features].fillna('') # Fill any missing values with empty string

# Vectorize features
vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform(events.apply(lambda x: ' '.join(x.astype(str)), axis=1))
feature_names = vectorizer.get_feature_names()

# Build the model
def content_based_filtering(user_events, n_recommendations=10):
    # Get user's event history
    user_vector = vectorizer.transform([' '.join(user_events)])
    
    # Compute similarity scores between user's history and all events
    similarity_scores = cosine_similarity(user_vector, vectors).flatten()
    
    # Get top n most similar events
    top_indices = similarity_scores.argsort()[::-1][:n_recommendations]
    
    # Return event recommendations
    return events.iloc[top_indices]

# Train and test the model
user_events = ["udaipur","INR 25,000"]
recommendations = content_based_filtering(user_events, n_recommendations=3)
print(recommendations)

#to check accuracy


   Name of Service Provider Contact Number Location      Price Range (INR)  \
4             Festive Decor    -9414159918  Udaipur      25,000 - 2,50,000   
14        The Festive Touch     9829053528  Udaipur  Rs. 75,000 - 5,00,000   
21         The Flower Power     9950304525  Udaipur  Rs. 50,000 - 3,00,000   

                     Email Address  
4        shobhacreations@gmail.com  
14          info@prasangdecors.com  
21  perfectweddingsdecor@gmail.com  
Accuracy on test set: 0.00


In [59]:
events.groupby("Location").count()

Unnamed: 0_level_0,Name of Service Provider,Contact Number,Price Range (INR)
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"10-B, DDA Market, Rajouri Garden, New Delhi",1,1,1
"106, Garg Plaza, Road No. 44, Pitampura, New Delhi - 110034",1,1,1
"153-A, Ground Floor, Shahpur Jat, New Delhi - 110049",1,1,1
"17, Shanti Kunj Main, Opposite HDFC Bank, Vasant Kunj",1,1,1
"43, First Floor, North Avenue Road, Punjabi Bagh West, New Delhi - 110026",1,1,1
"505, Vishal Tower, Janakpuri District Center, Janakpuri, New Delhi - 110058",1,1,1
"9/80, Nehru Nagar, New Delhi - 110065",1,1,1
"A-22, Naraina Industrial Area, Phase 1, New Delhi",1,1,1
"A-290, Okhla Industrial Area, Phase-1, New Delhi - 110020",1,1,1
Ajmer,2,2,2


In [36]:
events.head()

Unnamed: 0,Name of Service Provider,Location,Contact Number,Email Address,Specialties,Price Range (INR),Rating
0,Name of Service Provider,Jaipur,-9829053289,info@royalweddingsjaipur.com,"Wedding decoration, Floral arrangements, Light...","50,000 - 5,00,000",5
1,Name of Service Provider,Jaipur,-9785090964,efiestadecorators@gmail.com,"Wedding decoration, Corporate events, Stage de...","30,000 - 3,00,000",4
2,Name of Service Provider,Jaipur,-8764093864,info@devikasakhuja.com,"Luxury wedding decoration, Floral arrangements...","1,00,000 - 10,00,000",5
3,Name of Service Provider,Jaipur,-9829054060,eliteweddingplanner@gmail.com,"Destination wedding decoration, Pre-wedding ce...","1,00,000 - 5,00,000",4
4,Name of Service Provider,Udaipur,-9414159918,shobhacreations@gmail.com,"Wedding decoration, Floral decoration, Stage d...","25,000 - 2,50,000",3


In [32]:
events['City']

0     Jodhpur
1     Jodhpur
2       Sikar
3     Udaipur
4     Bikaner
5       Alwar
6     Udaipur
7     Bikaner
8       Alwar
9      Jaipur
10      Ajmer
11     Jaipur
12      Ajmer
13       Kota
14       Kota
Name: City, dtype: object

Accuracy on test set: 0.00


In [79]:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# define the feature and target columns
X = events["Email Address"]
y = events["Location"]

# split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# define a pipeline to vectorize text and train a Multinomial Naive Bayes classifier
pipeline = Pipeline([
    ("vectorizer", TfidfVectorizer()),
    ("classifier", MultinomialNB())
])

# fit the pipeline on the training data
pipeline.fit(X_train, y_train)

# make predictions on the test set
y_pred = pipeline.predict(X_test)

# calculate accuracy of the model on the test set
a = accuracy_score(y_test, y_pred)

# print the accuracy score
print("Accuracy on test set: {:.2f}".format(a*100))


Accuracy on test set: 36.36


In [80]:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# define the feature and target columns
X = events["Email Address"]
y = events["Location"]

# split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# define a pipeline to vectorize text and train a Random Forest Classifier
pipeline = Pipeline([
    ("vectorizer", TfidfVectorizer()),
    ("classifier", RandomForestClassifier())
])

# fit the pipeline on the training data
pipeline.fit(X_train, y_train)

# make predictions on the test set
y_pred = pipeline.predict(X_test)

# calculate accuracy of the model on the test set
accuracy = accuracy_score(y_test, y_pred)

# print the accuracy score
print("Accuracy on test set: {:.2f}".format(accuracy))


Accuracy on test set: 0.18


In [82]:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# define the feature and target columns
X = events["Email Address"]
y = events["Location"]

# split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# define a pipeline to vectorize text and train a SVM Classifier
pipeline = Pipeline([
    ("vectorizer", TfidfVectorizer()),
    ("classifier", SVC())
])

# fit the pipeline on the training data
pipeline.fit(X_train, y_train)

# make predictions on the test set
y_pred = pipeline.predict(X_test)

# calculate accuracy of the model on the test set
accuracy = accuracy_score(y_test, y_pred)

# print the accuracy score
print("Accuracy on test set: {:.2f}".format(accuracy))


Accuracy on test set: 0.36
