In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor

# Step 1: Load the CSV file
csv_file = "tourist_interest_sentences.csv"
data = pd.read_csv(csv_file, encoding='ISO-8859-1')


# Step 2: Preprocess the data
sentences = data['Sentence'].tolist()

# Assuming the rest of the columns contain the relevance scores for each class
labels = data[['Temples', 'Beaches', 'Forts', 'Lakes', 'NationalParks', "AdventureSports",	"Amusement",	"Nature",	"Cultural",	"UrbanDevelopment",	"Scenic"]].values

# Step 3: Vectorize the sentences using TF-IDF
vectorizer = TfidfVectorizer(max_features=300)  # Set max_features for dimensionality reduction
sentence_embeddings = vectorizer.fit_transform(sentences).toarray()

# Step 4: Train the model using MultiOutputRegressor with Random Forest
regressor = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42))
regressor.fit(sentence_embeddings, labels)

# Step 5: Predict relevance scores for a new sentence
def classify_sentence(input_sentence):
    embedding = vectorizer.transform([input_sentence]).toarray()
    scores = regressor.predict(embedding)
    return np.array(scores).flatten()

# Test with a new sentence
input_sentence = "I enjoy near the lakes and exploring the beauty of temples."
scores = classify_sentence(input_sentence)

# Output scores
classes = ["Temple", "Beach", "Fort", "Lake", "National Park"]
print("Input Sentence:", input_sentence)
for cls, score in zip(classes, scores):
    print(f"{cls}: {score:.2f}")


Input Sentence: I enjoy near the lakes and exploring the beauty of temples.
Temple: 0.75
Beach: 0.64
Fort: 0.23
Lake: 0.71
National Park: 0.36


In [2]:
import pickle
with open("regressor_model.pkl", "wb") as f:
    pickle.dump(regressor, f)

with open("vectorizer.pkl", "wb") as f:
    pickle.dump(vectorizer, f)

print("Model and vectorizer saved successfully!")

Model and vectorizer saved successfully!
