In [1]:
import pandas as pd

# Read the project-ideas dataset from the CSV file next to the notebook
df = pd.read_csv("project_ideas_dataset_1000.csv")

# Quick sanity check: (rows, columns) followed by the first few records
print(df.shape)
print(df.head())

# Build a single text field per project for vectorization: the descriptive
# columns are joined left to right with one space between them, and missing
# values are replaced by empty strings first.
text_columns = ["title", "description", "skills_required", "goal"]
combined_text = df[text_columns[0]].fillna('')
for column in text_columns[1:]:
    combined_text = combined_text + " " + df[column].fillna('')
df["combined_text"] = combined_text


(1000, 9)
   project_id                               title  \
0           1        Next-Gen Platform for Retail   
1           2  Intelligent System for Environment   
2           3        Next-Gen Platform for Retail   
3           4     Next-Gen Tool for Cybersecurity   
4           5      Predictive Tool for Healthcare   

                                         description          domain  \
0  Develop a blockchain project titled 'Next-Gen ...      Blockchain   
1  Develop a generative ai project titled 'Intell...   Generative AI   
2  Develop a cybersecurity project titled 'Next-G...   Cybersecurity   
3  Develop a education tech project titled 'Next-...  Education Tech   
4  Develop a robotics project titled 'Predictive ...        Robotics   

                        skills_required    difficulty  \
0  PyTorch, SQL, AWS, Solidity, FastAPI      Beginner   
1          Docker, Django, PyTorch, AWS  Intermediate   
2              PyTorch, TensorFlow, SQL  Intermediate   
3         

In [3]:
# Show the first five rows, now including the derived `combined_text` column
df.head(n=5)

Unnamed: 0,project_id,title,description,domain,skills_required,difficulty,goal,popularity_score,year,combined_text
0,1,Next-Gen Platform for Retail,Develop a blockchain project titled 'Next-Gen ...,Blockchain,"PyTorch, SQL, AWS, Solidity, FastAPI",Beginner,Gain hands-on experience in Blockchain.,468,2024,Next-Gen Platform for Retail Develop a blockch...
1,2,Intelligent System for Environment,Develop a generative ai project titled 'Intell...,Generative AI,"Docker, Django, PyTorch, AWS",Intermediate,Understand core concepts of Generative AI.,485,2023,Intelligent System for Environment Develop a g...
2,3,Next-Gen Platform for Retail,Develop a cybersecurity project titled 'Next-G...,Cybersecurity,"PyTorch, TensorFlow, SQL",Intermediate,Build deployable solutions in Cybersecurity.,499,2023,Next-Gen Platform for Retail Develop a cyberse...
3,4,Next-Gen Tool for Cybersecurity,Develop a education tech project titled 'Next-...,Education Tech,"CSS, Rust, HTML, Django",Beginner,Gain hands-on experience in Education Tech.,960,2024,Next-Gen Tool for Cybersecurity Develop a educ...
4,5,Predictive Tool for Healthcare,Develop a robotics project titled 'Predictive ...,Robotics,"AWS, Solidity, Python",Advanced,Build deployable solutions in Robotics.,640,2023,Predictive Tool for Healthcare Develop a robot...


In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF settings: ignore English stop words and keep at most 5000 features
tfidf_options = {"stop_words": "english", "max_features": 5000}
vectorizer = TfidfVectorizer(**tfidf_options)

# Fit the vectorizer on the combined project text and encode every row;
# `X` holds one feature vector per row of `df`.
corpus = df["combined_text"]
X = vectorizer.fit_transform(corpus)


In [7]:
from sklearn.metrics.pairwise import cosine_similarity

def recommend_projects(user_input, top_n=5):
    """Return the projects whose combined text is most similar to a query.

    The query is encoded with the notebook-level, already-fitted
    ``vectorizer`` and compared against every row of the notebook-level
    matrix ``X`` using cosine similarity; the best-scoring rows of ``df``
    are returned.

    Args:
        user_input: Free-text description of the user's interests.
        top_n: Maximum number of projects to return. Defaults to 5.

    Returns:
        A DataFrame holding the ``title``, ``description``, ``domain``,
        ``skills_required`` and ``goal`` columns of the selected rows,
        ordered from most to least similar. It contains fewer than
        ``top_n`` rows when the dataset is smaller than ``top_n`` and is
        empty when ``top_n`` is 0.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    columns = ["title", "description", "domain", "skills_required", "goal"]

    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")
    if top_n == 0:
        # ``[-0:]`` is the same slice as ``[0:]`` and would select every row
        # instead of none, so the zero case is handled explicitly.
        return df.iloc[0:0][columns]

    user_vec = vectorizer.transform([user_input])
    similarities = cosine_similarity(user_vec, X).flatten()
    # argsort is ascending: keep the last ``top_n`` positions, then reverse
    # them so the most similar project comes first.
    indices = similarities.argsort()[-top_n:][::-1]
    return df.iloc[indices][columns]


In [9]:
# Free-text description of the user's interests, used as the search query
user_query = "I am interested in machine learning and Python. I want to build something related to healthcare."

# Look up the five best-matching projects for the query and print them
recommendations = recommend_projects(user_input=user_query, top_n=5)
print(recommendations)


                                    title  \
846   Predictive Assistant for Healthcare   
375  Intelligent Assistant for Healthcare   
518             Smart Tool for Healthcare   
550      Next-Gen Analyzer for Healthcare   
648     Automated Platform for Healthcare   

                                           description            domain  \
846  Develop a machine learning project titled 'Pre...  Machine Learning   
375  Develop a machine learning project titled 'Int...  Machine Learning   
518  Develop a machine learning project titled 'Sma...  Machine Learning   
550  Develop a machine learning project titled 'Nex...  Machine Learning   
648  Develop a machine learning project titled 'Aut...  Machine Learning   

                                skills_required  \
846                Python, PyTorch, HTML, React   
375      Node.js, Docker, Python, Solidity, AWS   
518                 React, JavaScript, Solidity   
550          AWS, Kubernetes, Rust, NLP, Docker   
648  AWS, FastAPI