# Step 1: Import packages

In [1]:
import pandas as pd # type: ignore
from sklearn.model_selection import train_test_split # type: ignore
from sklearn.ensemble import RandomForestClassifier # type: ignore
from sklearn.preprocessing import StandardScaler, OneHotEncoder # type: ignore
from sklearn.compose import ColumnTransformer # type: ignore
from sklearn.pipeline import Pipeline # type: ignore
from sklearn.metrics import accuracy_score # type: ignore

# Step 2: Load the dataset and preprocessing

In [2]:
df = pd.read_csv('project_recommendation_dataset.csv')
df

Unnamed: 0,project_name,domain,difficulty,required_skills,estimated_time,popularity,tags
0,Chatbot,Web Development,Advanced,"HTML, CSS, Flask",29,7,"blog, platform"
1,Blog Platform,Web Development,Advanced,"React, Django",40,6,"blog, platform"
2,Weather App,Data Science,Advanced,"HTML, CSS, Flask",5,2,"weather, app"
3,Crypto Tracker,Mobile App Development,Beginner,"SQL, PostgreSQL",6,9,"weather, app"
4,Blog Platform,Data Science,Beginner,"Java, Android Studio",16,2,"crypto, tracker"
...,...,...,...,...,...,...,...
395,Weather App,AI/ML,Intermediate,"HTML, CSS, Flask",36,7,"portfolio, web"
396,Stock Predictor,Cybersecurity,Advanced,"HTML, CSS, Flask",18,10,"recipe, finder"
397,E-commerce Website,Web Development,Beginner,"TensorFlow, Keras",29,1,"blog, platform"
398,To-Do App,AI/ML,Intermediate,"Python, NLP, Flask",17,6,"mobile, to-do"


In [3]:
df = df.drop('tags', axis=1)
df = df.drop('popularity', axis=1)
df

Unnamed: 0,project_name,domain,difficulty,required_skills,estimated_time
0,Chatbot,Web Development,Advanced,"HTML, CSS, Flask",29
1,Blog Platform,Web Development,Advanced,"React, Django",40
2,Weather App,Data Science,Advanced,"HTML, CSS, Flask",5
3,Crypto Tracker,Mobile App Development,Beginner,"SQL, PostgreSQL",6
4,Blog Platform,Data Science,Beginner,"Java, Android Studio",16
...,...,...,...,...,...
395,Weather App,AI/ML,Intermediate,"HTML, CSS, Flask",36
396,Stock Predictor,Cybersecurity,Advanced,"HTML, CSS, Flask",18
397,E-commerce Website,Web Development,Beginner,"TensorFlow, Keras",29
398,To-Do App,AI/ML,Intermediate,"Python, NLP, Flask",17


In [4]:
df.isnull().sum()

project_name       0
domain             0
difficulty         0
required_skills    0
estimated_time     0
dtype: int64

In [5]:
df.size

2000

In [6]:
df.shape

(400, 5)

In [7]:
df.columns

Index(['project_name', 'domain', 'difficulty', 'required_skills',
       'estimated_time'],
      dtype='object')

In [8]:
df['project_name'].unique()

array(['Chatbot', 'Blog Platform', 'Weather App', 'Crypto Tracker',
       'Stock Predictor', 'Recipe Finder', 'Social Media App',
       'Web Portfolio', 'To-Do App', 'E-commerce Website'], dtype=object)

In [9]:
df.dtypes

project_name       object
domain             object
difficulty         object
required_skills    object
estimated_time      int64
dtype: object

In [10]:
df['project_name'].value_counts()

E-commerce Website    54
Weather App           45
Crypto Tracker        40
Blog Platform         39
Web Portfolio         39
Recipe Finder         38
Social Media App      38
Stock Predictor       37
To-Do App             37
Chatbot               33
Name: project_name, dtype: int64

In [11]:
features=df[['domain', 'difficulty', 'required_skills','estimated_time']]
target=df['project_name']

# 3. Split the data into training and testing sets

In [12]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# 4. Define preprocessing for numeric and categorical data

In [13]:
numeric_features = ['estimated_time']
categorical_features = ['domain', 'difficulty', 'required_skills']

# 5. Create the preprocessing pipeline

In [14]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),  # Apply StandardScaler to numeric features
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)  # Apply OneHotEncoder to categorical features
    ])

# 6. Create the ipeline with preprocessing and RandomForestClassifier

In [15]:
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

# 7. Train the pipeline (Ensure X_train is a DataFrame)


In [16]:
pipeline.fit(X_train, y_train)

# 8. Make predictions on the test data


In [17]:
y_pred = pipeline.predict(X_test)
y_pred

array(['To-Do App', 'Weather App', 'Web Portfolio', 'Social Media App',
       'Social Media App', 'Recipe Finder', 'Web Portfolio',
       'Blog Platform', 'Weather App', 'Social Media App',
       'Crypto Tracker', 'Weather App', 'Crypto Tracker', 'Blog Platform',
       'Blog Platform', 'Weather App', 'E-commerce Website', 'Chatbot',
       'To-Do App', 'Chatbot', 'Crypto Tracker', 'Crypto Tracker',
       'To-Do App', 'Stock Predictor', 'Stock Predictor', 'Blog Platform',
       'Crypto Tracker', 'Weather App', 'To-Do App', 'Weather App',
       'Social Media App', 'To-Do App', 'Chatbot', 'Stock Predictor',
       'Crypto Tracker', 'Web Portfolio', 'Stock Predictor',
       'Stock Predictor', 'Blog Platform', 'Social Media App', 'Chatbot',
       'Chatbot', 'Blog Platform', 'Weather App', 'To-Do App',
       'E-commerce Website', 'To-Do App', 'Recipe Finder',
       'Stock Predictor', 'Social Media App', 'Weather App',
       'Social Media App', 'Chatbot', 'To-Do App', 'Blog Platfo

# 9. Evaluate accuracy

In [29]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 8.75%


# Prediction sample

In [19]:
new_data = pd.DataFrame({
    'domain': ['Web Development'],    
    'difficulty': ['Beginner'],           # Difficulty level
    'required_skills': ['Python, Machine Learning'],  # Required skills
    'estimated_time': [30]                    # Estimated time in hours
})

In [20]:
predicted_project = pipeline.predict(new_data)
print(f"Predicted : {predicted_project[0]}")

Predicted : Web Portfolio


In [None]:
# from sklearn.tree import DecisionTreeClassifier


In [22]:
from joblib import dump # type: ignore

In [23]:
dump(pipeline,'./../savedModels/projectSuggest.joblib')

['./../savedModels/projectSuggest.joblib']