# IMPORT ALL THE LIBRARIES

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

# Step 1: Data Loading and Exploration

In [2]:
resources_df = pd.read_csv('educational_resources_dataset.csv')
preferences_df = pd.read_csv('user_preferences_dataset.csv')
interactions_df = pd.read_csv('interaction_dataset.csv')
ratings_df = pd.read_csv('external_metadata_dataset.csv')

# DATASET 1

In [3]:
resources_df

Unnamed: 0,Title,Description,Author/Instructor,Topic/Subject,Difficulty Level,Format,Duration/Length,Price,Ratings,Reviews,Tags/Keywords,Resource ID,Resource ID.1
0,Introduction to Human Anatomy,A comprehensive guide to Human Anatomy.,Laura Martinez,Human Anatomy,Advanced,Text,6.0,50.0,4.343940,40.0,Human Anatomy,575,575
1,Advanced Cognitive Science Concepts,A comprehensive guide to Cognitive Science.,Jane Smith,Cognitive Science,Intermediate,Interactive,10.0,10.0,3.739550,9.0,Cognitive Science,1377,1377
2,Advanced Photography Concepts,A comprehensive guide to Photography.,Michael Wilson,Photography,Beginner,Text,12.0,40.0,3.655596,53.0,Photography,1176,1176
3,Advanced Statistics Concepts,A comprehensive guide to Statistics.,Christopher Anderson,Statistics,Beginner,Interactive,4.0,10.0,4.431646,100.0,Statistics,118,118
4,Advanced International Relations Concepts,A comprehensive guide to International Relations.,Michael Wilson,International Relations,Intermediate,Text,6.0,30.0,4.246014,26.0,International Relations,1641,1641
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9918,,,,,,,,,,,,1195,1195
9919,,,,,,,,,,,,1561,1561
9920,,,,,,,,,,,,104,104
9921,,,,,,,,,,,,1030,1030


# DATASET 2

In [4]:
preferences_df

Unnamed: 0,User ID,Resource ID,Ratings,Time Spent (minutes),Topics/Subjects of Interest,Preferred Learning Formats,Learning Goals/Objectives
0,1001,575,3.289044,44,Cognitive Science,Text,Gain new skills
1,1001,1377,3.416622,22,Quantum Mechanics,Interactive,Gain new skills
2,1001,1176,1.742436,78,Wilderness Survival,Video,Gain new skills
3,1001,118,1.764804,99,Renewable Energy,Video,Gain new skills
4,1001,1641,3.979411,118,Cultural Studies,Video,Gain new skills
...,...,...,...,...,...,...,...
9918,2000,1195,4.373124,16,Public Health,Text,Gain new skills
9919,2000,1561,1.532473,53,Music Composition,Text,Advance career
9920,2000,104,4.579405,92,Information Technology,Video,Gain new skills
9921,2000,1030,2.768971,8,Brand Management,Interactive,Advance career


# DATASET 3

In [5]:
interactions_df

Unnamed: 0,User ID,Resource ID,Interaction Type,Timestamp,Duration Spent (minutes)
0,1243,1618,View,2024-04-03 19:35:37.842203,99
1,1984,1616,Bookmark,2024-04-10 19:35:37.842203,8
2,1064,1158,View,2024-04-02 19:35:37.842203,8
3,1298,1574,View,2024-03-24 19:35:37.842203,63
4,1785,152,Like,2024-03-17 19:35:37.842203,97
...,...,...,...,...,...
4995,1620,1043,Like,2024-03-28 19:35:37.898592,35
4996,1017,160,View,2024-03-26 19:35:37.898592,11
4997,1468,1521,Bookmark,2024-04-08 19:35:37.898592,111
4998,1386,1575,View,2024-03-31 19:35:37.898592,98


# DATASET 4

In [6]:
ratings_df

Unnamed: 0,User ID,Resource ID,Ratings,Time Spent (minutes),Topics/Subjects of Interest,Preferred Learning Formats,Learning Goals/Objectives,Publication Year,Citation Count,Popularity Score
0,1001,575,3.289044,44,Cognitive Science,Text,Gain new skills,2021,580,6.062141
1,1001,1377,3.416622,22,Quantum Mechanics,Interactive,Gain new skills,2016,151,0.038819
2,1001,1176,1.742436,78,Wilderness Survival,Video,Gain new skills,2009,325,1.022936
3,1001,118,1.764804,99,Renewable Energy,Video,Gain new skills,2005,912,3.527955
4,1001,1641,3.979411,118,Cultural Studies,Video,Gain new skills,2013,225,7.902008
...,...,...,...,...,...,...,...,...,...,...
9918,2000,1195,4.373124,16,Public Health,Text,Gain new skills,2020,335,5.686753
9919,2000,1561,1.532473,53,Music Composition,Text,Advance career,2013,411,7.428258
9920,2000,104,4.579405,92,Information Technology,Video,Gain new skills,2001,405,0.839829
9921,2000,1030,2.768971,8,Brand Management,Interactive,Advance career,2009,750,1.344146


In [7]:
# Explore the data
print("Resources data:")
print(resources_df.head())
print("\nPreferences data:")
print(preferences_df.head())
print("\nInteractions data:")
print(interactions_df.head())
print("\nRatings data:")
print(ratings_df.head())

Resources data:
                                       Title  \
0              Introduction to Human Anatomy   
1        Advanced Cognitive Science Concepts   
2              Advanced Photography Concepts   
3               Advanced Statistics Concepts   
4  Advanced International Relations Concepts   

                                         Description     Author/Instructor  \
0            A comprehensive guide to Human Anatomy.        Laura Martinez   
1        A comprehensive guide to Cognitive Science.            Jane Smith   
2              A comprehensive guide to Photography.        Michael Wilson   
3               A comprehensive guide to Statistics.  Christopher Anderson   
4  A comprehensive guide to International Relations.        Michael Wilson   

             Topic/Subject Difficulty Level       Format  Duration/Length  \
0            Human Anatomy         Advanced         Text              6.0   
1        Cognitive Science     Intermediate  Interactive             10.

# DATASETS FOR SUGGEST APP AND COURSES

In [122]:
import pandas as pd
import numpy as np
import random

# Load subject list
subjects = [
    "Mathematics", "Physics", "Chemistry", "Biology", "History", 
    "Geography", "Literature", "English Language", "Computer Science", "Economics",
    "Psychology", "Sociology", "Political Science", "Philosophy", "Art History",
    "Anthropology", "Environmental Science", "Astronomy", "Linguistics", "Business Administration"
]

# Generate sample data
data = []
for subject in subjects:
    for i in range(20):
        course_id = subject[:3].upper() + str(i).zfill(3)  # Example: MAT001
        course_name = f"{subject} Course {i+1}"
        description = f"This course covers various topics in {subject}."
        instructor = f"John Doe"
        level = random.choice(["Beginner", "Intermediate", "Advanced"])
        duration = random.randint(5, 50)  # Random duration in hours
        language = random.choice(["English", "Spanish", "French"])
        price = round(random.uniform(20, 200), 2)  # Random price
        rating = round(random.uniform(1, 5), 2)  # Random rating
        enrollment = random.randint(100, 10000)  # Random enrollment
        skills_covered = ", ".join(random.sample(["Skill 1", "Skill 2", "Skill 3", "Skill 4"], random.randint(1, 4)))
        prerequisites = ", ".join(random.sample(["Prereq 1", "Prereq 2", "Prereq 3"], random.randint(0, 3)))
        lecture_video_url = f"https://www.example.com/{subject.lower().replace(' ', '-')}/lecture_{i}"
        quiz_url = f"https://www.example.com/{subject.lower().replace(' ', '-')}/quiz_{i}"
        assignment_url = f"https://www.example.com/{subject.lower().replace(' ', '-')}/assignment_{i}"
        discussion_forum_url = f"https://www.example.com/{subject.lower().replace(' ', '-')}/forum_{i}"
        completion_certificate = random.choice(["Yes", "No"])
        
        data.append([course_id, course_name, subject, description, instructor, level, duration, language,
                     price, rating, enrollment, skills_covered, prerequisites, lecture_video_url,
                     quiz_url, assignment_url, discussion_forum_url, completion_certificate])

# Create DataFrame
columns = ["Course ID", "Course Name", "Subject", "Description", "Instructor", "Level", "Duration",
           "Language", "Price", "Rating", "Enrollment", "Skills Covered", "Prerequisites",
           "Lecture Video URL", "Quiz URL", "Assignment URL", "Discussion Forum URL",
           "Completion Certificate"]
df = pd.DataFrame(data, columns=columns)

# Save to CSV
df.to_csv("educational_courses_dataset.csv", index=False)


# Step 2: Data Preprocessing

In [8]:
# Data Preprocessing
# Resources Dataset
resources_df.dropna(inplace=True)  # Drop rows with missing values
resources_df.drop_duplicates(inplace=True)  # Remove duplicates

# Preferences Dataset
preferences_df.dropna(inplace=True)  # Drop rows with missing values
preferences_df.drop_duplicates(inplace=True)  # Remove duplicates

# Interactions Dataset
interactions_df.dropna(inplace=True)  # Drop rows with missing values
interactions_df.drop_duplicates(inplace=True)  # Remove duplicates

# Ratings Dataset
ratings_df.dropna(inplace=True)  # Drop rows with missing values
ratings_df.drop_duplicates(inplace=True)  # Remove duplicates

# feature engineering:-convert categorical  columns to numerical columns

In [9]:
data = pd.get_dummies(resources_df)

In [10]:
data

Unnamed: 0,Duration/Length,Price,Ratings,Reviews,Resource ID,Resource ID.1,Title_Advanced Aerospace Engineering Concepts,Title_Advanced Agriculture Concepts,Title_Advanced Anthropology Concepts,Title_Advanced Architecture Concepts,...,Tags/Keywords_Sociology,Tags/Keywords_Software Engineering,Tags/Keywords_Sports Science,Tags/Keywords_Statistics,Tags/Keywords_Supply Chain Management,Tags/Keywords_Tax Law,Tags/Keywords_Theater Arts,Tags/Keywords_Urban Planning,Tags/Keywords_Wilderness Survival,Tags/Keywords_Zoology
0,6.0,50.0,4.343940,40.0,575,575,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,10.0,10.0,3.739550,9.0,1377,1377,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,12.0,40.0,3.655596,53.0,1176,1176,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4.0,10.0,4.431646,100.0,118,118,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,6.0,30.0,4.246014,26.0,1641,1641,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,2.0,20.0,3.081263,9.0,723,723,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1996,4.0,10.0,3.693427,29.0,341,341,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1997,2.0,50.0,4.708298,77.0,1264,1264,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1998,10.0,40.0,3.888878,97.0,1192,1192,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
data.to_csv('cleaned_educational_resources.csv', index=False)

In [12]:
# from sklearn.preprocessing import LabelEncoder

# # Initialize LabelEncoder
# label_encoder = LabelEncoder()

# # Convert categorical variables to numerical using LabelEncoder
# for column in data.select_dtypes(include=['object']).columns:
#     data[column] = label_encoder.fit_transform(data[column])

# # Check the cleaned dataset
# data


In [13]:
from sklearn.preprocessing import LabelEncoder

from sklearn.preprocessing import LabelEncoder# Drop rows with missing values
preferences_df.dropna(inplace=True)

# Remove duplicates
preferences_df.drop_duplicates(inplace=True)

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Convert categorical variables to numerical using LabelEncoder
for column in preferences_df.select_dtypes(include=['object']).columns:
    preferences_df[column] = label_encoder.fit_transform(preferences_df[column])

In [14]:
preferences_df

Unnamed: 0,User ID,Resource ID,Ratings,Time Spent (minutes),Topics/Subjects of Interest,Preferred Learning Formats,Learning Goals/Objectives
0,1001,575,3.289044,44,15,1,1
1,1001,1377,3.416622,22,85,0,1
2,1001,1176,1.742436,78,98,2,1
3,1001,118,1.764804,99,87,2,1
4,1001,1641,3.979411,118,23,2,1
...,...,...,...,...,...,...,...
9918,2000,1195,4.373124,16,82,1,1
9919,2000,1561,1.532473,53,68,1,0
9920,2000,104,4.579405,92,51,2,1
9921,2000,1030,2.768971,8,10,0,0


In [15]:
# Drop rows with missing values
interactions_df.dropna(inplace=True)

# Remove duplicates
interactions_df.drop_duplicates(inplace=True)

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Convert categorical variables to numerical using LabelEncoder
for column in interactions_df.select_dtypes(include=['object']).columns:
    interactions_df[column] = label_encoder.fit_transform(interactions_df[column])

In [16]:
interactions_df

Unnamed: 0,User ID,Resource ID,Interaction Type,Timestamp,Duration Spent (minutes)
0,1243,1618,2,249,99
1,1984,1616,0,345,8
2,1064,1158,2,235,8
3,1298,1574,2,119,63
4,1785,152,1,27,97
...,...,...,...,...,...
4995,1620,1043,1,184,35
4996,1017,160,2,157,11
4997,1468,1521,0,329,111
4998,1386,1575,2,222,98


In [17]:
# Drop rows with missing values
ratings_df.dropna(inplace=True)

# Remove duplicates
ratings_df.drop_duplicates(inplace=True)

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Convert categorical variables to numerical using LabelEncoder
for column in ratings_df.select_dtypes(include=['object']).columns:
    ratings_df[column] = label_encoder.fit_transform(ratings_df[column])

In [18]:
ratings_df

Unnamed: 0,User ID,Resource ID,Ratings,Time Spent (minutes),Topics/Subjects of Interest,Preferred Learning Formats,Learning Goals/Objectives,Publication Year,Citation Count,Popularity Score
0,1001,575,3.289044,44,15,1,1,2021,580,6.062141
1,1001,1377,3.416622,22,85,0,1,2016,151,0.038819
2,1001,1176,1.742436,78,98,2,1,2009,325,1.022936
3,1001,118,1.764804,99,87,2,1,2005,912,3.527955
4,1001,1641,3.979411,118,23,2,1,2013,225,7.902008
...,...,...,...,...,...,...,...,...,...,...
9918,2000,1195,4.373124,16,82,1,1,2020,335,5.686753
9919,2000,1561,1.532473,53,68,1,0,2013,411,7.428258
9920,2000,104,4.579405,92,51,2,1,2001,405,0.839829
9921,2000,1030,2.768971,8,10,0,0,2009,750,1.344146


# Step 4: Model Selection and Training


In [19]:
import pandas as pd

# Load the current dataset
resources_df = pd.read_csv('educational_resources_dataset.csv')


# Load the previous dataset with "Resource ID" column
preferences_df = pd.read_csv('user_preferences_dataset.csv')

# Concatenate the datasets along the columns axis
data = pd.concat([resources_df, preferences_df['Resource ID']], axis=1)

# Check the combined dataset
print(data.head())

# Save the combined dataset if needed
data.to_csv('educational_resources_dataset.csv', index=False)

                                       Title  \
0              Introduction to Human Anatomy   
1        Advanced Cognitive Science Concepts   
2              Advanced Photography Concepts   
3               Advanced Statistics Concepts   
4  Advanced International Relations Concepts   

                                         Description     Author/Instructor  \
0            A comprehensive guide to Human Anatomy.        Laura Martinez   
1        A comprehensive guide to Cognitive Science.            Jane Smith   
2              A comprehensive guide to Photography.        Michael Wilson   
3               A comprehensive guide to Statistics.  Christopher Anderson   
4  A comprehensive guide to International Relations.        Michael Wilson   

             Topic/Subject Difficulty Level       Format  Duration/Length  \
0            Human Anatomy         Advanced         Text              6.0   
1        Cognitive Science     Intermediate  Interactive             10.0   
2          

In [20]:
resources_df = pd.read_csv('educational_resources_dataset.csv')
resources_df 

Unnamed: 0,Title,Description,Author/Instructor,Topic/Subject,Difficulty Level,Format,Duration/Length,Price,Ratings,Reviews,Tags/Keywords,Resource ID,Resource ID.1,Resource ID.2
0,Introduction to Human Anatomy,A comprehensive guide to Human Anatomy.,Laura Martinez,Human Anatomy,Advanced,Text,6.0,50.0,4.343940,40.0,Human Anatomy,575,575,575
1,Advanced Cognitive Science Concepts,A comprehensive guide to Cognitive Science.,Jane Smith,Cognitive Science,Intermediate,Interactive,10.0,10.0,3.739550,9.0,Cognitive Science,1377,1377,1377
2,Advanced Photography Concepts,A comprehensive guide to Photography.,Michael Wilson,Photography,Beginner,Text,12.0,40.0,3.655596,53.0,Photography,1176,1176,1176
3,Advanced Statistics Concepts,A comprehensive guide to Statistics.,Christopher Anderson,Statistics,Beginner,Interactive,4.0,10.0,4.431646,100.0,Statistics,118,118,118
4,Advanced International Relations Concepts,A comprehensive guide to International Relations.,Michael Wilson,International Relations,Intermediate,Text,6.0,30.0,4.246014,26.0,International Relations,1641,1641,1641
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9918,,,,,,,,,,,,1195,1195,1195
9919,,,,,,,,,,,,1561,1561,1561
9920,,,,,,,,,,,,104,104,104
9921,,,,,,,,,,,,1030,1030,1030


In [21]:
resources_df.dropna(inplace=True)  # Drop rows with missing values
resources_df.drop_duplicates(inplace=True)  # Remove duplicates
resources_df

Unnamed: 0,Title,Description,Author/Instructor,Topic/Subject,Difficulty Level,Format,Duration/Length,Price,Ratings,Reviews,Tags/Keywords,Resource ID,Resource ID.1,Resource ID.2
0,Introduction to Human Anatomy,A comprehensive guide to Human Anatomy.,Laura Martinez,Human Anatomy,Advanced,Text,6.0,50.0,4.343940,40.0,Human Anatomy,575,575,575
1,Advanced Cognitive Science Concepts,A comprehensive guide to Cognitive Science.,Jane Smith,Cognitive Science,Intermediate,Interactive,10.0,10.0,3.739550,9.0,Cognitive Science,1377,1377,1377
2,Advanced Photography Concepts,A comprehensive guide to Photography.,Michael Wilson,Photography,Beginner,Text,12.0,40.0,3.655596,53.0,Photography,1176,1176,1176
3,Advanced Statistics Concepts,A comprehensive guide to Statistics.,Christopher Anderson,Statistics,Beginner,Interactive,4.0,10.0,4.431646,100.0,Statistics,118,118,118
4,Advanced International Relations Concepts,A comprehensive guide to International Relations.,Michael Wilson,International Relations,Intermediate,Text,6.0,30.0,4.246014,26.0,International Relations,1641,1641,1641
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,Religious Studies for Beginners,A comprehensive guide to Religious Studies.,Laura Martinez,Religious Studies,Advanced,Interactive,2.0,20.0,3.081263,9.0,Religious Studies,723,723,723
1996,Urban Planning Essentials,A comprehensive guide to Urban Planning.,Jane Smith,Urban Planning,Intermediate,Text,4.0,10.0,3.693427,29.0,Urban Planning,341,341,341
1997,Mastering Human Rights,A comprehensive guide to Human Rights.,Laura Martinez,Human Rights,Beginner,Video,2.0,50.0,4.708298,77.0,Human Rights,1264,1264,1264
1998,Mastering Linguistic Anthropology,A comprehensive guide to Linguistic Anthropology.,David Johnson,Linguistic Anthropology,Advanced,Video,10.0,40.0,3.888878,97.0,Linguistic Anthropology,1192,1192,1192


In [22]:
resources_df1 = pd.get_dummies(data)
resources_df1.dropna(inplace=True)  # Drop rows with missing values
resources_df1.drop_duplicates(inplace=True)  # Remove duplicates

resources_df1

Unnamed: 0,Duration/Length,Price,Ratings,Reviews,Resource ID,Resource ID.1,Resource ID.2,Title_Advanced Aerospace Engineering Concepts,Title_Advanced Agriculture Concepts,Title_Advanced Anthropology Concepts,...,Tags/Keywords_Sociology,Tags/Keywords_Software Engineering,Tags/Keywords_Sports Science,Tags/Keywords_Statistics,Tags/Keywords_Supply Chain Management,Tags/Keywords_Tax Law,Tags/Keywords_Theater Arts,Tags/Keywords_Urban Planning,Tags/Keywords_Wilderness Survival,Tags/Keywords_Zoology
0,6.0,50.0,4.343940,40.0,575,575,575,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,10.0,10.0,3.739550,9.0,1377,1377,1377,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,12.0,40.0,3.655596,53.0,1176,1176,1176,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4.0,10.0,4.431646,100.0,118,118,118,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,6.0,30.0,4.246014,26.0,1641,1641,1641,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,2.0,20.0,3.081263,9.0,723,723,723,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1996,4.0,10.0,3.693427,29.0,341,341,341,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1997,2.0,50.0,4.708298,77.0,1264,1264,1264,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1998,10.0,40.0,3.888878,97.0,1192,1192,1192,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [59]:


# Create a new column 'Unique_Resource_ID' with unique numerical IDs
resources_df1['Unique_Resource_ID'] = range(1, len(resources_df1) + 1)
resources_df1['Unique_User_ID'] = range(1, len(resources_df1) + 1)
# Save the modified dataset
resources_df1.to_csv('modified_dataset.csv', index=False)


# Save the modified dataset
#data.to_csv('modified_dataset.csv', index=False)
resources_df1


Unnamed: 0,Duration/Length,Price,Ratings,Reviews,Resource ID,Resource ID.1,Resource ID.2,Title_Advanced Aerospace Engineering Concepts,Title_Advanced Agriculture Concepts,Title_Advanced Anthropology Concepts,...,Tags/Keywords_Sports Science,Tags/Keywords_Statistics,Tags/Keywords_Supply Chain Management,Tags/Keywords_Tax Law,Tags/Keywords_Theater Arts,Tags/Keywords_Urban Planning,Tags/Keywords_Wilderness Survival,Tags/Keywords_Zoology,Unique_Resource_ID,Unique_User_ID
0,6.0,50.0,4.343940,40.0,575,575,575,0,0,0,...,0,0,0,0,0,0,0,0,1,1
1,10.0,10.0,3.739550,9.0,1377,1377,1377,0,0,0,...,0,0,0,0,0,0,0,0,2,2
2,12.0,40.0,3.655596,53.0,1176,1176,1176,0,0,0,...,0,0,0,0,0,0,0,0,3,3
3,4.0,10.0,4.431646,100.0,118,118,118,0,0,0,...,0,1,0,0,0,0,0,0,4,4
4,6.0,30.0,4.246014,26.0,1641,1641,1641,0,0,0,...,0,0,0,0,0,0,0,0,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,2.0,20.0,3.081263,9.0,723,723,723,0,0,0,...,0,0,0,0,0,0,0,0,1996,1996
1996,4.0,10.0,3.693427,29.0,341,341,341,0,0,0,...,0,0,0,0,0,1,0,0,1997,1997
1997,2.0,50.0,4.708298,77.0,1264,1264,1264,0,0,0,...,0,0,0,0,0,0,0,0,1998,1998
1998,10.0,40.0,3.888878,97.0,1192,1192,1192,0,0,0,...,0,0,0,0,0,0,0,0,1999,1999


In [31]:
# Convert columns to integer or float
resources_df1['Duration/Length'] = resources_df1['Duration/Length'].astype(float)
resources_df1['Price'] = resources_df1['Price'].astype(float)
resources_df1['Ratings'] = resources_df1['Ratings'].astype(float)
resources_df1['Reviews'] = resources_df1['Reviews'].astype(float)
resources_df1['Resource ID'] = resources_df1['Resource ID'].astype(int)

# Convert all remaining columns to integer
resources_df1.iloc[:, 5:] = resources_df1.iloc[:, 5:].astype(int)

resources_df1

  resources_df1.iloc[:, 5:] = resources_df1.iloc[:, 5:].astype(int)


Unnamed: 0,Duration/Length,Price,Ratings,Reviews,Resource ID,Resource ID.1,Resource ID.2,Title_Advanced Aerospace Engineering Concepts,Title_Advanced Agriculture Concepts,Title_Advanced Anthropology Concepts,...,Tags/Keywords_Software Engineering,Tags/Keywords_Sports Science,Tags/Keywords_Statistics,Tags/Keywords_Supply Chain Management,Tags/Keywords_Tax Law,Tags/Keywords_Theater Arts,Tags/Keywords_Urban Planning,Tags/Keywords_Wilderness Survival,Tags/Keywords_Zoology,Unique_Resource_ID
0,6.0,50.0,4.343940,40.0,575,575,575,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,10.0,10.0,3.739550,9.0,1377,1377,1377,0,0,0,...,0,0,0,0,0,0,0,0,0,2
2,12.0,40.0,3.655596,53.0,1176,1176,1176,0,0,0,...,0,0,0,0,0,0,0,0,0,3
3,4.0,10.0,4.431646,100.0,118,118,118,0,0,0,...,0,0,1,0,0,0,0,0,0,4
4,6.0,30.0,4.246014,26.0,1641,1641,1641,0,0,0,...,0,0,0,0,0,0,0,0,0,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,2.0,20.0,3.081263,9.0,723,723,723,0,0,0,...,0,0,0,0,0,0,0,0,0,1996
1996,4.0,10.0,3.693427,29.0,341,341,341,0,0,0,...,0,0,0,0,0,0,1,0,0,1997
1997,2.0,50.0,4.708298,77.0,1264,1264,1264,0,0,0,...,0,0,0,0,0,0,0,0,0,1998
1998,10.0,40.0,3.888878,97.0,1192,1192,1192,0,0,0,...,0,0,0,0,0,0,0,0,0,1999


In [79]:
import pandas as pd

preferences_df1 = pd.read_csv('user_preferences_dataset.csv')


# Convert 'User ID' and 'Resource ID' columns to integer
preferences_df1['User ID'] = preferences_df1['User ID'].astype(int)
preferences_df1['Resource ID'] = preferences_df1['Resource ID'].astype(int)

# Convert 'Ratings' and 'Time Spent (minutes)' columns to float
preferences_df1['Ratings'] = pd.to_numeric(preferences_df1['Ratings'], errors='coerce')
preferences_df1['Time Spent (minutes)'] = pd.to_numeric(preferences_df1['Time Spent (minutes)'], errors='coerce')

# Convert remaining columns to integer
for column in preferences_df.columns[4:]:
    preferences_df1[column] = pd.to_numeric(preferences_df1[column], errors='coerce').fillna(0).astype(int)

# Print the updated DataFrame
preferences_df1
# Save the updated DataFrame
preferences_df1.to_csv('updated_preferences_dataset.csv', index=False)


Unnamed: 0,User ID,Resource ID,Ratings,Time Spent (minutes),Topics/Subjects of Interest,Preferred Learning Formats,Learning Goals/Objectives
0,1001,575,3.289044,44,0,0,0
1,1001,1377,3.416622,22,0,0,0
2,1001,1176,1.742436,78,0,0,0
3,1001,118,1.764804,99,0,0,0
4,1001,1641,3.979411,118,0,0,0
...,...,...,...,...,...,...,...
9918,2000,1195,4.373124,16,0,0,0
9919,2000,1561,1.532473,53,0,0,0
9920,2000,104,4.579405,92,0,0,0
9921,2000,1030,2.768971,8,0,0,0


In [80]:
import pandas as pd

# Read the preferences dataset
preferences_df1 = pd.read_csv('updated_preferences_dataset.csv')

# Create a new column 'Unique_Resource_ID' with unique numerical IDs
preferences_df1['Unique_Resource_ID'] = range(1, len(preferences_df1) + 1)
preferences_df1['Unique_User_ID'] = range(1, len(preferences_df1) + 1)

# Print the updated DataFrame
preferences_df1

# Save the updated DataFrame if needed
# preferences_df1.to_csv('updated_user_preferences_dataset.csv', index=False)


Unnamed: 0,User ID,Resource ID,Ratings,Time Spent (minutes),Topics/Subjects of Interest,Preferred Learning Formats,Learning Goals/Objectives,Unique_Resource_ID,Unique_User_ID
0,1001,575,3.289044,44,0,0,0,1,1
1,1001,1377,3.416622,22,0,0,0,2,2
2,1001,1176,1.742436,78,0,0,0,3,3
3,1001,118,1.764804,99,0,0,0,4,4
4,1001,1641,3.979411,118,0,0,0,5,5
...,...,...,...,...,...,...,...,...,...
9918,2000,1195,4.373124,16,0,0,0,9919,9919
9919,2000,1561,1.532473,53,0,0,0,9920,9920
9920,2000,104,4.579405,92,0,0,0,9921,9921
9921,2000,1030,2.768971,8,0,0,0,9922,9922


In [81]:
preferences_df1

Unnamed: 0,User ID,Resource ID,Ratings,Time Spent (minutes),Topics/Subjects of Interest,Preferred Learning Formats,Learning Goals/Objectives,Unique_Resource_ID,Unique_User_ID
0,1001,575,3.289044,44,0,0,0,1,1
1,1001,1377,3.416622,22,0,0,0,2,2
2,1001,1176,1.742436,78,0,0,0,3,3
3,1001,118,1.764804,99,0,0,0,4,4
4,1001,1641,3.979411,118,0,0,0,5,5
...,...,...,...,...,...,...,...,...,...
9918,2000,1195,4.373124,16,0,0,0,9919,9919
9919,2000,1561,1.532473,53,0,0,0,9920,9920
9920,2000,104,4.579405,92,0,0,0,9921,9921
9921,2000,1030,2.768971,8,0,0,0,9922,9922


In [75]:
preferences_df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9923 entries, 0 to 9922
Data columns (total 9 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   User ID                      9923 non-null   int64  
 1   Resource ID                  9923 non-null   int64  
 2   Ratings                      9923 non-null   float64
 3   Time Spent (minutes)         9923 non-null   int64  
 4   Topics/Subjects of Interest  9923 non-null   object 
 5   Preferred Learning Formats   9923 non-null   object 
 6   Learning Goals/Objectives    9923 non-null   object 
 7   Unique_Resource_ID           9923 non-null   int64  
 8   Unique_User_ID               9923 non-null   int64  
dtypes: float64(1), int64(5), object(3)
memory usage: 697.8+ KB


In [34]:
import pandas as pd

# Load the dataset
interactions_df1 = pd.read_csv('interaction_dataset.csv')

# Define a mapping for Interaction Type
interaction_type_map = {
    'View': 0,
    'Bookmark': 1,
    'Like': 2
}

# Map the Interaction Type column using the defined mapping
interactions_df1['Interaction Type'] = interactions_df1['Interaction Type'].map(interaction_type_map)

# Convert 'Duration Spent (minutes)' column to integer
interactions_df1['Duration Spent (minutes)'] = interactions_df1['Duration Spent (minutes)'].astype(int)

# Print the updated DataFrame
interactions_df1
# Save the updated DataFrame
# interactions_df.to_csv('updated_interaction_dataset.csv', index=False)


Unnamed: 0,User ID,Resource ID,Interaction Type,Timestamp,Duration Spent (minutes)
0,1243,1618,0,2024-04-03 19:35:37.842203,99
1,1984,1616,1,2024-04-10 19:35:37.842203,8
2,1064,1158,0,2024-04-02 19:35:37.842203,8
3,1298,1574,0,2024-03-24 19:35:37.842203,63
4,1785,152,2,2024-03-17 19:35:37.842203,97
...,...,...,...,...,...
4995,1620,1043,2,2024-03-28 19:35:37.898592,35
4996,1017,160,0,2024-03-26 19:35:37.898592,11
4997,1468,1521,1,2024-04-08 19:35:37.898592,111
4998,1386,1575,0,2024-03-31 19:35:37.898592,98


In [52]:
import pandas as pd

# # Read the interactions dataset
# interactions_df1 = pd.read_csv('interactions_dataset.csv')

# Create a new column 'Unique_Resource_ID' with unique numerical IDs
interactions_df1['Unique_Resource_ID'] = range(1, len(interactions_df1) + 1)

# Print the updated DataFrame
interactions_df1

# Save the updated DataFrame if needed
# interactions_df1.to_csv('updated_interactions_dataset.csv', index=False)


Unnamed: 0,User ID,Resource ID,Interaction Type,Timestamp,Duration Spent (minutes),Unique_Resource_ID
0,1243,1618,0,2024-04-03 19:35:37.842203,99,1
1,1984,1616,1,2024-04-10 19:35:37.842203,8,2
2,1064,1158,0,2024-04-02 19:35:37.842203,8,3
3,1298,1574,0,2024-03-24 19:35:37.842203,63,4
4,1785,152,2,2024-03-17 19:35:37.842203,97,5
...,...,...,...,...,...,...
4995,1620,1043,2,2024-03-28 19:35:37.898592,35,4996
4996,1017,160,0,2024-03-26 19:35:37.898592,11,4997
4997,1468,1521,1,2024-04-08 19:35:37.898592,111,4998
4998,1386,1575,0,2024-03-31 19:35:37.898592,98,4999


In [35]:
import pandas as pd

# Load the dataset
ratings_df1 = pd.read_csv('external_metadata_dataset.csv')
import pandas as pd

# Load the dataset
ratings_df1 = pd.read_csv('external_metadata_dataset.csv')

# Convert 'Topics/Subjects of Interest' column to integers
topics_mapping = {topic: i for i, topic in enumerate(ratings_df['Topics/Subjects of Interest'].unique())}
ratings_df1['Topics/Subjects of Interest'] = ratings_df1['Topics/Subjects of Interest'].map(topics_mapping)

# Convert 'Preferred Learning Formats' column to integers
formats_mapping = {format: i for i, format in enumerate(ratings_df1['Preferred Learning Formats'].unique())}
ratings_df1['Preferred Learning Formats'] = ratings_df1['Preferred Learning Formats'].map(formats_mapping)

# Print the updated DataFrame
ratings_df1


Unnamed: 0,User ID,Resource ID,Ratings,Time Spent (minutes),Topics/Subjects of Interest,Preferred Learning Formats,Learning Goals/Objectives,Publication Year,Citation Count,Popularity Score
0,1001,575,3.289044,44,,0,Gain new skills,2021,580,6.062141
1,1001,1377,3.416622,22,,1,Gain new skills,2016,151,0.038819
2,1001,1176,1.742436,78,,2,Gain new skills,2009,325,1.022936
3,1001,118,1.764804,99,,2,Gain new skills,2005,912,3.527955
4,1001,1641,3.979411,118,,2,Gain new skills,2013,225,7.902008
...,...,...,...,...,...,...,...,...,...,...
9918,2000,1195,4.373124,16,,0,Gain new skills,2020,335,5.686753
9919,2000,1561,1.532473,53,,0,Advance career,2013,411,7.428258
9920,2000,104,4.579405,92,,2,Gain new skills,2001,405,0.839829
9921,2000,1030,2.768971,8,,1,Advance career,2009,750,1.344146


In [57]:
import pandas as pd


# Create a new column 'Unique_Resource_ID' with unique numerical IDs
ratings_df1['Unique_Resource_ID'] = range(1, len(ratings_df1) + 1)
ratings_df1['Unique_User_ID'] = range(1, len(ratings_df1) + 1)

ratings_df1

# Save the updated DataFrame if needed
# df.to_csv('updated_dataset.csv', index=False)


Unnamed: 0,User ID,Resource ID,Ratings,Time Spent (minutes),Topics/Subjects of Interest,Preferred Learning Formats,Learning Goals/Objectives,Publication Year,Citation Count,Popularity Score,Unique_Resource_ID,Unique_User_ID
0,1001,575,3.289044,44,,0,Gain new skills,2021,580,6.062141,1,1
1,1001,1377,3.416622,22,,1,Gain new skills,2016,151,0.038819,2,2
2,1001,1176,1.742436,78,,2,Gain new skills,2009,325,1.022936,3,3
3,1001,118,1.764804,99,,2,Gain new skills,2005,912,3.527955,4,4
4,1001,1641,3.979411,118,,2,Gain new skills,2013,225,7.902008,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...
9918,2000,1195,4.373124,16,,0,Gain new skills,2020,335,5.686753,9919,9919
9919,2000,1561,1.532473,53,,0,Advance career,2013,411,7.428258,9920,9920
9920,2000,104,4.579405,92,,2,Gain new skills,2001,405,0.839829,9921,9921
9921,2000,1030,2.768971,8,,1,Advance career,2009,750,1.344146,9922,9922


In [36]:
ratings_df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9923 entries, 0 to 9922
Data columns (total 10 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   User ID                      9923 non-null   int64  
 1   Resource ID                  9923 non-null   int64  
 2   Ratings                      9923 non-null   float64
 3   Time Spent (minutes)         9923 non-null   int64  
 4   Topics/Subjects of Interest  0 non-null      float64
 5   Preferred Learning Formats   9923 non-null   int64  
 6   Learning Goals/Objectives    9923 non-null   object 
 7   Publication Year             9923 non-null   int64  
 8   Citation Count               9923 non-null   int64  
 9   Popularity Score             9923 non-null   float64
dtypes: float64(3), int64(6), object(1)
memory usage: 775.4+ KB


In [83]:
ratings_df1.head()

Unnamed: 0,User ID,Resource ID,Ratings,Time Spent (minutes),Topics/Subjects of Interest,Preferred Learning Formats,Learning Goals/Objectives,Publication Year,Citation Count,Popularity Score,Unique_Resource_ID,Unique_User_ID
0,1001,575,3.289044,44,,0,Gain new skills,2021,580,6.062141,1,1
1,1001,1377,3.416622,22,,1,Gain new skills,2016,151,0.038819,2,2
2,1001,1176,1.742436,78,,2,Gain new skills,2009,325,1.022936,3,3
3,1001,118,1.764804,99,,2,Gain new skills,2005,912,3.527955,4,4
4,1001,1641,3.979411,118,,2,Gain new skills,2013,225,7.902008,5,5


In [64]:
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler
# from sklearn.svm import SVR
# from sklearn.metrics import mean_squared_error

# # # Read the datasets
# # resources_df = pd.read_csv('resources_dataset.csv')
# # preferences_df = pd.read_csv('preferences_dataset.csv')
# # interactions_df = pd.read_csv('interactions_dataset.csv')
# # ratings_df = pd.read_csv('ratings_dataset.csv')

# # Merge datasets
# merged_df = pd.merge(resources_df1, preferences_df1, on='Unique_Resource_ID')
# merged_df = pd.merge(merged_df, interactions_df1, on=['Unique_User_ID', 'Unique_Resource_ID'])
# merged_df = pd.merge(merged_df, ratings_df1, on=['Unique_User_ID', 'Unique_Resource_ID'])

# # Define features and target variable
# X = merged_df.drop(columns=['Popularity_Score'])
# y = merged_df['Popularity_Score']

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # Feature scaling
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)

# # Model selection and training
# model = SVR(kernel='rbf')
# model.fit(X_train_scaled, y_train)

# # Predictions
# y_pred = model.predict(X_test_scaled)

# # Evaluation
# mse = mean_squared_error(y_test, y_pred)
# print("Mean Squared Error:", mse)


In [103]:
import pandas as pd

# # Read the datasets
# resources_df1 = pd.read_csv('resources_dataset.csv')
# preferences_df1 = pd.read_csv('preferences_dataset.csv')
# interactions_df1 = pd.read_csv('interactions_dataset.csv')
# ratings_df1 = pd.read_csv('ratings_dataset.csv')

# Merge datasets
merged_df = pd.merge(resources_df1, preferences_df1, on='Unique_Resource_ID')
merged_df = pd.merge(merged_df, interactions_df1, on='Unique_Resource_ID')  # Adjust this line accordingly
merged_df = pd.merge(merged_df, ratings_df1, on='Unique_Resource_ID')

# Define features and target variable
X = merged_df.drop(columns=['Popularity Score'])
y = merged_df['Popularity Score']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


  merged_df = pd.merge(merged_df, ratings_df1, on='Unique_Resource_ID')


In [104]:
merged_df

Unnamed: 0,Duration/Length,Price,Ratings_x,Reviews,Resource ID_x,Resource ID.1,Resource ID_x.1,Title_Advanced Aerospace Engineering Concepts,Title_Advanced Agriculture Concepts,Title_Advanced Anthropology Concepts,...,Resource ID_y,Ratings,Time Spent (minutes)_y,Topics/Subjects of Interest_y,Preferred Learning Formats_y,Learning Goals/Objectives_y,Publication Year,Citation Count,Popularity Score,Unique_User_ID
0,6.0,50.0,4.343940,40.0,575,575,575,0,0,0,...,575,3.289044,44,,0,Gain new skills,2021,580,6.062141,1
1,10.0,10.0,3.739550,9.0,1377,1377,1377,0,0,0,...,1377,3.416622,22,,1,Gain new skills,2016,151,0.038819,2
2,12.0,40.0,3.655596,53.0,1176,1176,1176,0,0,0,...,1176,1.742436,78,,2,Gain new skills,2009,325,1.022936,3
3,4.0,10.0,4.431646,100.0,118,118,118,0,0,0,...,118,1.764804,99,,2,Gain new skills,2005,912,3.527955,4
4,6.0,30.0,4.246014,26.0,1641,1641,1641,0,0,0,...,1641,3.979411,118,,2,Gain new skills,2013,225,7.902008,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,2.0,20.0,3.081263,9.0,723,723,723,0,0,0,...,723,3.939688,45,,1,Gain new skills,2000,986,8.496108,1996
1996,4.0,10.0,3.693427,29.0,341,341,341,0,0,0,...,341,1.325149,51,,1,Personal interest,2010,362,7.566799,1997
1997,2.0,50.0,4.708298,77.0,1264,1264,1264,0,0,0,...,1264,1.959089,59,,0,Advance career,2018,917,7.247118,1998
1998,10.0,40.0,3.888878,97.0,1192,1192,1192,0,0,0,...,1192,3.728296,36,,2,Advance career,2006,79,2.641602,1999


In [85]:
preferences_df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9923 entries, 0 to 9922
Data columns (total 9 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   User ID                      9923 non-null   int64  
 1   Resource ID                  9923 non-null   int64  
 2   Ratings                      9923 non-null   float64
 3   Time Spent (minutes)         9923 non-null   int64  
 4   Topics/Subjects of Interest  9923 non-null   int64  
 5   Preferred Learning Formats   9923 non-null   int64  
 6   Learning Goals/Objectives    9923 non-null   int64  
 7   Unique_Resource_ID           9923 non-null   int64  
 8   Unique_User_ID               9923 non-null   int64  
dtypes: float64(1), int64(8)
memory usage: 697.8 KB


In [86]:
# Using dot notation
resources_df1.Unique_Resource_ID

# Using bracket notation
resources_df1['Unique_Resource_ID']


0          1
1          2
2          3
3          4
4          5
        ... 
1995    1996
1996    1997
1997    1998
1998    1999
1999    2000
Name: Unique_Resource_ID, Length: 2000, dtype: int64

In [88]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Assuming X_train and X_test are your DataFrames

# Select only numeric columns
X_train_numeric = X_train.select_dtypes(include=['number'])
X_test_numeric = X_test.select_dtypes(include=['number'])

# Normalize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_numeric)
X_test_scaled = scaler.transform(X_test_numeric)


  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


In [109]:
from sklearn.impute import SimpleImputer

# Create an imputer object
imputer = SimpleImputer(strategy='mean')  # You can choose other strategies like 'median' or 'most_frequent'

# Fit and transform the imputer on the training data
X_train_imputed = imputer.fit_transform(X_train_scaled)

# Transform the test data using the fitted imputer
X_test_imputed = imputer.transform(X_test_scaled)


In [111]:
from sklearn.svm import SVR

# Train the model (Support Vector Regressor)
svr_model = SVR()
svr_model.fit(X_train_imputed, y_train)

# Step 5: Model Evaluation
# Predict on the test set
y_pred = svr_model.predict(X_test_imputed)

# Evaluate the model's performance (you can use appropriate regression metrics)
# For example, you can use mean squared error for evaluation
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)


Mean Squared Error: 8.313710314608457


In [135]:
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_test, y_pred)
mae

2.4394120045828305

In [112]:
# Step 6: Deployment Preparation


# Save the trained model and preprocessing steps for future use
import joblib

# Save the model
joblib.dump(svm_model, 'svm_model.pkl')

# Save the scaler
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [138]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import joblib

# # Load the merged dataset
# merged_df = pd.read_csv('merged_dataset.csv')

# Load the trained model and preprocessing steps
svm_model = joblib.load('svm_model.pkl')
scaler = joblib.load('scaler.pkl')

# Function to preprocess user preferences
def preprocess_preferences(preferences):
    # Convert user preferences to lowercase and split by comma
    return preferences.lower().split(',')

# Function to get recommendations based on user preferences
def get_recommendations(user_preferences):
    # Preprocess user preferences
    user_preferences = preprocess_preferences(user_preferences)
    # Select relevant features from the merged dataset
    user_df = merged_df[merged_df['Category'].isin(user_preferences)] if 'Category' in merged_df.columns else merged_df
    X_user = user_df.drop(columns=['Popularity Score'])
    # Scale the features
    X_user_scaled = scaler.transform(X_user)
    # Get recommendations using the trained model
    recommendations = svm_model.predict(X_user_scaled)
    return recommendations

# Function to provide educational resource recommendations
def provide_recommendations(preferences):
    recommendations = get_recommendations(preferences)
    return recommendations

# Conversation loop
print("Welcome to the Educational Resource Recommendation System!")
print("You can find the machine learning model used for recommendation at: [ML Model Link]")

while True:
    print("\nPlease provide your preferences to receive personalized recommendations.")
    user_preferences = input("Enter your preferences separated by commas (e.g., technology, humanities): ")
    
    # Get recommendations based on user preferences
    recommendations = provide_recommendations(user_preferences)
    
    print("\nRecommended educational resources based on your preferences:")
    for recommendation in recommendations:
        print(recommendation)
    
    # Ask if the user wants to continue or exit
    choice = input("\nDo you want to continue (Y/N)? ")
    if choice.lower() != 'y':
        print("Thank you for using the system. Goodbye!")
        break


Welcome to the Educational Resource Recommendation System!
You can find the machine learning model used for recommendation at: [ML Model Link]

Please provide your preferences to receive personalized recommendations.
Enter your preferences separated by commas (e.g., technology, humanities): HISTORY


ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- Learning Goals/Objectives_y
- Timestamp
