## __SOCRATES: PERSONALIZED LEARNING__

### __GROUP MEMBERS:__
_Deepti More_

_Nagaraju Medaraboina_

_Praiselin Gladston_

_Shireesha Thyaranahalli Narayana_

_Sudharsan Tirumal_

### __LOADING REQUIRED LIBRARIES:__

In [1]:
import pandas as pd
import plotly.express  as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go

plt.style.use('ggplot')

### __LOADING EXISTING DATA:__

In [2]:
courses_df = pd.read_csv('data/udemy_courses.csv')

# Print sample
courses_df.sample(5).reset_index(drop=True).style.set_properties(**{'background-color': '#161717','color': '#30c7e6','border-color': '#8b8c8c'})

Unnamed: 0,course_id,course_title,url,is_paid,price,num_subscribers,num_reviews,num_lectures,level,content_duration,published_timestamp,subject
0,413618,SASS - Beginner Crash Course,https://www.udemy.com/sass-beginner-crash-course/,True,25,1143,34,13,Beginner Level,1.0,2015-02-12T17:13:26Z,Web Development
1,1107986,Building Data Visualizations with D3 and Angular 2,https://www.udemy.com/building-data-visualizations-with-d3-and-angular-2/,True,125,293,30,31,Beginner Level,3.5,2017-02-23T14:19:16Z,Web Development
2,919906,Trading Options For Consistent Returns: Options Basics,https://www.udemy.com/trading-options-for-income/,False,0,4077,281,20,Beginner Level,1.5,2016-08-18T21:57:04Z,Business Finance
3,1147458,Complete Trading Plan Course for Stocks & Forex (10 Hours),https://www.udemy.com/killer-trading-plan-6-year-old-can-invest-10-hours-stocks-forex-etf/,True,200,1087,14,61,All Levels,9.5,2017-03-23T05:54:27Z,Business Finance
4,486412,Learning Electric Guitar for Absolute Beginners,https://www.udemy.com/electricguitarforabsolutebeginners/,True,35,19,2,12,Beginner Level,1.5,2015-05-04T21:47:44Z,Musical Instruments


### __DATA PREPROCESSING:__

In [3]:
# Handle missing values if any
courses_df.dropna(inplace=True)

In [4]:
temp_df = courses_df['subject'].value_counts().reset_index()

fig = go.Figure(data=[go.Pie(labels=temp_df['subject'],
                             values=temp_df['count'],
                             hole=.7,
                             title = '% of Courses by Subject',
                             marker_colors = px.colors.sequential.Blues_r,
                            )
                     
                     ])
fig.update_layout(title='Amount of Courses by Subject')
fig.show()

In [5]:
# Check if there are any duplicate rows
print("Number of duplicate rows:", courses_df.duplicated().sum())

Number of duplicate rows: 6


In [6]:
# Drop irrelevant columns (e.g., course_id, url)
courses_df.drop(['course_id', 'url'], axis=1, inplace=True)

In [7]:
# Display the first few rows of the processed dataset
courses_df.sample(5).reset_index(drop=True).style.set_properties(**{'background-color': '#161717','color': '#30c7e6','border-color': '#8b8c8c'})

Unnamed: 0,course_title,is_paid,price,num_subscribers,num_reviews,num_lectures,level,content_duration,published_timestamp,subject
0,How to Design an Awesome Letterhead in Adobe Illustrator,False,0,4776,115,18,All Levels,0.566667,2017-03-03T03:20:22Z,Graphic Design
1,Instant Harmonica - play 12 bar blues Big Walter style now!,True,40,31,2,14,All Levels,1.0,2017-05-19T12:26:28Z,Musical Instruments
2,ASP.NET MVC 5 Project - Facebook Clone,True,30,210,19,41,All Levels,5.0,2017-02-09T04:38:46Z,Web Development
3,Trading Trends in ANY Market,True,30,632,49,22,All Levels,2.0,2015-10-04T18:43:16Z,Business Finance
4,الدروس التعليمية لسوق الفوركس والأسواق المالية الأخرى,False,0,368,3,69,All Levels,20.5,2017-04-25T15:02:33Z,Business Finance


### __NATURAL LANGUAGE PROCESSING:__

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [9]:
# Combine relevant text columns into a single text feature
courses_df['combined_text'] = courses_df['course_title'] + ' ' + courses_df['subject']

In [10]:
# Initialize TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer(stop_words='english')

# Fit and transform the text data
tfidf_matrix = tfidf_vectorizer.fit_transform(courses_df['combined_text'])

In [11]:
# Compute cosine similarity matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

### __COURSE RECOMMENDATION DEMO:__

In [12]:
# Function to get recommendations based on search query keywords
def get_recommendations(search_query):
    # Transform the search query using TF-IDF vectorizer
    search_query_vec = tfidf_vectorizer.transform([search_query])

    # Compute cosine similarity between search query and course descriptions
    cosine_sim_query = linear_kernel(search_query_vec, tfidf_matrix).flatten()

    # Get indices of courses sorted by similarity score
    sim_scores = list(enumerate(cosine_sim_query))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get top 10 most similar courses
    sim_scores = sim_scores[:10]
    course_indices = [i[0] for i in sim_scores]

    return list(courses_df['course_title'].iloc[course_indices])

In [13]:
# Example usage
search_query = "How to trade"
recommendations = get_recommendations(search_query)
print("Recommended courses based on search query:\n\n", search_query)
for course in recommendations:
    print(course)

Recommended courses based on search query:

 How to trade
How to trade options
Learn to Trade for Profit: Find and Trade Winning Stocks
Learn to Trade Forex Big U-Turn Trade
How to trade in the Forex market
Learn To Trade The Forex Naked Price Action Acapulco Trade
Learn to Trade Forex Naked Price Action Wammie Trade
Day Trading - Learn to Day Trade / Swing Trade In One Hour.
Trade for Profit: Find-Trade Stocks Successfully Arabic عربي
WE WILL PAY YOU TO TRADE WITH US
Trade for a Living


### __USER FEEDBACK AND RECOMMENDATION DEMO:__

In [14]:
# Initialize feedback for all courses with 0
courses_df['rating'] = 0

In [15]:
# Function to collect user feedback and suggest alternative courses if rating is low
def collect_feedback(course_title, rating):
    existing_rating = courses_df[courses_df['course_title'] == course_title]['rating'][0]
    updated_rating = (existing_rating + rating) / 2
    courses_df.loc[courses_df['course_title'] == course_title, 'rating'] = updated_rating

    # Check if rating is low (below threshold)
    low_rating_threshold = 3
    if rating < low_rating_threshold:
        # Get recommendations for alternative courses based on the course title
        alternative_courses = get_recommendations(course_title)
        print("Your rating for the course '{}' is low. Here are some alternative courses you may like:\n".format(course_title))
        for course in recommendations[1:]:
            print(course)

In [16]:
# Example usage
collect_feedback('Ultimate Investment Banking Course', 2)

Your rating for the course 'Ultimate Investment Banking Course' is low. Here are some alternative courses you may like:

Learn to Trade for Profit: Find and Trade Winning Stocks
Learn to Trade Forex Big U-Turn Trade
How to trade in the Forex market
Learn To Trade The Forex Naked Price Action Acapulco Trade
Learn to Trade Forex Naked Price Action Wammie Trade
Day Trading - Learn to Day Trade / Swing Trade In One Hour.
Trade for Profit: Find-Trade Stocks Successfully Arabic عربي
WE WILL PAY YOU TO TRADE WITH US
Trade for a Living
