In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

In [None]:
# Read the course catalogue and the review table from the Kaggle input directory.
courses, reviews = map(
    pd.read_csv,
    (
        '../input/course-reviews-on-coursera/Coursera_courses.csv',
        '../input/course-reviews-on-coursera/Coursera_reviews.csv',
    ),
)

In [None]:
# Attach the course columns to every review by joining the two tables on 'course_id'
# (pd.merge defaults to an inner join, so only reviews with a matching course row are kept),
# then discard the 'course_url' column.
df = pd.merge(reviews, courses, on='course_id')

# Name the axis with `columns=`: the positional form drop(label, 1) is rejected by pandas >= 2.0.
df = df.drop(columns='course_url')

df.head()

In [None]:
# Replace the 'date_reviews' column with separate 'year' and 'month' columns.
# The dates are parsed once and the parsed series is reused for both new columns.
# NOTE: this cell removes 'date_reviews' from df, so it cannot be re-run without
# re-running the merge cell first.
review_dates = pd.to_datetime(df['date_reviews'])
df['year'] = review_dates.dt.year
df['month'] = review_dates.dt.month

# Name the axis with `columns=`: the positional form drop(label, 1) is rejected by pandas >= 2.0.
df = df.drop(columns='date_reviews')

In [None]:
# Preview the first rows to confirm that 'year' and 'month' replaced 'date_reviews'.
df.head()

In [None]:
# Total number of rows in the merged frame (recorded as 1454711 when this was written).
len(df)

In [None]:
def perfect_rating_ratio(course):
    """Return the share of 5-star ratings among all ratings of one course.

    Reads the module-level ``df`` and looks at the rows whose ``name`` equals
    ``course``.

    Args:
        course: Course name to match against the ``name`` column.

    Returns:
        Number of ratings equal to 5 divided by the number of non-null ratings
        for that course.
    """
    course_ratings = df.loc[df['name'] == course, 'rating']
    five_star_count = course_ratings[course_ratings == 5].count()

    return five_star_count / course_ratings.count()

def myFunc(e):
    """Sort key: return the value stored under ``'ratio'`` in the mapping ``e``."""
    ratio = e['ratio']
    return ratio

def review_counter(year, start_month, end_month, frame=None):
    """Count the reviews of all courses for each month of a span within one year.

    Args:
        year: Calendar year to select (compared with the ``year`` column).
        start_month: First month of the span, inclusive (1-12).
        end_month: Last month of the span, inclusive (1-12).
        frame: Optional DataFrame with ``year``, ``month`` and ``rating`` columns.
            Defaults to the module-level ``df``.

    Returns:
        A list with exactly one entry per month from ``start_month`` to
        ``end_month``: the number of non-null ratings in that month, or 0 when
        the month has no rows.
    """
    source = df if frame is None else frame

    in_span = (
        (source['year'] == year)
        & (source['month'] >= start_month)
        & (source['month'] <= end_month)
    )
    monthly_counts = source.loc[in_span].groupby('month')['rating'].count()

    # groupby only emits months that actually occur; reindexing over the full span keeps
    # position i tied to month start_month + i so the list lines up with month labels.
    span = range(start_month, end_month + 1)
    return monthly_counts.reindex(span, fill_value=0).tolist()

def rating_calculator(course, year, start_month, end_month, frame=None):
    """Compute the mean rating of one course for each month of a span within one year.

    Args:
        course: Course name to match against the ``name`` column.
        year: Calendar year to select (compared with the ``year`` column).
        start_month: First month of the span, inclusive (1-12).
        end_month: Last month of the span, inclusive (1-12).
        frame: Optional DataFrame with ``name``, ``year``, ``month`` and ``rating``
            columns. Defaults to the module-level ``df``.

    Returns:
        A list with exactly one entry per month from ``start_month`` to
        ``end_month``: the mean rating of that month, or NaN when the course has
        no ratings in that month.
    """
    source = df if frame is None else frame

    selected = (
        (source['year'] == year)
        & (source['month'] >= start_month)
        & (source['month'] <= end_month)
        & (source['name'] == course)
    )
    monthly_means = source.loc[selected].groupby('month')['rating'].mean()

    # groupby only emits months that actually occur; reindexing over the full span keeps
    # position i tied to month start_month + i. Missing months become NaN.
    span = range(start_month, end_month + 1)
    return monthly_means.reindex(span).tolist()

def rating_counter(course, year, start_month, end_month, frame=None):
    """Count the ratings of one course for each month of a span within one year.

    Args:
        course: Course name to match against the ``name`` column.
        year: Calendar year to select (compared with the ``year`` column).
        start_month: First month of the span, inclusive (1-12).
        end_month: Last month of the span, inclusive (1-12).
        frame: Optional DataFrame with ``name``, ``year``, ``month`` and ``rating``
            columns. Defaults to the module-level ``df``.

    Returns:
        A list with exactly one entry per month from ``start_month`` to
        ``end_month``: the number of non-null ratings of the course in that
        month, or 0 when the course has no rows in that month.
    """
    source = df if frame is None else frame

    selected = (
        (source['year'] == year)
        & (source['month'] >= start_month)
        & (source['month'] <= end_month)
        & (source['name'] == course)
    )
    monthly_counts = source.loc[selected].groupby('month')['rating'].count()

    # groupby only emits months that actually occur; reindexing over the full span keeps
    # position i tied to month start_month + i so the list lines up with month labels.
    span = range(start_month, end_month + 1)
    return monthly_counts.reindex(span, fill_value=0).tolist()

In [None]:
# Every distinct course name in the dataset, in order of first appearance.
course_names = list(pd.unique(df['name']))

In [None]:
# Number of reviews per course, most-reviewed first; the truncated notebook display
# shows the courses at the top and at the bottom of that ranking.
df['name'].value_counts()

In [None]:
# One {"course", "ratio"} record per course, ranked from the highest to the lowest
# share of 5-star ratings (reverse=True gives the descending order).
ratios = sorted(
    ({"course" : name, 'ratio' : perfect_rating_ratio(name)} for name in course_names),
    key=myFunc,
    reverse=True,
)

In [None]:
# The 10 courses with the highest 5-star ratio (head of the descending list).
ratios[:10]

In [None]:
# The 10 courses with the lowest 5-star ratio (tail of the descending list).
ratios[-10:]

In [None]:
# Month labels used on the x-axis of the plots.
names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'June', 'July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Monthly number of ratings for the course 'Machine Learning', one list per year
# from 2016 to 2020, each requested for months 1 through 12.
values_2016, values_2017, values_2018, values_2019, values_2020 = (
    rating_counter('Machine Learning', year, 1, 12) for year in range(2016, 2021)
)

In [None]:
# Monthly number of 'Machine Learning' ratings, one line per year.
# Fix: the fifth line previously re-plotted values_2019 while the legend called it
# '2020'; it now draws values_2020. Labels are attached to each line directly so the
# legend cannot drift from the plotting order.
for year_label, monthly_counts in [
    ('2016', values_2016),
    ('2017', values_2017),
    ('2018', values_2018),
    ('2019', values_2019),
    ('2020', values_2020),
]:
    # A partially recorded year may yield fewer than 12 values; use only as many month
    # labels as there are values (assumes any missing months are at the end of the year).
    plt.plot(names[:len(monthly_counts)], monthly_counts, label=year_label)

plt.title('Machine Learning: number of ratings per month')
plt.ylabel('number of ratings')
plt.legend(loc='upper left')

plt.show()

In [None]:
# Monthly mean rating of the course 'Machine Learning' for each year from 2016 to 2020.
# 2016-2019 cover months 1-12; 2020 is requested for months 1-10 only.
ratings_2016, ratings_2017, ratings_2018, ratings_2019 = (
    rating_calculator('Machine Learning', year, 1, 12) for year in (2016, 2017, 2018, 2019)
)
ratings_2020 = rating_calculator('Machine Learning', 2020, 1, 10)

In [None]:
# Only 10 months are recorded for 2020, so that year gets its own list of the first 10 month labels.
months_2020 = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'June', 'July', 'Aug', 'Sep', 'Oct']

# Monthly mean rating of 'Machine Learning', one line per year.
# Fix: the lines used to be drawn from 2020 down to 2016 while the legend listed
# 2016 up to 2020, so the legend named the wrong year for every line except 2018.
# Each line now carries its own label and is drawn in chronological order.
for year_label, month_labels, monthly_means in [
    ('2016', names, ratings_2016),
    ('2017', names, ratings_2017),
    ('2018', names, ratings_2018),
    ('2019', names, ratings_2019),
    ('2020', months_2020, ratings_2020),
]:
    plt.plot(month_labels, monthly_means, label=year_label)

plt.title('Machine Learning: average rating per month')
plt.ylabel('average rating')
plt.legend(loc='upper left')

plt.show()

In [None]:
# Monthly number of reviews across all courses for each year from 2016 to 2020.
# 2016-2019 cover months 1-12; 2020 is requested for months 1-10 only.
reviews_2016, reviews_2017, reviews_2018, reviews_2019 = (
    review_counter(year, 1, 12) for year in (2016, 2017, 2018, 2019)
)
reviews_2020 = review_counter(2020, 1, 10)

In [None]:
# Monthly number of reviews across all courses, one line per year.
# Fix: the lines used to be drawn from 2020 down to 2016 while the legend listed
# 2016 up to 2020, so the legend named the wrong year for every line except 2018.
# Each line now carries its own label and is drawn in chronological order.
for year_label, month_labels, monthly_reviews in [
    ('2016', names, reviews_2016),
    ('2017', names, reviews_2017),
    ('2018', names, reviews_2018),
    ('2019', names, reviews_2019),
    ('2020', months_2020, reviews_2020),  # 2020 only has values for its first 10 months
]:
    plt.plot(month_labels, monthly_reviews, label=year_label)

plt.title('All courses: number of reviews per month')
plt.ylabel('number of reviews')
plt.legend(loc='upper left')

plt.show()