# Task for Today  

***

## Finance/Accounting Course Rating Prediction  

Given *data about finance and accounting courses on Udemy*, let's try to predict the **rating** of a given course.  
  
We will use a TensorFlow artificial neural network (ANN) to make our predictions.

# Getting Started

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf

from tensorflow_addons.metrics import RSquare

In [None]:
# Load the Udemy finance/accounting course listing into a DataFrame.
csv_path = '../input/finance-accounting-courses-udemy-13k-course/udemy_output_All_Finance__Accounting_p1_p626.csv'
data = pd.read_csv(csv_path)

In [None]:
# Show the freshly loaded DataFrame (the notebook renders the cell's last expression).
data

In [None]:
# Print the column summary (names, non-null counts, dtypes) to guide cleaning.
data.info()

# Cleaning

In [None]:
# Remove the 'id', 'title' and 'url' columns; they are not used as features.
identifier_columns = ['id', 'title', 'url']
data = data.drop(columns=identifier_columns)

In [None]:
# Show the DataFrame after the identifier columns were removed.
data

In [None]:
# List the distinct values found in the discount-price currency column.
data['discount_price__currency'].unique()

In [None]:
# List the distinct values found in the list-price currency column.
data['price_detail__currency'].unique()

In [None]:
# Remove both currency columns from the feature set.
currency_columns = ['discount_price__currency', 'price_detail__currency']
data = data.drop(columns=currency_columns)

In [None]:
# Show the DataFrame after the currency columns were removed.
data

In [None]:
# Inspect each column's dtype to spot the remaining non-numeric columns.
data.dtypes

In [None]:
# Fraction of missing values per column (mean of the boolean NaN mask).
data.isna().mean()

In [None]:
# Remove the formatted price-string columns; the numeric '*__amount' columns
# are kept and imputed separately.
price_string_columns = ['discount_price__price_string', 'price_detail__price_string']
data = data.drop(columns=price_string_columns)

In [None]:
# Show the DataFrame after the price-string columns were removed.
data

In [None]:
# Fill missing price amounts with the mean of their own column.
amount_columns = ['discount_price__amount', 'price_detail__amount']
column_means = data[amount_columns].mean()
data[amount_columns] = data[amount_columns].fillna(column_means)

In [None]:
# Confirm how many missing cells remain across the whole DataFrame.
total_missing = data.isna().sum().sum()
print("Total missing values:", total_missing)

# Feature Engineering/Encoding/Splitting/Scaling

In [None]:
# Derive numeric year/month features from the two timestamp columns.
# The values are sliced positionally ([0:4] -> year, [5:7] -> month), which
# assumes strings laid out like "YYYY-MM-..." -- TODO confirm against the raw data.
# The builtin `int` is used instead of `np.int`: that alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
data['created_year'] = data['created'].apply(lambda x: int(x[0:4]))
data['created_month'] = data['created'].apply(lambda x: int(x[5:7]))

data['published_year'] = data['published_time'].apply(lambda x: int(x[0:4]))
data['published_month'] = data['published_time'].apply(lambda x: int(x[5:7]))

# The raw timestamp strings are no longer needed once the parts are extracted.
data = data.drop(['created', 'published_time'], axis=1)

In [None]:
# Cast the two flag columns to integers so they can be scaled with the other
# numeric features. The builtin `int` replaces `np.int`, a deprecated alias
# that was removed in NumPy 1.24.
data['is_paid'] = data['is_paid'].astype(int)
data['is_wishlisted'] = data['is_wishlisted'].astype(int)

In [None]:
# Show the DataFrame after feature engineering and flag encoding.
data

In [None]:
# Separate the prediction target ('rating') from the feature columns.
# Both are copies, so later edits do not write back into `data`.
target_column = 'rating'
y = data[target_column].copy()
X = data.drop(columns=target_column).copy()

In [None]:
# Show the feature matrix (everything except the 'rating' target).
X

In [None]:
# True only if 'avg_rating_recent' matches the 'rating' target in every row.
data['avg_rating_recent'].eq(data['rating']).all()

In [None]:
# Remove 'avg_rating_recent' from the features.
X = X.drop(columns='avg_rating_recent')

In [None]:
# Split BEFORE scaling so the scaler's mean/variance are estimated from the
# training rows only. Fitting the scaler on the full matrix would leak
# test-set statistics into training.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=32)

scaler = StandardScaler()

# Fit on the training split, then apply the same transformation to the test split.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Modeling/Training

In [None]:
# Inspect the feature matrix dimensions; the second entry is the number of
# features the model's input layer has to accept.
X.shape

In [None]:
# Fully connected regression network: two hidden ReLU layers of 64 units and
# a single linear output unit for the predicted rating.
# The input width is taken from the training matrix rather than hard-coded,
# so the model keeps working if the feature set changes.
inputs = tf.keras.Input(shape=(X_train.shape[1],))
x = tf.keras.layers.Dense(64, activation='relu')(inputs)
x = tf.keras.layers.Dense(64, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='linear')(x)

model = tf.keras.Model(inputs, outputs)


# Mean squared error is both the training objective and the value reported
# by `model.evaluate`.
model.compile(
    optimizer='adam',
    loss='mse'
)


batch_size = 32
epochs = 100

# Hold out 12% of the training rows for validation. ReduceLROnPlateau is
# used with its default settings to lower the learning rate when the
# monitored metric stops improving.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.12,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[tf.keras.callbacks.ReduceLROnPlateau()],
    verbose=0
)

# Results

In [None]:
# Plot training and validation loss per epoch.
# Passing a list as `y` puts Plotly Express in wide-form mode, where the
# axes are named 'index' and 'value'; `labels` must use those keys for the
# axis titles to be applied ('x'/'y' match nothing and are ignored).
fig = px.line(
    history.history,
    y=['loss', 'val_loss'],
    labels={'index': "epoch", 'value': "loss"}
)

fig.show()

In [None]:
# Score the trained model on the held-out test split. The model is compiled
# with loss='mse' and no extra metrics, so the returned value is the test MSE.
model.evaluate(X_test, y_test)

In [None]:
# Predict on the test split, then drop singleton dimensions from the output.
test_predictions = model.predict(X_test)
y_pred = np.squeeze(test_predictions)

In [None]:
# Create the tensorflow_addons RSquare metric and feed it the test targets
# and predictions; the score is read back later via `rsquared.result()`.
rsquared = RSquare()

rsquared.update_state(y_test, y_pred)

In [None]:
# Convert the accumulated metric to a NumPy value and report it.
r2_value = rsquared.result().numpy()
print("R^2 Score:", r2_value)

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/h1AFMLZcDSA