# Predicting Dislikes

## Import Libraries

In [42]:
import pandas as pd

## Import Dataset

In [43]:
# Load the video statistics from the CSV that sits next to the notebook.
dataset = pd.read_csv("./videos_dataset.csv")

# Predictor columns, in the order they will appear in the feature matrix.
features = [
    "category_id",
    "views",
    "likes",
    "comment_count",
    "comments_disabled",
    "ratings_disabled",
]
# Column the model is trained to predict.
target = ["dislikes"]

# Continue with plain NumPy arrays; y is kept as a single-column 2-D array.
X = dataset.loc[:, features].values
y = dataset.loc[:, target].values.reshape(-1, 1)

In [44]:
# Inspect the feature matrix extracted from the dataset (one row per video).
X

array([[10, 17158579, 787425, 125882, False, False],
       [23, 1014651, 127794, 13030, False, False],
       [23, 3191434, 146035, 8181, False, False],
       ...,
       [20, 773347, 25900, 3881, False, False],
       [25, 115225, 2115, 1672, False, False],
       [24, 107392, 300, 251, False, False]], dtype=object)

## Data Preprocessing

In [45]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# One-hot encode the two boolean flag columns (positions 4 and 5 of `features`:
# "comments_disabled" and "ratings_disabled"). remainder="passthrough" keeps the
# other columns unchanged and places them after the encoded ones.
ct = ColumnTransformer(
    transformers=[("encoder", OneHotEncoder(), [4, 5])],
    remainder="passthrough",
)
# Transform the raw feature matrix rather than the current value of X, so this
# cell can be re-run without re-encoding its own already-encoded output.
X = ct.fit_transform(dataset[features].values)

In [46]:
# First row after encoding: the one-hot columns come first, followed by the
# passthrough columns.
X[0]

array([1.0, 0.0, 1.0, 0.0, 10, 17158579, 787425, 125882], dtype=object)

## Split Dataset

In [47]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=21,
)

## Feature Scaling

In [48]:
from sklearn.preprocessing import StandardScaler

# Columns 0-3 hold the one-hot flags and are left untouched; everything from
# column 4 onward is standardized.
numeric_cols = slice(4, None)

sc = StandardScaler()
# Fit the scaler on the training rows only, then apply the same statistics to
# the test rows.
X_train[:, numeric_cols] = sc.fit_transform(X_train[:, numeric_cols])
X_test[:, numeric_cols] = sc.transform(X_test[:, numeric_cols])

In [49]:
# First training row after standardizing columns 4 onward.
X_train[0]

array([1.0, 0.0, 1.0, 0.0, 0.3214616987443592, 0.005754610379674751,
       -0.025061584670979912, -0.06766622852479587], dtype=object)

## Train Model

In [50]:
from sklearn.ensemble import RandomForestRegressor

# Random forest with 128 trees; the fixed random_state keeps training repeatable.
regressor = RandomForestRegressor(
    n_estimators=128,
    random_state=21,
)
# y_train is a single-column 2-D array, so it is flattened to 1-D before fitting.
regressor.fit(X_train, y_train.reshape(-1))

RandomForestRegressor(n_estimators=128, random_state=21)

## Evaluation (R² Score)

In [51]:
from sklearn.metrics import r2_score
# Predict dislikes for the held-out rows.
y_pred = regressor.predict(X_test)

# Coefficient of determination (R²) of the predictions against the true values.
r2_score(y_true=y_test, y_pred=y_pred)

0.9192493549976333