# Task for Today  

***

## Movie Preference Prediction  

Given *data about someone's personality*, let's try to classify **how strongly they will enjoy watching the movies on a recommendation list**.  
  
We will use logistic regression, support vector machine, and neural network models to make our predictions.

# Getting Started

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [None]:
# Load the personality dataset CSV into a DataFrame.
# NOTE(review): the relative '../input/...' path presumably targets a
# Kaggle-style input directory — adjust it when running elsewhere.
data = pd.read_csv('../input/top-personality-dataset/2018-personality-data.csv')

In [None]:
# Display the DataFrame exactly as it was loaded.
data

In [None]:
# Print the per-column dtype and non-null summary.
data.info()

# Preprocessing

In [None]:
# Count missing values in each column before any preprocessing.
data.isna().sum()

In [None]:
# Show the column labels as read from the CSV; most labels used further
# down carry a leading space, so they must be typed exactly as shown here.
data.columns

In [None]:
# Build the labels of the twelve (movie, predicted rating) column pairs.
# The leading space in each label is part of the column name.
recommendation_columns = [
    f' {field}_{slot}'
    for slot in range(1, 13)
    for field in ('movie', 'predicted_rating')
]

# Remove the user identifier together with every per-slot movie/rating column.
data = data.drop(columns=['userid', *recommendation_columns])

In [None]:
# Display the frame again after the identifier, movie, and predicted-rating
# columns have been dropped.
data

## Encoding

In [None]:
# Map every object-dtype column to the list of distinct values it contains.
{
    name: list(data[name].unique())
    for name, dtype in data.dtypes.items()
    if dtype == 'object'
}

In [None]:
# Show the most frequent value(s) of the assigned-condition column.
data[' assigned condition'].mode()

In [None]:
# Rank order used to ordinally encode ' assigned condition': a value's
# position in this list becomes its integer code.
# NOTE(review): placing ' default' between ' medium' and ' high' is a
# modelling choice — confirm it reflects the intended ordering.
condition_ordering = [' low', ' medium', ' default', ' high']

In [None]:
def ordinal_encode(df, column, ordering):
    """Return a copy of ``df`` with ``column`` replaced by ordinal codes.

    Each value in ``column`` is replaced by its position in ``ordering``.
    The input frame is left untouched. A value that does not appear in
    ``ordering`` raises ``ValueError``.
    """
    encoded = df.copy()
    # The bound ``index`` method looks up the rank of each value.
    ranks = encoded[column].map(ordering.index)
    encoded[column] = ranks
    return encoded

def onehot_encode(df, column, prefix):
    """Return a copy of ``df`` with ``column`` expanded into indicator columns.

    One indicator column is created per distinct value of ``column`` (named
    with ``prefix``), appended after the existing columns, and the original
    ``column`` is removed. The input frame is left untouched.
    """
    source = df.copy()
    indicator_columns = pd.get_dummies(source[column], prefix=prefix)
    widened = pd.concat([source, indicator_columns], axis=1)
    return widened.drop(columns=column)

In [None]:
# Encode the assigned condition as an integer rank first, then expand the
# assigned metric into one indicator column per value (prefixed with 'm').
condition_encoded = ordinal_encode(data, ' assigned condition', condition_ordering)
data = onehot_encode(condition_encoded, ' assigned metric', 'm')

In [None]:
# Display the frame after ordinal and one-hot encoding.
data

## Splitting and Scaling

In [None]:
# The label keeps its surrounding spaces because that is how the column is
# named in the data.
target_column = ' enjoy_watching '

# Features are every remaining column; the target is the enjoyment rating.
X = data.drop(columns=target_column)
y = data[target_column]

In [None]:
# Split BEFORE scaling so the held-out rows never influence the scaler's
# statistics; fitting the scaler on the full matrix leaks test information
# into training and can inflate the reported accuracy.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=42)

# Learn the per-feature mean and variance from the training rows only, then
# apply that same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Training

In [None]:
# Instantiate the three classifiers being compared.
log_model = LogisticRegression()
svm_model = SVC(C=1.0)
# A single hidden layer of 16 units. The trailing comma makes this a real
# one-element tuple; a bare `(16)` is just the integer 16.
ann_model = MLPClassifier(hidden_layer_sizes=(16,))

# Fit every model on the same training split.
log_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)
ann_model.fit(X_train, y_train)

# Evaluate each fitted model on the held-out split; `score` returns the
# mean accuracy for classifiers.
log_acc = log_model.score(X_test, y_test)
svm_acc = svm_model.score(X_test, y_test)
ann_acc = ann_model.score(X_test, y_test)

# Results

In [None]:
# The same model names label the x axis and choose each bar's colour.
model_names = ['Logistic Regression', 'Support Vector Machine', 'Neural Network']
model_scores = [log_acc, svm_acc, ann_acc]

# Bar chart comparing the test accuracy of the three models.
fig = px.bar(
    x=model_names,
    y=model_scores,
    color=model_names,
    labels={'x': "Model", 'y': "Accuracy"},
    title="Model Accuracy",
)

fig.show()

In [None]:
# Evaluates to 0.2.
# NOTE(review): presumably the chance-level accuracy for a five-class
# target, shown for comparison with the model scores — TODO confirm.
1/5

In [None]:
# Print each model's test accuracy, one per line.
for label, accuracy in (
    ("Logistic Regression:", log_acc),
    ("Support Vector Machine:", svm_acc),
    ("Neural Network:", ann_acc),
):
    print(label, accuracy)

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/SFV-zQMuR0o