# SVM Model

In [None]:
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np
import os

# Read the CSV

In [None]:
# Load clean_movies.csv (path is relative to the notebook's working directory)
# into a DataFrame, then preview its first rows.
df = pd.read_csv("clean_movies.csv")
df.head()

In [None]:
# Per-column count of non-missing values; columns with a lower count than the
# others contain gaps.
df.count()

In [None]:
# Distinct values of the `performance` column, which is used as the target (y)
# further down.
np.unique(df.performance)

# Select your features (columns)

In [None]:
# List every column name so the feature columns can be picked from it.
df.columns

In [None]:
# Build the feature matrix (used as the X values below).
# First group: gross / rating / duration / award / media-count columns.
stat_columns = [
    'lifetime_gross', 'ratingCount', 'duration', 'nrOfWins',
    'nrOfNominations', 'nrOfPhotos', 'nrOfNewsArticles', 'nrOfUserReviews',
]
# Second group: columns named after genres (presumably indicator flags --
# TODO confirm against the CSV).
genre_columns = [
    'Action', 'Adult', 'Adventure', 'Animation', 'Biography', 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Family', 'Fantasy', 'Horror', 'Music',
    'Musical', 'Mystery', 'News', 'RealityTV', 'Romance', 'SciFi', 'Short',
    'Sport', 'TalkShow', 'Thriller', 'War', 'Western',
]
# Column order is kept exactly as stat columns followed by genre columns.
selected_features = df[stat_columns + genre_columns]

In [None]:
# Preview the first rows of the feature matrix to confirm the column selection.
selected_features.head()

# Create a Train Test Split

In [None]:
# Target vector: keep it 1-D, shape (n_samples,). Reshaping to an (n, 1)
# column vector makes LabelEncoder.fit/transform emit a DataConversionWarning
# ("a column-vector y was passed when a 1d array was expected") before it
# flattens the array anyway.
y = df["performance"].values
# Feature matrix: the columns selected above.
X = selected_features

# Sanity check: both must have the same number of rows.
print(X.shape, y.shape)

In [None]:
# Column names again (same output as the earlier `df.columns` cell).
df.columns

In [None]:
# Distinct target labels again (same output as the earlier np.unique cell).
np.unique(df.performance)

In [None]:
from sklearn.model_selection import train_test_split

# Stratified hold-out split: stratifying on y keeps the class proportions the
# same in both partitions, and the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

In [None]:
# Preview the first rows of the training features.
X_train.head()

In [None]:
# Show the training targets (still the raw `performance` labels, not yet encoded).
y_train

In [None]:
# Distinct class labels present in the training targets.
labels = np.unique(y_train)
print(labels)

# Pre-processing

In [None]:
# Import the preprocessing utilities: MinMaxScaler to scale the features and
# LabelEncoder to encode the target labels.
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [None]:
# Scale the features. The scaler is fitted on the training split only, and the
# same fitted scaler is then applied to both splits.
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [None]:
# Inspect the scaled test features (transformed with the scaler fitted on the
# training split).
X_test_scaled

In [None]:
# Encode the target labels as integers. The encoder learns its classes from
# the training targets and is then applied to both splits.
label_encoder = LabelEncoder().fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Show the encoded test targets.
print(encoded_y_test)

# Train the Model

In [None]:
from sklearn.svm import SVC

# Fit a linear-kernel support vector classifier on the scaled training data
# (fit() returns the estimator itself, so `model` is the fitted SVC).
model = SVC(kernel='linear').fit(X_train_scaled, encoded_y_train)

# Predicted (encoded) classes for the held-out test split.
predictions = model.predict(X_test_scaled)

# Display the fitted estimator.
model

In [None]:
# NOTE(review): this constructs a new, unfitted SVC and discards it (nothing
# is assigned). It looks like pasted cell output; it does not affect `model`.
SVC(kernel='linear')

In [None]:
# Inspect the predicted classes for the test split (label-encoded integers).
predictions

In [None]:
# Mean accuracy of the fitted model on each split; a large gap between the
# two would point to overfitting.
train_score = model.score(X_train_scaled, encoded_y_train)
test_score = model.score(X_test_scaled, encoded_y_test)
print(f"Training Data Score: {train_score}")
print(f"Testing Data Score: {test_score}")

# Hyperparameter Tuning
Use GridSearchCV to tune the model's parameters

In [None]:
# Create the GridSearchCV model
from sklearn.model_selection import GridSearchCV

# `model` is SVC(kernel='linear'). gamma is a kernel coefficient only for the
# 'rbf', 'poly' and 'sigmoid' kernels and is ignored by the linear kernel, so
# searching over gamma would refit identical models for every C value.
# Only C (the regularization strength) is tuned here.
param_grid = {'C': [1, 5, 10, 50]}

# GridSearchCV clones `model` for every candidate, so the already-fitted
# estimator is not modified; verbose=3 logs each fit.
grid = GridSearchCV(model, param_grid, verbose=3)

In [None]:
# Train the model with GridSearch
# Runs the cross-validated search over the parameter grid on the scaled
# training features and encoded training targets.
grid.fit(X_train_scaled, encoded_y_train)

In [None]:
# Report the winning parameter combination and its mean cross-validated score,
# one per line.
print(grid.best_params_, grid.best_score_, sep="\n")