# Random Forest Regression on Movie Ratings

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Load the ratings sample and discard the 'overview' column in one chained step.
data = pd.read_csv('ratings_sample.csv').drop(columns=['overview'])

In [3]:
# Preview the first rows of the loaded frame (last expression, so the notebook renders it).
data.head()

Unnamed: 0,user_id,movie_id,rating,genres,production_companies,production_countries,release_year
0,93787,gods+and+monsters+1998,4,Drama,Lions Gate Films Flashpoint (I) Showtime Networks,United Kingdom United States of America,1998.0
1,197858,stargate+1994,3,Action Adventure Science Fiction,Centropolis Entertainment StudioCanal Carolco ...,France United States of America,1994.0
2,76204,toy+story+1995,3,Animation Comedy Family,Pixar Animation Studios,United States of America,1995.0
3,667201,friends+with+benefits+2011,4,Romance Comedy,Castle Rock Entertainment Screen Gems Olive Br...,United States of America,2011.0
4,336750,sleeper+1973,4,Comedy Romance Science Fiction,Rollins-Joffe Productions,United States of America,1973.0


In [4]:
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

# Use one encoder per column: refitting a single shared encoder on movie_id
# would discard the user_id mapping, making it impossible to inverse_transform
# user ids later.
user_encoder = LabelEncoder()
movie_encoder = LabelEncoder()
data['user_id'] = user_encoder.fit_transform(data['user_id'])
data['movie_id'] = movie_encoder.fit_transform(data['movie_id'])

# Backward-compatible alias: `le` previously ended this cell fitted on movie_id.
le = movie_encoder
# NOTE(review): the feature matrix assembled in the following cells uses only the
# text columns and release_year, so these encoded ids look unused there — confirm
# whether they are meant to be model inputs.

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Columns whose text is merged into a single space-separated document per row.
text_columns = ['genres', 'production_companies', 'production_countries']
# NOTE(review): astype(str) renders missing values as the literal 'nan', which
# would then be tokenised like any other word — confirm whether these columns
# contain gaps.
text_frame = data[text_columns].astype(str)
combined_text = text_frame.apply(' '.join, axis=1)

# TF-IDF weights learned from (and applied to) every row's merged document.
vectorizer = TfidfVectorizer()
X_text = vectorizer.fit_transform(combined_text)

# Release year kept as a one-column frame so it can be stacked beside X_text.
X_year = data[['release_year']]

In [6]:
import scipy.sparse as sp

# Place the release_year column to the right of the TF-IDF block.
feature_blocks = [X_text, X_year]
X = sp.hstack(feature_blocks)

# Target vector: the rating column as an array.
y = data.loc[:, 'rating'].values

In [7]:
# Print the stacked sparse feature matrix as (row, column) value entries.
print(X)

  (0, 48)	0.06685363122711019
  (0, 1056)	0.06668601762694008
  (0, 1362)	0.06685363122711019
  (0, 803)	0.1883048467020235
  (0, 1495)	0.12896611450427223
  (0, 1018)	0.4292230094865342
  (0, 1308)	0.4605860802044465
  (0, 557)	0.4605860802044465
  (0, 544)	0.15460846350897628
  (0, 592)	0.3754720098430041
  (0, 859)	0.38666597430581295
  (0, 411)	0.10778745648669322
  (1, 579)	0.3046631552989525
  (1, 221)	0.43533855908927765
  (1, 1375)	0.45558857301689093
  (1, 460)	0.3657204481739613
  (1, 237)	0.43533855908927765
  (1, 509)	0.21740478343908054
  (1, 1275)	0.21740478343908054
  (1, 20)	0.18737779239011312
  (1, 17)	0.17512661480657477
  (1, 48)	0.08219436603878
  (1, 1056)	0.08198829056684852
  (1, 1362)	0.08219436603878
  (1, 1495)	0.07927982240899417
  :	:
  (1973, 1612)	1998.0
  (1974, 1612)	1996.0
  (1975, 1612)	1995.0
  (1976, 1612)	1990.0
  (1977, 1612)	1997.0
  (1978, 1612)	1989.0
  (1979, 1612)	2014.0
  (1980, 1612)	1994.0
  (1981, 1612)	2000.0
  (1982, 1612)	1996.0
  (198

In [8]:
# Print the target array built from the 'rating' column.
print(y)

[4 3 3 ... 3 5 4]


## Splitting the dataset into the Training set and Test set

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Show the held-out features followed by their true ratings, one per line.
print(X_test, y_test, sep='\n')

  (0, 48)	0.14858718006983987
  (0, 411)	0.2395656587725282
  (0, 495)	0.4274363406131031
  (0, 1000)	0.4597705129955782
  (0, 1056)	0.14821464634603923
  (0, 1091)	0.4262896194256539
  (0, 1117)	0.22894527004045898
  (0, 1235)	0.3612287183911696
  (0, 1362)	0.14858718006983987
  (0, 1439)	0.3118149063967728
  (0, 1495)	0.14331840265830698
  (0, 1612)	1990.0
  (1, 17)	0.18863921317580248
  (1, 20)	0.20183567964315374
  (1, 48)	0.08853640295716636
  (1, 91)	0.45988512495648753
  (1, 387)	0.4791058440291911
  (1, 509)	0.23417952396270617
  (1, 716)	0.4738582312012423
  (1, 1056)	0.08831442690331
  (1, 1091)	0.2540067689836161
  (1, 1117)	0.1364181665868611
  (1, 1275)	0.23417952396270617
  (1, 1362)	0.08853640295716636
  (1, 1439)	0.18579644749849739
  :	:
  (397, 1612)	1998.0
  (398, 48)	0.19345798687079502
  (398, 295)	0.3710861050580075
  (398, 411)	0.31191042220281273
  (398, 1056)	0.1929729542844435
  (398, 1091)	0.5550218501975857
  (398, 1117)	0.2980828563056359
  (398, 1235)	0.47

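## Feature Scaling

No feature scaling is applied in this notebook: the features are passed to the Random Forest below unscaled. Tree-based models split on thresholds, so they do not need scaled inputs.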

## Training the Random Forest model on the Training set

In [11]:
from sklearn.ensemble import RandomForestRegressor

# Only the seed is pinned; every other hyper-parameter keeps its library default.
forest_params = {'random_state': 42}
model = RandomForestRegressor(**forest_params)
# Kept as the final expression so the notebook still displays the fitted estimator.
model.fit(X_train, y_train)

In [14]:
# Predict ratings for the held-out test features with the fitted model.
y_pred = model.predict(X_test)

In [15]:
from sklearn.metrics import mean_squared_error

# Mean of the squared differences between the true test ratings and the predictions.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 1.1238373276042202


## Evaluating the Model Performance

In [16]:
from sklearn.metrics import r2_score

# Coefficient of determination on the test split; left as the final expression
# so the notebook displays the value.
r2_score(y_true=y_test, y_pred=y_pred)

-0.01511817144270644

In [17]:
from sklearn.metrics import accuracy_score

# Round the continuous predictions to whole numbers, then measure the share of
# test rows whose rounded prediction equals the true rating.
y_pred_rounded = np.round(y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_rounded)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.35
