## **Performance comparison of different regression models**
#### This notebook compares several regression models (Elastic Net, Lasso, Ridge, Linear Regression, LinearSVR, SVR, MLPRegressor, RandomForestRegressor and XGBoost) on TF-IDF text features, using the **Mean Squared Error** and **R2 Score** metrics.


Fork this notebook or suggest some other algorithms (models) that I can add for comparison.

In [None]:
import numpy as np
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

from sklearn.linear_model import ElasticNet, LinearRegression, Lasso, Ridge
from sklearn.svm import LinearSVR, SVR
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

from sklearn.neural_network import MLPRegressor

from sklearn.metrics import mean_squared_error,r2_score
from sklearn.pipeline import Pipeline

import matplotlib.pyplot as plt
%matplotlib inline

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)


In [None]:
# Load the training CSV into a DataFrame.
TRAIN_CSV_PATH = '/kaggle/input/commonlitreadabilityprize/train.csv'
train_df = pd.read_csv(TRAIN_CSV_PATH)

In [None]:
# Preview the first five rows of the training data.
train_df.head(n=5)

In [None]:
# Count missing values per column.
train_df.isna().sum()

In [None]:
# Keep only the text, the regression target and its reported standard error.
selected_columns = ['excerpt', 'target', 'standard_error']
new_train_df = train_df[selected_columns]

In [None]:
# Features are the raw excerpt text; the label is the `target` column.
X, y = new_train_df['excerpt'], new_train_df['target']

In [None]:
# Hold out 33% of the rows for evaluation; the seed fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [None]:
# Candidate regressors, keyed by the label shown on the comparison plots.
# Dict insertion order determines the order of the scores and of the x-axis.
#
# Estimators whose fitting involves randomness (weight initialisation,
# bootstrapping, data shuffling) are seeded with the same value used for the
# train/test split, so that Restart & Run All reproduces the same scores.
models = {'Elastic Net': ElasticNet(),
          'Lasso': Lasso(),
          'LinearRegression': LinearRegression(),
          'MLPRegressor': MLPRegressor(random_state=42),
          'Ridge': Ridge(),
          'LinearSVR': LinearSVR(random_state=42),
          'RandomForest': RandomForestRegressor(random_state=42),
          'XGBoost': XGBRegressor(n_estimators=1000, eta=0.1, tree_method='hist',
                                  random_state=42),
          'SVR': SVR(kernel='rbf')}

In [None]:
# Fit each candidate on the training excerpts and score it on the held-out
# split. Scores are appended in the iteration order of `models`, which is the
# order the plotting cells pair them with the model names.
mean_squared_errors, r2_scores = [], []
for m in models.values():
    # Raw text -> TF-IDF features -> regressor; a fresh vectorizer per model.
    steps = [('tfidf', TfidfVectorizer()), ('rgr', m)]
    pipeline = Pipeline(steps).fit(X_train, y_train)
    preds = pipeline.predict(X_test)

    mse = mean_squared_error(y_test, preds)
    r2 = r2_score(y_test, preds)
    mean_squared_errors.append(mse)
    r2_scores.append(r2)

In [None]:
# Test-set mean squared error, one point per model in `models` order.
plt.style.use('ggplot')
mse_fig, mse_ax = plt.subplots(figsize=(12, 5))
mse_ax.plot(list(models), mean_squared_errors, color='r', marker='o')
mse_ax.set_ylabel('Mean Squared Error')
mse_ax.set_title('Mean Squared Error by Model')
plt.show()

In [None]:
# Test-set R2 score, one point per model in `models` order.
r2_fig, r2_ax = plt.subplots(figsize=(12, 5))
r2_ax.plot(list(models), r2_scores, color='b', marker='o')
r2_ax.set_ylabel('Coeff. of determination (R2 Score)')
r2_ax.set_title('R2 score by Model')
plt.show()