In [None]:
#imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV

# Read the vgsales CSV into a DataFrame.
df = pd.read_csv('/kaggle/input/videogamesales/vgsales.csv')

# Impute missing 'Year' values with the column mean, then discard every row
# that still has a missing value in any other column.
year_mean = df['Year'].mean()
df['Year'] = df['Year'].fillna(year_mean)
df.dropna(axis='index', inplace=True)
df.isna().sum()  # per-column count of remaining NA values (result unused)

# Collapse every publisher that appears in fewer than 50 rows into the single
# label 'Small Publisher'; publishers with 50+ rows keep their own name.
counts = df['Publisher'].value_counts()
rows_per_publisher = df['Publisher'].map(counts)
df['Publisher'] = df['Publisher'].where(rows_per_publisher >= 50, 'Small Publisher')

# Remove the regional sales figures and the rank column.
df.drop(columns=['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales', 'Rank'], inplace=True)

# Split the frame by dtype, one-hot encode the chosen categorical columns and
# stitch everything back together column-wise.
df_nums = df.select_dtypes(exclude=['object'])
df_objs = df.select_dtypes(include=['object'])
# Only these three columns are encoded; any other object-typed column is left
# out of the model matrix. drop_first=True drops one level per feature.
encoded_cols = ['Platform', 'Genre', 'Publisher']
df_objs = pd.get_dummies(df_objs.loc[:, encoded_cols], drop_first=True)
final_df = pd.concat((df_nums, df_objs), axis='columns')

# Summary statistics of the assembled frame (result unused unless displayed).
final_df.describe(include='all')

# Hold out 30% of the rows as a test set; random_state pins the shuffle so the
# split is reproducible.
features = final_df.drop(columns=['Global_Sales'])  # everything but the target
target = final_df['Global_Sales']                   # the value being predicted
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.3, random_state=0)

# Fit a linear model with combined L1 (lasso) and L2 (ridge) regularization.
# The L1 part can shrink the coefficients of features without predictive
# power all the way to zero.
base_elastic_model = ElasticNet()
# Candidate penalty strengths (alpha) and L1/L2 mixes (l1_ratio; 1 is pure lasso).
param_grid = {'alpha': [0.1, 1, 5, 10, 50, 100],
              'l1_ratio': [.1, .5, .7, .9, .95, .99, 1]}
# 5-fold cross-validated search over the grid, ranking candidates by negated RMSE.
grid_model = GridSearchCV(estimator=base_elastic_model,
                          param_grid=param_grid,
                          scoring='neg_root_mean_squared_error',
                          cv=5,
                          verbose=1)
grid_model.fit(X_train, y_train)
# Report the winning hyperparameters. get_params() would only echo the search
# object's own constructor arguments, not the outcome of the search.
print(grid_model.best_params_)
# With the default refit=True, predict() uses the best estimator refit on all
# of the training data.
y_pred = grid_model.predict(X_test)

# Root mean squared error on the held-out test set. The square root is taken
# explicitly because the `squared` keyword of mean_squared_error is no longer
# accepted by recent scikit-learn releases.
np.sqrt(mean_squared_error(y_test, y_pred))