In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Importing Data

In [None]:
# Read the raw test-score CSV into a DataFrame.
dataset = pd.read_csv(
    '../input/predict-test-scores-of-students/test_scores.csv'
)

# Data Exploration

In [None]:
# Show the first five rows as a quick look at the columns.
dataset.head(n=5)

In [None]:
# Summary statistics for the numeric columns (quartiles spelled out explicitly).
dataset.describe(percentiles=[0.25, 0.5, 0.75])

In [None]:
# Number of non-missing values in each column.
dataset.notna().sum()

In [None]:
# Number of missing values in each column.
dataset.isnull().sum()

# Data Cleaning and Preprocessing

In [None]:
# Remove the student_id column from the frame.
dataset = dataset.drop(columns=['student_id'])

In [None]:
# Expand the listed categorical columns into one indicator column per category.
one_hot_encoded_data = pd.get_dummies(
    data=dataset,
    columns=[
        'teaching_method',
        'gender',
        'school_setting',
        'lunch',
        'school_type',
    ],
)

In [None]:
# Rebind `dataset` to the one-hot encoded frame so later cells work on the encoded data.
dataset = one_hot_encoded_data

In [None]:
# Show the first five rows of the encoded frame.
dataset.iloc[:5]

In [None]:
# Remove the school column from the frame.
dataset = dataset.drop(columns=['school'])

In [None]:
# Remove the classroom column from the frame.
dataset = dataset.drop(columns=['classroom'])

In [None]:
# Regression target: the posttest column.
y = dataset.loc[:, 'posttest']

In [None]:
# Feature matrix: every remaining column except the posttest target.
X = dataset.drop(columns=['posttest'])

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

# Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression
# Linear regression model fitted on the training split.
regressor_LR = LinearRegression()
regressor_LR.fit(X=X_train, y=y_train)

In [None]:
# Linear-regression predictions for the test split.
y_pred_LR = regressor_LR.predict(X=X_test)

In [None]:
from sklearn.metrics import r2_score
# R^2 of the linear-regression predictions against the test targets.
r2_score(y_true=y_test, y_pred=y_pred_LR)

# SVR

In [None]:
from sklearn.svm import SVR
# Support vector regressor with an RBF kernel, fitted on the training split.
regressor_SVR = SVR(kernel='rbf')
regressor_SVR.fit(X=X_train, y=y_train)

In [None]:
# SVR predictions for the test split.
y_pred_SVR = regressor_SVR.predict(X=X_test)

In [None]:
from sklearn.model_selection import GridSearchCV
# Hyper-parameter search for the SVR using 10-fold cross-validation on the training split.
# Two sub-grids are searched: a linear kernel (C only) and an RBF kernel (C and gamma).
parameters = [{'C': [0.25, 0.5, 0.75, 1], 'kernel': ['linear']},
              {'C': [0.25, 0.5, 0.75, 1], 'kernel': ['rbf'], 'gamma': [0.1, 0.3, 0.5, 0.7, 0.9]}]
grid_search = GridSearchCV(estimator = regressor_SVR,
                           param_grid = parameters,
                           scoring = 'r2',
                           cv = 10,
                           n_jobs = -1)
grid_search.fit(X_train, y_train)
best_r2_score = grid_search.best_score_
best_parameters = grid_search.best_params_
# The search is scored with R^2 (scoring='r2'), which is a regression fit measure,
# not a classification accuracy, so it is reported as a plain score rather than a percentage.
print("Best CV R2 Score: {:.4f}".format(best_r2_score))
print("Best Parameters:", best_parameters)

In [None]:
from sklearn.svm import SVR
# Rebuild the SVR from the hyper-parameters selected by the grid search instead of
# hand-copying them, so this cell stays in sync if the data or the grid changes.
regressor_SVR = SVR(**grid_search.best_params_)
regressor_SVR.fit(X_train, y_train)

In [None]:
# Predictions of the refitted SVR for the test split.
y_pred_SVR = regressor_SVR.predict(X=X_test)

In [None]:
# R^2 of the SVR predictions against the test targets.
r2_score(y_true=y_test, y_pred=y_pred_SVR)

# Decision Tree

In [None]:
from sklearn.tree import DecisionTreeRegressor
# Decision tree regressor with a fixed seed for repeatable results.
regressor_DT = DecisionTreeRegressor(random_state = 0)
# Fit on the training split only: fitting on the full X, y would let the model see
# the test rows and inflate the R^2 computed on X_test (data leakage).
regressor_DT.fit(X_train, y_train)

In [None]:
# Decision-tree predictions for the test split.
y_pred_DT = regressor_DT.predict(X=X_test)

In [None]:
# R^2 of the decision-tree predictions against the test targets.
r2_score(y_true=y_test, y_pred=y_pred_DT)

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor
# Random forest of 15 trees with a fixed seed for repeatable results.
regressor_RF = RandomForestRegressor(n_estimators = 15, random_state = 0)
# Fit on the training split only: fitting on the full X, y would let the model see
# the test rows and inflate the R^2 computed on X_test (data leakage).
regressor_RF.fit(X_train, y_train)

In [None]:
# Random-forest predictions for the test split.
y_pred_RF = regressor_RF.predict(X=X_test)

In [None]:
# R^2 of the random-forest predictions against the test targets.
r2_score(y_true=y_test, y_pred=y_pred_RF)