In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Enumerate every file below the Kaggle input directory, one path per line.
# The generator is lazy, so paths are printed in the same order os.walk
# yields them.
input_files = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

In [None]:
# Read the test-scores CSV into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='../input/predict-test-scores-of-students/test_scores.csv'
)
# Show the first five rows as the cell output.
df.head(5)

In [None]:
# Remove the 'classroom' and 'student_id' columns before modelling.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: running it a second time is a no-op rather than a KeyError.
df = df.drop(columns=['classroom', 'student_id'], errors='ignore')

In [None]:
# Count missing values per column (displayed as the cell output).
df.isna().sum()

In [None]:
# Print the column dtypes, non-null counts and memory usage of the frame.
df.info()

In [None]:
# Report how many distinct values each column holds.
for col in df.columns:
    n_unique = df[col].nunique()
    print(f'"{col}" unique value: {n_unique}')

In [None]:
# Columns that get one-hot encoded.
categories = [
    'school',
    'school_setting',
    'school_type',
    'teaching_method',
    'gender',
    'lunch',
]
# drop_first=True drops the first level of each encoded column, leaving
# k-1 indicator columns per k-level category.
df_origin = pd.get_dummies(data=df, columns=categories, drop_first=True)
df_origin.head(5)

In [None]:
from sklearn.model_selection import train_test_split

# Feature matrix: every encoded column except the target.
X = df_origin.drop('posttest', axis=1).values
# Target vector: the 'posttest' column.
y = df_origin['posttest'].values

# Hold out 30% of the rows for evaluation. The fixed random_state makes the
# split -- and therefore every metric computed from it below -- identical on
# each Restart & Run All instead of changing from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def evaluation(y_true, y_pred):
    """Print regression metrics for a set of predictions.

    Prints, in order, the R2 score, the mean absolute error and the mean
    squared error of ``y_pred`` measured against ``y_true``.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values, passed to the metric functions
            together with ``y_true``.

    Returns:
        None. The metrics are only printed.
    """
    # (label, metric function) pairs, in the order they are reported.
    report = (
        ("R2 score: ", r2_score),
        ("Mean Absolute Error: ", mean_absolute_error),
        ("Mean Squared Error: ", mean_squared_error),
    )
    for label, metric in report:
        print(label, metric(y_true, y_pred))

In [None]:
from sklearn.linear_model import LinearRegression
# Ordinary least squares baseline; fit() returns the estimator itself, so the
# construction and fit can be chained.
lr = LinearRegression().fit(X_train, y_train)
# Score the baseline on the held-out split.
evaluation(y_true=y_test, y_pred=lr.predict(X_test))

In [None]:
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.metrics import mean_squared_error

# Search 20 log-spaced regularisation strengths between 1e-10 and 1e6.
ridge_cv = RidgeCV(alphas=np.logspace(start=-10, stop=6, num=20)).fit(X_train, y_train)

# Refit a plain Ridge model with the alpha selected by RidgeCV.
ridge = Ridge(alpha=ridge_cv.alpha_).fit(X_train, y_train)

# Score the refitted model on the held-out split.
evaluation(y_true=y_test, y_pred=ridge.predict(X_test))

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# An RBF-kernel SVR is not scale invariant, so the features are standardised
# before they reach it. Wrapping both steps in a pipeline means the scaler is
# fit on the training split only and the same transform is applied
# automatically at predict time (no leakage from the test split).
svr_reg = make_pipeline(
    StandardScaler(),
    SVR(C=2.5, epsilon=0.3, kernel='rbf'),
)
svr_reg.fit(X_train, y_train)
# Score the scaled SVR on the held-out split.
evaluation(y_true=y_test, y_pred=svr_reg.predict(X_test))

In [None]:
from sklearn.ensemble import RandomForestRegressor
# Random forest of 1000 trees. Bootstrapping and the per-split feature
# subsampling (max_features='sqrt') are random, so random_state is pinned to
# make the reported scores reproducible across runs.
rf = RandomForestRegressor(
    n_estimators=1000,
    min_samples_leaf=2,
    min_samples_split=6,
    max_features='sqrt',
    max_depth=30,
    random_state=42,
)
rf.fit(X_train, y_train)
# Score the forest on the held-out split.
evaluation(y_true=y_test, y_pred=rf.predict(X_test))

In [None]:
from sklearn.ensemble import GradientBoostingRegressor

# Gradient boosting with 1000 stages. max_features='sqrt' makes each split
# consider a random feature subset, so random_state is pinned to make the
# reported scores reproducible across runs.
# NOTE(review): max_depth=30 is very deep for boosted trees and may overfit --
# hyperparameters are left as-is here; consider tuning them.
gbr = GradientBoostingRegressor(
    n_estimators=1000,
    learning_rate=0.1,
    max_depth=30,
    min_samples_split=6,
    min_samples_leaf=2,
    max_features='sqrt',
    random_state=42,
)
gbr.fit(X_train, y_train)
# Score the boosted model on the held-out split.
evaluation(y_true=y_test, y_pred=gbr.predict(X_test))