In [26]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from math import sqrt

In [27]:
# Read 'university_admission.csv' into a DataFrame; the relative path is
# resolved against the notebook's current working directory.
df = pd.read_csv(filepath_or_buffer='university_admission.csv')

In [None]:
# Preview the first five rows (pandas' default row count, made explicit).
df.head(n=5)

In [None]:
# Summary statistics per column; the quartiles listed are pandas' defaults.
df.describe(percentiles=[0.25, 0.5, 0.75])

In [None]:
# Print a concise summary of the frame (columns, dtypes, non-null counts).
# verbose=None leaves the level of detail to pandas' display options.
df.info(verbose=None)

In [None]:
# Count missing values per column (isna is the canonical spelling of isnull).
df.isna().sum(axis=0)

In [None]:
# Visual check for missing values: the heatmap is drawn from the boolean
# null-mask of df, one column per DataFrame column.
# An explicit Axes plus plt.show() makes the cell display only the figure
# (no Axes repr), consistent with the other plotting cells.
fig, ax = plt.subplots()
sns.heatmap(df.isnull(), yticklabels=False, cbar=False, cmap='Blues', ax=ax)
ax.set_title('Missing values by column')
plt.show()

In [None]:
# One histogram per column, drawn on a shared 10x10 inch figure.
hist_options = dict(bins=30, figsize=(10, 10), color='r')
df.hist(**hist_options)
plt.show()

In [None]:
# Grid of pairwise plots across the columns of df.
sns.pairplot(data=df)
plt.show()

In [None]:
# Scatter each predictor against the admission chance, coloured by university
# rating. The target column itself is skipped: plotting it against itself only
# produces an uninformative y = x diagonal.
target_col = 'Chance_of_Admission'
feature_cols = [col for col in df.columns if col != target_col]
for col in feature_cols:
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.scatterplot(data=df, x=col, y=target_col, hue='University_Rating', hue_norm=(1, 5), ax=ax)
    ax.set_title(f'{target_col} vs {col}')
    plt.show()
    # Release the figure explicitly so repeated runs do not accumulate open figures.
    plt.close(fig)

In [36]:
# Feature matrix: every column except the prediction target.
X = df.drop('Chance_of_Admission', axis=1)

In [37]:
# Target vector: the column the model is trained to predict.
y = df.loc[:, 'Chance_of_Admission']

In [None]:
# Dimensions of the feature matrix.
np.shape(X)

In [None]:
# Dimensions of the target.
np.shape(y)

In [40]:
# Convert features and target to NumPy arrays (np.array copies its input, so
# re-running this cell is harmless).
X, y = np.array(X), np.array(y)

In [None]:
# Turn the target into a single-column 2-D array, then display its new shape.
column_shape = (-1, 1)
y = y.reshape(column_shape)
y.shape

In [42]:
# Hold out 30% of the rows for testing. A fixed random_state makes the shuffle,
# and therefore every metric computed from this split, reproducible across
# kernel restarts.
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)

In [None]:
# Dimensions of the training features.
np.shape(X_train)

In [None]:
# Dimensions of the held-out test features.
np.shape(X_test)

In [None]:
# Gradient-boosted regressor; hyperparameters are gathered in one dict so they
# can be read and tuned in a single place.
xgb_params = {
    'objective': 'reg:squarederror',
    'n_estimators': 100,
    'max_depth': 2,
    'learning_rate': 0.1,
}
model = XGBRegressor(**xgb_params)
# Fit on the training split; the call stays last so the cell displays its return value.
model.fit(X_train, y_train)

In [None]:
# `score` on a scikit-learn-style regressor returns the coefficient of
# determination (R^2), not a classification accuracy. The `accuracy` name is
# kept so any later cell that references it still works, but the printed value
# is now labelled for what it is.
accuracy = model.score(X_test, y_test)
print('R2 score (x100) =', accuracy * 100)

In [47]:
# Predict the target for the held-out test rows with the fitted model.
y_predict = model.predict(X_test)

In [None]:
# Regression metrics on the test split.
# n = number of test rows, k = number of feature columns (both feed adjusted R^2).
n, k = X_test.shape

MSE = mean_squared_error(y_true=y_test, y_pred=y_predict)
RMSE = np.sqrt(MSE)
MAE = mean_absolute_error(y_true=y_test, y_pred=y_predict)
r2 = r2_score(y_true=y_test, y_pred=y_predict)

# Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - k - 1)
adj_r2 = 1 - ((1 - r2) * (n - 1)) / (n - k - 1)

print(
    'MSE =', MSE,
    '\nRMSE =', RMSE,
    '\nMAE =', MAE,
    '\nR2 =', r2,
    '\nAdjusted R2 =', adj_r2,
)