# Machine Learning

## Import Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
from sklearn.model_selection import train_test_split

## Linear Regression

In [2]:
# Read the admissions table straight from the Google Sheets CSV export link.
ADMIT_CSV_URL = 'https://docs.google.com/spreadsheets/d/1qTkzdJeMuIjt9tS6opLOe14pgno48WlrfCMojVDZEz8/export?format=csv'
admit = pd.read_csv(ADMIT_CSV_URL)

# Quick look at the first rows to check that the columns parsed as expected.
admit.head()

Unnamed: 0,gre_score,toefl_score,univ_ranking,motiv_letter_strength,recommendation_strength,gpa,research_exp,admit_prob
0,337,118,4,4.5,4.5,9.65,1,0.92
1,324,107,4,4.0,4.5,8.87,1,0.76
2,316,104,3,3.0,3.5,8.0,1,0.72
3,322,110,3,3.5,2.5,8.67,1,0.8
4,314,103,2,2.0,3.0,8.21,0,0.65


In [3]:
# Number of rows (observations) in the loaded dataset.
admit.shape[0]

500

In [4]:
# split train test
# `train_test_split` is imported in the notebook's top import cell.

# Predictors are every column except the one being predicted. The target is
# selected with a list so it stays a one-column DataFrame.
feature = admit.drop(columns='admit_prob')
target = admit[['admit_prob']]

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
(
    feature_admit_train,
    feature_admit_test,
    target_admit_train,
    target_admit_test,
) = train_test_split(feature, target, test_size=0.20, random_state=42)

In [5]:
# Peek at the training features; the index shows the original row labels
# that were sampled into the training split.
feature_admit_train.head(n=5)

Unnamed: 0,gre_score,toefl_score,univ_ranking,motiv_letter_strength,recommendation_strength,gpa,research_exp
249,321,111,3,3.5,4.0,8.83,1
433,316,111,4,4.0,5.0,8.54,0
19,303,102,3,3.5,3.0,8.5,0
322,314,107,2,2.5,4.0,8.27,0
332,308,106,3,3.5,2.5,8.21,1


In [6]:
# Extract plain ndarrays from the training frames.
X_admit_train = feature_admit_train.to_numpy()

# The target frame holds a single column, so flatten its array to a 1-D vector.
y_admit_train = np.ravel(target_admit_train.to_numpy())

In [7]:
# `LinearRegression` is imported in the notebook's top import cell.

# define the model (scikit-learn linear regression with default settings)
linreg = LinearRegression()

# train the model on the training feature matrix and 1-D target vector
linreg.fit(X_admit_train, y_admit_train)

In [8]:
# Tabulate the fitted parameters: the intercept first, then one row per
# feature, in the column order of the frame the model was trained from.

data = feature_admit_train
model = linreg

feature_labels = ['intercept', *data.columns]
fitted_values = [model.intercept_, *model.coef_]

coef_df = pd.DataFrame({'feature': feature_labels, 'coefficient': fitted_values})

coef_df

Unnamed: 0,feature,coefficient
0,intercept,-1.421447
1,gre_score,0.002434
2,toefl_score,0.002996
3,univ_ranking,0.002569
4,motiv_letter_strength,0.001814
5,recommendation_strength,0.017238
6,gpa,0.112527
7,research_exp,0.024027


In [9]:
# model evaluation on training data
# The metric functions are imported in the notebook's top import cell.

# create prediction vector on training data
y_predict_train = linreg.predict(X_admit_train)

# In-sample errors, to be compared against the held-out test errors.
# scikit-learn returns MAPE as a fraction (not multiplied by 100).
train_mae = mean_absolute_error(y_admit_train, y_predict_train)
train_mape = mean_absolute_percentage_error(y_admit_train, y_predict_train)

print('MAE for training data is {}'.format(train_mae))
print('MAPE for training data is {}'.format(train_mape))

MAE for traning_data is 0.04253334061164317
MAPE for traning_data is 0.06848166838244786


In [10]:
# Convert the held-out test frames to ndarrays.
X_admit_test = feature_admit_test.to_numpy()

# Flatten the one-column target array to a 1-D vector.
y_admit_test = np.ravel(target_admit_test.to_numpy())

In [11]:
# Evaluate the fitted model on the held-out test split.

# Predictions for the test feature matrix.
y_predict_test = linreg.predict(X_admit_test)

# Out-of-sample errors, computed with the same two metrics as the training split.
test_mae = mean_absolute_error(y_admit_test, y_predict_test)
test_mape = mean_absolute_percentage_error(y_admit_test, y_predict_test)

print(f'MAE for test data is {test_mae}')
print(f'MAPE for test data is {test_mape}')

MAE for test data is 0.042722654277053636
MAPE for test data is 0.06857756648317814


# Done