# Student Performance Predictor

## Data Processing

### Import Lib  

In [186]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

### Loading Datasets

In [187]:
pd.set_option('display.max_columns', None)
dataset = pd.read_csv('student-mat.csv', sep= ';')
print(dataset.head())


  school sex  age address famsize Pstatus  Medu  Fedu     Mjob      Fjob  \
0     GP   F   18       U     GT3       A     4     4  at_home   teacher   
1     GP   F   17       U     GT3       T     1     1  at_home     other   
2     GP   F   15       U     LE3       T     1     1  at_home     other   
3     GP   F   15       U     GT3       T     4     2   health  services   
4     GP   F   16       U     GT3       T     3     3    other     other   

   reason guardian  traveltime  studytime  failures schoolsup famsup paid  \
0  course   mother           2          2         0       yes     no   no   
1  course   father           1          2         0        no    yes   no   
2   other   mother           1          2         3       yes     no  yes   
3    home   mother           1          3         0        no    yes  yes   
4    home   father           1          2         0        no    yes  yes   

  activities nursery higher internet romantic  famrel  freetime  goout  Dalc  \


### Encoding Categorical Data

#### Encoding Independent Variables

##### Taking care of Yes No values

In [188]:
from sklearn.preprocessing import LabelEncoder
yes_no_col = [15,16,17,18,19,20,21,22,23,24,25]
le = LabelEncoder()
for col in yes_no_col:
    dataset.iloc[:,col] = le.fit_transform(dataset.iloc[:,col])

print(dataset.head())

  school sex  age address famsize Pstatus  Medu  Fedu     Mjob      Fjob  \
0     GP   F   18       U     GT3       A     4     4  at_home   teacher   
1     GP   F   17       U     GT3       T     1     1  at_home     other   
2     GP   F   15       U     LE3       T     1     1  at_home     other   
3     GP   F   15       U     GT3       T     4     2   health  services   
4     GP   F   16       U     GT3       T     3     3    other     other   

   reason guardian  traveltime  studytime  failures schoolsup famsup paid  \
0  course   mother           2          2         0         1      0    0   
1  course   father           1          2         0         0      1    0   
2   other   mother           1          2         3         1      0    1   
3    home   mother           1          3         0         0      1    1   
4    home   father           1          2         0         0      1    1   

  activities nursery higher internet romantic  famrel  freetime  goout  Dalc  \


##### Defining Variables

In [189]:
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

##### Taking care of categorical values

In [190]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(),  [0, 1, 3, 4, 5, 8, 9, 10, 11])], remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [191]:
print(X)

[[1.0 0.0 1.0 ... 6 5 6]
 [1.0 0.0 1.0 ... 4 5 5]
 [1.0 0.0 1.0 ... 10 7 8]
 ...
 [0.0 1.0 0.0 ... 3 10 8]
 [0.0 1.0 0.0 ... 0 11 12]
 [0.0 1.0 0.0 ... 5 8 9]]


### Splitting the dataset into Training set and Test set


In [192]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state =1)

In [193]:
print(X_train)

[[1.0 0.0 0.0 ... 0 13 13]
 [1.0 0.0 1.0 ... 0 10 9]
 [1.0 0.0 0.0 ... 2 10 10]
 ...
 [1.0 0.0 1.0 ... 2 8 6]
 [1.0 0.0 0.0 ... 10 11 9]
 [1.0 0.0 0.0 ... 7 15 16]]


In [194]:
print(X_train[:5])

[[1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0
  0.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 16 2 2 2 2 0 0 1 0 1 1 1 1 0 4 3 3
  2 4 5 0 13 13]
 [1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0
  0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 19 4 4 2 2 0 0 1 1 1 1 1 1 0 1 2 3
  2 3 2 0 10 9]
 [1.0 0.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0
  0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 15 4 3 2 2 0 0 1 1 0 1 1 1 0 4 3 2
  1 2 3 2 10 10]
 [1.0 0.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0
  0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 16 0 2 1 1 0 0 0 1 0 0 1 1 0 3 2 1
  2 4 5 0 13 15]
 [1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0
  1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 16 1 1 4 1 0 1 1 0 1 0 1 1 1 4 4 4
  5 5 5 6 10 8]]


In [195]:
print(y_train)

[12  0 11 15 11 13 10 10 13  0 11 14  0 19 10  0  8  9  6 12 18 15  7  9
 14 11 18 13  6 13 14  9 12  8 11  8 13 10 12 11 15 16 15 14 18 16 18  8
  9 14  7  9  0  9  8 10  0  8 13 16  0 13 12 10 13 13 15 13 14 11  6 11
 11 13 15 11  0 12 15 10 11  8 18  0  7 13 10  8  0 10 17  9 11  0  5 15
 19  9 18  0 12 12 10 14 15  9  9 10 14  0 13 15 14  5 15 10 19 12  8 14
 16  0 11 17 10 13 16 14 14  9 11 12 12  6 11  9 10  0 11 12 12 10  0 10
 10  0 13  6  7  0  5 20 19  6 10 11 11 15 17  7 16 11 10 14  0  8 11 15
 12  9  0 16 14 14 16 11  5 11 10 11  8 15 11 15 15 12 10  9  8 14  0  0
 13 11 14 18 17 15  6  6  0 12 11 14 10  0  0 10 10 15  9  0 15  8 12 15
 18 18 13 15  8 10 16 11  9  9 10 13 10 10 12  8  0  8 14 13 14 13 11 12
  0 14  0 13 14 10 14  0 10 15  8  8 10 11 15 17 13 18  0 12 15  8 16 10
 14  8 16 13 12 11 10 11  8 15 10  7 10 12 15 16  9  4  0  7  0  6 11 16
  9 11  6  9  6  8 12 15  8 13 13  8 11 12  9  9 10 12 10 18  0 15 11  6
  8  5 10 15]


In [196]:
print(y_test)

[ 0 10  8 10 10 12  8 11 11  8 11 13  8  7 10 10 15 16  8 10 11 19 10 11
 16  5 11  9 10 13 12 11 10 11 14  8 10 10  7 10  9  9 15  9 11 13 18  9
 14 10  5 10 11  6  6 11  6 15 10  0  0  0 13 17 13  8 10 15  9 10 16 12
 12 14 12 11 10 13 12]


In [197]:
print(X_train.shape)

(316, 50)


### Feature Scaling

In [198]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train[:, 27:33] = sc.fit_transform(X_train[:,27:33])
X_test[:, 27:33] = sc.transform(X_test[:,27:33])
X_train[:, 40:] = sc.fit_transform(X_train[:,40:])
X_test[:, 40:] = sc.transform(X_test[:,40:])

In [199]:
print(X_train[:5])

[[1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0
  0.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 -0.5139366821250756
  -0.709781259669727 -0.5073684183787618 0.8644153069527212
  -0.04514980726717193 -0.4426643112615976 0 1 0 1 1 1 1
  -0.7054291826636849 1.14718627649378 0.7853535113540139
  0.7793955763892654 0.584673738420927 1.3221564348088795
  1.0275701892534368 -0.6746582164971658 0.6136195264740169
  0.5811869668706116]
 [1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0
  0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.8980037864619168
  1.1211646836415685 1.335486986307316 0.8644153069527212
  -0.04514980726717193 -0.4426643112615976 0 1 1 1 1 1 1
  -0.7054291826636849 -2.099179664151305 -0.22347457640154847
  0.7793955763892654 0.584673738420927 0.5368153945840564
  -1.1181050076898627 -0.6746582164971658 -0.2799462355803115
  -0.4306867605735383]
 [1.0 0.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0
  0.0 0.0 1.0 0.0 0.0 0.0 0.

In [200]:
print(X_test)

[[1.0 0.0 1.0 ... -0.6746582164971658 -1.4713672516527494
  -0.9366236242956132]
 [0.0 1.0 1.0 ... 1.390449441065669 -0.2799462355803115
  -0.17771832871250082]
 [1.0 0.0 0.0 ... 1.268972520032561 -1.4713672516527494
  -0.6836551924345757]
 ...
 [0.0 1.0 1.0 ... -0.6746582164971658 0.01790901843779795
  0.07525010314853664]
 [1.0 0.0 0.0 ... -0.43170437443095 0.9114747804921263 0.5811869668706116]
 [1.0 0.0 1.0 ... 0.05420330970148172 0.3157642724559074
  0.3282185350095741]]


## Regression Models

### Simple Linear Regression


In [201]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

In [202]:
y_pred = regressor.predict(X_test)

In [203]:
print(y_pred)

[ 4.84444395 11.36375304  5.39608109  8.05306226 10.00612326 10.46828042
  6.59728475 12.38209291 10.75833868  7.40025158 12.53203944 13.4486291
  5.9980494   8.83232203  7.81312912 11.32750003 16.54810773 16.77204959
  5.93321189  9.35167985 10.47149898 18.46493998  9.80280202 11.79411699
 15.40113713  5.30871888 12.02050837  7.54892513  8.94131787 12.31771003
 11.12851226 11.99598737  5.28853303 10.60992231 12.34760795  8.73761148
 10.56062355  7.77982208  7.51974567 10.28815726  8.99325442 11.18944198
 16.34690998  4.78877901 10.14122062 11.86567215 19.35912888  8.47942198
 12.69911988  7.90955907  5.18703107 12.74266893  9.51140852  4.89805775
  7.11048478 10.7389807   6.73315459 15.42475033  9.58987386  3.68302227
  5.4631397   8.12047133 10.37789912 15.44479114 13.70019145  9.11224532
 10.30429062 14.60928478  8.14174698  9.16800238 14.50395995 10.77109434
 11.76899884 13.57210387 13.04344579 10.52241058 10.74295844 12.71978226
 12.24419665]
