In [111]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import LeaveOneOut, LeavePOut, KFold, StratifiedKFold, cross_val_score

In [112]:
# Load the placement dataset from the working directory, then preview the
# first three rows as the cell's output.
csv_path = 'placement_package.csv'
placement_package_data = pd.read_csv(csv_path)
placement_package_data.head(n=3)

Unnamed: 0,cgpa,package
0,6.89,3.26
1,5.12,1.98
2,7.82,3.25


In [113]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
placement_package_data.shape

(200, 2)

In [114]:
# Count missing values per column; all zeros means no imputation is needed.
null_counts = placement_package_data.isnull().sum()
null_counts

cgpa       0
package    0
dtype: int64

In [115]:
# Split the frame into the model inputs and the target.
# `x` stays two-dimensional (a DataFrame) because it is selected with a list
# of column names; `y` is a one-dimensional Series.
feature_columns = ['cgpa']
target_column = 'package'

x = placement_package_data[feature_columns]
y = placement_package_data[target_column]

In [116]:
# Keep only the first ten rows so the split indices printed by the
# cross-validation demos below stay short enough to read.
n_demo_rows = 10
new_data = placement_package_data.head(n_demo_rows)

In [117]:
# Display the ten-row subset used by the splitter demonstrations.
new_data

Unnamed: 0,cgpa,package
0,6.89,3.26
1,5.12,1.98
2,7.82,3.25
3,7.42,3.67
4,6.94,3.57
5,7.89,2.99
6,6.73,2.6
7,6.75,2.48
8,6.09,2.31
9,8.31,3.51


In [118]:
# Feature frame and target series for the ten-row demo subset.
x_new, y_new = new_data[['cgpa']], new_data['package']

## Cross Validation Methods

### 1.Leave One Out

In [119]:
# Leave-One-Out: each split holds out exactly one row as the test set and
# trains on all the others. Print the row indices of every split.
loo = LeaveOneOut()
loo_splits = loo.split(x_new, y_new)

for train, test in loo_splits:
    print(f"Train Data: {train}, Test Data: {test}")

Train Data: [1 2 3 4 5 6 7 8 9], Test Data: [0]
Train Data: [0 2 3 4 5 6 7 8 9], Test Data: [1]
Train Data: [0 1 3 4 5 6 7 8 9], Test Data: [2]
Train Data: [0 1 2 4 5 6 7 8 9], Test Data: [3]
Train Data: [0 1 2 3 5 6 7 8 9], Test Data: [4]
Train Data: [0 1 2 3 4 6 7 8 9], Test Data: [5]
Train Data: [0 1 2 3 4 5 7 8 9], Test Data: [6]
Train Data: [0 1 2 3 4 5 6 8 9], Test Data: [7]
Train Data: [0 1 2 3 4 5 6 7 9], Test Data: [8]
Train Data: [0 1 2 3 4 5 6 7 8], Test Data: [9]


### 2.Leave P Out

In [120]:
# Leave-P-Out with p=2: every possible pair of rows is used once as the test
# set, with the remaining rows as the training set.
lpo = LeavePOut(p=2)
lpo_splits = lpo.split(x_new, y_new)

for train, test in lpo_splits:
    print(f"Train Data: {train}, Test Data: {test}")

Train Data: [2 3 4 5 6 7 8 9], Test Data: [0 1]
Train Data: [1 3 4 5 6 7 8 9], Test Data: [0 2]
Train Data: [1 2 4 5 6 7 8 9], Test Data: [0 3]
Train Data: [1 2 3 5 6 7 8 9], Test Data: [0 4]
Train Data: [1 2 3 4 6 7 8 9], Test Data: [0 5]
Train Data: [1 2 3 4 5 7 8 9], Test Data: [0 6]
Train Data: [1 2 3 4 5 6 8 9], Test Data: [0 7]
Train Data: [1 2 3 4 5 6 7 9], Test Data: [0 8]
Train Data: [1 2 3 4 5 6 7 8], Test Data: [0 9]
Train Data: [0 3 4 5 6 7 8 9], Test Data: [1 2]
Train Data: [0 2 4 5 6 7 8 9], Test Data: [1 3]
Train Data: [0 2 3 5 6 7 8 9], Test Data: [1 4]
Train Data: [0 2 3 4 6 7 8 9], Test Data: [1 5]
Train Data: [0 2 3 4 5 7 8 9], Test Data: [1 6]
Train Data: [0 2 3 4 5 6 8 9], Test Data: [1 7]
Train Data: [0 2 3 4 5 6 7 9], Test Data: [1 8]
Train Data: [0 2 3 4 5 6 7 8], Test Data: [1 9]
Train Data: [0 1 4 5 6 7 8 9], Test Data: [2 3]
Train Data: [0 1 3 5 6 7 8 9], Test Data: [2 4]
Train Data: [0 1 3 4 6 7 8 9], Test Data: [2 5]
Train Data: [0 1 3 4 5 7 8 9], Test Data: [2 6]
... (remaining output truncated; LeavePOut(p=2) on 10 rows yields 45 splits in total)

### 3.K-Fold

In [121]:
# K-Fold with five folds: the rows are cut into five consecutive blocks and
# each block takes one turn as the test set.
kf = KFold(n_splits=5)
kf_splits = kf.split(x_new, y_new)

for train, test in kf_splits:
    print(f"Train Data: {train}, Test Data: {test}")

Train Data: [2 3 4 5 6 7 8 9], Test Data: [0 1]
Train Data: [0 1 4 5 6 7 8 9], Test Data: [2 3]
Train Data: [0 1 2 3 6 7 8 9], Test Data: [4 5]
Train Data: [0 1 2 3 4 5 8 9], Test Data: [6 7]
Train Data: [0 1 2 3 4 5 6 7], Test Data: [8 9]


### 4. Stratified K-Fold (classification only: it needs discrete class labels, so it cannot be used with the continuous `package` target)

In [122]:
# StratifiedKFold keeps the class proportions the same in every fold, so it
# needs discrete class labels. `y_new` holds continuous package values, so the
# split is expected to fail. Rather than leaving the attempt commented out,
# run it and show the error as the demonstration.
skf = StratifiedKFold(n_splits=5)

try:
    # split() is lazy: the target check only runs once iteration starts,
    # which is why the loop itself sits inside the try block.
    for train, test in skf.split(x_new, y_new):
        print(f"Train Data: {train}, Test Data: {test}")
except ValueError as err:
    # Expected for a regression target; report it without stopping the notebook.
    print(f"StratifiedKFold cannot split a continuous target: {err}")

## Cross Validation Score Example Using Linear Regression

### 1. Integer `cv` (`cv=10`; for a regressor this uses an unshuffled K-Fold internally)

In [123]:
# 10-fold cross-validation of a linear regression on the full dataset.
# With no `scoring` argument the estimator's own score is used (R^2 for
# LinearRegression). The per-fold scores are sorted ascending and shown as
# percentages.
sc = np.sort(cross_val_score(LinearRegression(), x, y, cv=10))
100 * sc

array([60.48000765, 65.67540106, 67.20523867, 69.890411  , 73.50599138,
       74.37616704, 80.3181025 , 82.0986355 , 82.64799643, 83.96333567])

### 2. Using an Explicit Splitter Object (i.e., K-Fold)

In [124]:
# Same evaluation, but with an explicit KFold splitter of 20 folds passed as
# `cv`. The per-fold scores are sorted ascending and shown as percentages.
twenty_fold = KFold(n_splits=20)
sc = np.sort(cross_val_score(LinearRegression(), x, y, cv=twenty_fold))
100 * sc

array([21.10649528, 50.11279915, 55.20455718, 59.10961505, 63.90636747,
       64.25812894, 70.4717229 , 71.7317162 , 71.81324716, 73.06523833,
       73.53529779, 74.79889073, 76.85308421, 77.35049667, 78.92047672,
       81.36536937, 82.46384717, 82.47658021, 88.9006271 , 91.16478623])