In [None]:
<span style="font-size:16pt; font-weight:bold;">Linear Regression</span>

In [3]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [5]:
# Synthetic regression data: 500 samples, 5 features, noise=10.0, fixed seed.
X, y = make_regression(
    n_samples=500,
    n_features=5,
    noise=10.0,
    random_state=0,
)

In [6]:
# Hold-out split: 25% of the rows are reserved for evaluation (seeded shuffle).
Xtr, Xte, ytr, yte = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [7]:
# Baseline: ordinary linear regression fitted on the training split only.
est = LinearRegression()
est.fit(Xtr, ytr)

# R^2 of the fitted model on the held-out split.
print(r2_score(y_true=yte, y_pred=est.predict(Xte)))

0.9919107215682054


<span style="font-size:16pt; font-weight:bold;">K-fold CV</span>

In [8]:
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression

In [11]:
# Synthetic classification data: 600 samples, 10 features of which 5 are
# informative, class_sep=1.2, fixed seed.
X, y = make_classification(
    n_samples=600,
    n_features=10,
    n_informative=5,
    class_sep=1.2,
    random_state=0,
)

In [15]:
# Linear classifier evaluated with 5-fold cross-validation on the full dataset.
clf = LogisticRegression(max_iter=200)
scores = cross_val_score(estimator=clf, X=X, y=y, cv=5)

# Summarise the per-fold scores by their mean and standard deviation.
print(f"mean = {scores.mean()}, std = {scores.std()}")

mean = 0.925, std = 0.017480147469502518


<span style="font-size:16pt; font-weight:bold;">Pipeline</span>

In [16]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [18]:
# Two-step pipeline: feature scaling followed by the classifier, so both are
# fitted and applied together as a single estimator.
pipe = Pipeline(
    steps=[
        ("scale", StandardScaler()),
        ("clf", LogisticRegression(max_iter=200)),
    ]
)

In [20]:
# Train scaler + model, then report the pipeline's default score (accuracy).
# NOTE: the score is computed on the same X, y used for fitting, so it is an
# in-sample figure rather than an estimate of generalisation.
print(pipe.fit(X, y).score(X, y))

0.9233333333333333


<span style="font-size:16pt; font-weight:bold;">Features vs Generalisation</span>

In [21]:
from sklearn.metrics import accuracy_score

In [22]:
# Higher-dimensional problem: 1500 samples, 50 features, only 5 of them
# informative, class_sep=1.0, fixed seed.
X, y = make_classification(
    n_samples=1500,
    n_features=50,
    n_informative=5,
    class_sep=1.0,
    random_state=0,
)

In [23]:
# Hold-out split with a large test share: 40% of the rows go to evaluation.
Xtr, Xte, ytr, yte = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)

In [27]:
# Linear baseline fitted on the training split only.
clf = LogisticRegression(max_iter=1000)
clf.fit(Xtr, ytr)

# Accuracy on the data the model was fitted on (optimistic) ...
print('train acc:', accuracy_score(y_true=ytr, y_pred=clf.predict(Xtr)))
# ... versus accuracy on the held-out split (degraded).
print('test acc:', accuracy_score(y_true=yte, y_pred=clf.predict(Xte)))

train acc: 0.8377777777777777
test acc: 0.7933333333333333


<span style="font-size:16pt; font-weight:bold;">PyTorch: Simple Gradient</span>

In [28]:
import torch as th

ModuleNotFoundError: No module named 'torch'