In [38]:
from __future__ import print_function, division

# Necessary imports
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
import patsy

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV
%matplotlib inline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split

In [56]:
df = pd.read_csv('data_std.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Item Size,Overall Fit,Height,Bust,Waist,Hips,Bra,new
0,0,Item Size: XXS,Overall Fit: True to Size,178,84,72,93,81.28,-2.438697
1,1,Item Size: XXS,Overall Fit: True to Size,153,79,61,82,81.28,-2.438697
2,2,Item Size: XS,Overall Fit: True to Size,160,89,61,94,81.28,-1.406697
3,3,Item Size: XS,Overall Fit: True to Size,150,77,61,85,81.28,-1.406697
4,4,Item Size: XS,Overall Fit: True to Size,163,78,66,82,81.28,-1.406697


In [64]:
y = df['new']
X=df.filter(items=['Height', 'Bust', 'Waist', 'Hips', 'Bra'])

In [67]:
alphas = [1e-10,1e-9,1e-8,1e-7,1e-6,1e-5,1e-4,1e-3,1e-2,1e-1, 1e0]

In [71]:
results = {}
for alpha in alphas:
    kf = KFold(n_splits=5, random_state=8)
    splits = kf.split(X)
    for train_index, test_index in splits:
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y[train_index], y[test_index]
        lr = Lasso(alpha=alpha)
        lr.fit(X_train, y_train)
        score = lr.score(X_test, y_test)
        result_list = results.get(alpha, [])
        result_list.append(score)
        results[alpha] = result_list

In [72]:
for alpha in results.keys():
    results[alpha] = np.array(results[alpha]).mean()
results

{1e-10: -4.0489753769232087e+31,
 1e-09: -4.0489753751400174e+31,
 1e-08: -4.048975357132252e+31,
 1e-07: -4.048975176305306e+31,
 1e-06: -4.0489739478020413e+31,
 1e-05: -4.048961715462604e+31,
 0.0001: -4.0487832749622833e+31,
 0.001: -4.0469784599819018e+31,
 0.01: -4.028975720532161e+31,
 0.1: -3.857266879296345e+31,
 1.0: -2.7380681682417363e+31}

In [75]:
lr = Lasso(alpha=1.0)
lr.fit(X, y)
lr.coef_

array([0.        , 0.00672812, 0.01548462, 0.0173144 , 0.03175039])

In [78]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn import linear_model
from sklearn.linear_model import Perceptron
from sklearn.pipeline import Pipeline

In [79]:
clf = LinearRegression()
poly = PolynomialFeatures(2)
X = poly.fit_transform(X)

In [83]:
alphas = [1e-10,1e-9,1e-8,1e-7,1e-6,1e-5,1e-4,1e-3,1e-2,1e-1, 1e0]
results = {}
for alpha in alphas:
    kf = KFold(n_splits=5, random_state=8)
    splits = kf.split(X)
    for train_index, test_index in splits:
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        lr = Lasso(alpha=alpha)
        lr.fit(X_train, y_train)
        score = lr.score(X_test, y_test)
        result_list = results.get(alpha, [])
        result_list.append(score)
        results[alpha] = result_list



In [84]:
for alpha in results.keys():
    results[alpha] = np.array(results[alpha]).mean()
results

{1e-10: -5.183534830791785e+31,
 1e-09: -5.183534777794541e+31,
 1e-08: -5.183534247822012e+31,
 1e-07: -5.183528948101416e+31,
 1e-06: -5.183475951279925e+31,
 1e-05: -5.182946021519007e+31,
 0.0001: -5.177650573904385e+31,
 0.001: -5.125069806418898e+31,
 0.01: -4.7483450276755565e+31,
 0.1: -4.693734031001299e+31,
 1.0: -4.385005825089531e+31}

In [85]:
lr = Lasso(alpha=1.0)
lr.fit(X, y)
lr.coef_



array([ 0.00000000e+00, -0.00000000e+00, -0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00, -5.17445287e-05, -0.00000000e+00,
        8.99577516e-04,  1.97409249e-04, -7.50270890e-04,  1.41186868e-03,
       -1.16254111e-03, -1.38110449e-04, -1.57056414e-03,  0.00000000e+00,
       -0.00000000e+00,  0.00000000e+00, -0.00000000e+00, -0.00000000e+00,
        1.87362880e-03])

In [86]:
poly.get_feature_names()

['1',
 'x0',
 'x1',
 'x2',
 'x3',
 'x4',
 'x0^2',
 'x0 x1',
 'x0 x2',
 'x0 x3',
 'x0 x4',
 'x1^2',
 'x1 x2',
 'x1 x3',
 'x1 x4',
 'x2^2',
 'x2 x3',
 'x2 x4',
 'x3^2',
 'x3 x4',
 'x4^2']

In [87]:
y = df['new']
X=df.filter(items=['Bust', 'Waist', 'Hips', 'Bra'])

In [100]:
clf = LinearRegression()
poly = PolynomialFeatures(2)
X = poly.fit_transform(X)

In [101]:
alphas = [1e-10,1e-9,1e-8,1e-7,1e-6,1e-5,1e-4,1e-3,1e-2,1e-1, 1e0]
results = {}
for alpha in alphas:
    kf = KFold(n_splits=5, random_state=8)
    splits = kf.split(X)
    for train_index, test_index in splits:
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        lr = Lasso(alpha=alpha)
        lr.fit(X_train, y_train)
        score = lr.score(X_test, y_test)
        result_list = results.get(alpha, [])
        result_list.append(score)
        results[alpha] = result_list



In [102]:
for alpha in results.keys():
    results[alpha] = np.array(results[alpha]).mean()
results

{1e-10: -5.491827466412347e+31,
 1e-09: -5.491827457040664e+31,
 1e-08: -5.491827363324471e+31,
 1e-07: -5.491826426165543e+31,
 1e-06: -5.491817054662425e+31,
 1e-05: -5.491723347209343e+31,
 0.0001: -5.490786807363064e+31,
 0.001: -5.481474056557204e+31,
 0.01: -5.4568872706486685e+31,
 0.1: -5.444378011298627e+31,
 1.0: -5.40681061957835e+31}

In [91]:
lr = Lasso(alpha=1.0)
lr.fit(X, y)
lr.coef_



array([ 0.00000000e+00, -0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  1.49607982e-03, -5.23378688e-04, -1.16371558e-05,
       -2.37306623e-03,  6.46255342e-04,  0.00000000e+00,  0.00000000e+00,
        1.06323162e-04,  0.00000000e+00,  1.56754427e-03])

In [92]:
poly.get_feature_names()

['1',
 'x0',
 'x1',
 'x2',
 'x3',
 'x0^2',
 'x0 x1',
 'x0 x2',
 'x0 x3',
 'x1^2',
 'x1 x2',
 'x1 x3',
 'x2^2',
 'x2 x3',
 'x3^2']

In [96]:
y = df['new']
X=df.filter(items=['Bust', 'Waist', 'Hips', 'Bra'])

In [97]:
clf = LinearRegression()
poly = PolynomialFeatures(3)
X = poly.fit_transform(X)

In [98]:
lr = Lasso(alpha=1.0)
lr.fit(X, y)
lr.coef_



array([ 0.00000000e+00, -0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00, -1.33566112e-04,  0.00000000e+00,  0.00000000e+00,
       -6.06530811e-05, -0.00000000e+00,  2.51433239e-04,  0.00000000e+00,
        6.96460260e-04,  0.00000000e+00,  0.00000000e+00,  2.03222032e-05,
       -1.10963062e-05, -3.66605786e-06, -6.82158348e-06,  3.54388888e-06,
       -2.07025402e-06, -7.13773159e-06,  6.41409202e-07, -6.30374341e-06,
       -3.84989577e-06,  4.17923205e-06,  5.15901300e-06, -1.99409944e-06,
        6.80559973e-07, -2.95520520e-06,  6.50253371e-06, -5.49162463e-07,
       -3.50267221e-06,  1.97819785e-07,  1.06897403e-05])

In [99]:
poly.get_feature_names()

['1',
 'x0',
 'x1',
 'x2',
 'x3',
 'x0^2',
 'x0 x1',
 'x0 x2',
 'x0 x3',
 'x1^2',
 'x1 x2',
 'x1 x3',
 'x2^2',
 'x2 x3',
 'x3^2',
 'x0^3',
 'x0^2 x1',
 'x0^2 x2',
 'x0^2 x3',
 'x0 x1^2',
 'x0 x1 x2',
 'x0 x1 x3',
 'x0 x2^2',
 'x0 x2 x3',
 'x0 x3^2',
 'x1^3',
 'x1^2 x2',
 'x1^2 x3',
 'x1 x2^2',
 'x1 x2 x3',
 'x1 x3^2',
 'x2^3',
 'x2^2 x3',
 'x2 x3^2',
 'x3^3']

In [105]:
y = df['new']
X=df.filter(items=['Waist', 'Hips', 'Bra'])

In [106]:
clf = LinearRegression()
poly = PolynomialFeatures(2)
X = poly.fit_transform(X)

In [114]:
alphas = [1e-10,1e-9,1e-8,1e-7,1e-6,1e-5,1e-4,1e-3,1e-2,1e-1, 1e0]
results = {}
for alpha in alphas:
    kf = KFold(n_splits=5, random_state=8)
    splits = kf.split(X)
    for train_index, test_index in splits:
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        lr = Lasso(alpha=alpha)
        lr.fit(X_train, y_train)
        score = lr.score(X_test, y_test)
        result_list = results.get(alpha, [])
        result_list.append(score)
        results[alpha] = result_list



In [115]:
for alpha in results.keys():
    results[alpha] = np.array(results[alpha]).mean()
results

{1e-10: -5.610273944187317e+31,
 1e-09: -5.610273931656518e+31,
 1e-08: -5.610273806348558e+31,
 1e-07: -5.610272553269394e+31,
 1e-06: -5.610260022543275e+31,
 1e-05: -5.610134721811397e+31,
 0.0001: -5.608882367439078e+31,
 0.001: -5.5964241027503055e+31,
 0.01: -5.456761731994007e+31,
 0.1: -5.510722390284662e+31,
 1.0: -5.2943595401982895e+31}

In [107]:
lr = Lasso(alpha=1.0)
lr.fit(X, y)
lr.coef_

array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.00031944,
        0.        , -0.        ,  0.00011707, -0.        ,  0.00049044])

In [108]:
poly.get_feature_names()

['1', 'x0', 'x1', 'x2', 'x0^2', 'x0 x1', 'x0 x2', 'x1^2', 'x1 x2', 'x2^2']

In [109]:
y = df['new']
X=df.filter(items=['Waist', 'Hips', 'Bra'])

In [110]:
clf = LinearRegression()
poly = PolynomialFeatures(3)
X = poly.fit_transform(X)

In [111]:
lr = Lasso(alpha=1.0)
lr.fit(X, y)
lr.coef_



array([ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        1.43987836e-04,  6.60490725e-04,  0.00000000e+00,  4.97189680e-04,
        0.00000000e+00,  0.00000000e+00,  1.33372489e-05, -4.01962969e-06,
       -9.04246350e-06, -8.64446530e-07, -7.39439250e-06, -3.76711686e-07,
        1.30805138e-06, -4.11340492e-06, -1.05655667e-06,  1.00207946e-05])