# Regression & Classification Algorithms

- Linear Regression
- KNN Regression
- KNN Classification

In [1]:
import pandas as pd
df1 = pd.read_csv('Datasets\weight-height.csv')
df1.head()

import warnings as wr
wr.filterwarnings('ignore')

In [2]:
df1.shape

(8555, 3)

In [3]:
df2 = df1.copy()

## Linear Regression

In [4]:
X = df1.drop('Weight', axis=1)
y = df1[['Weight']]

In [5]:
#Encoding the categorical features
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
X['Gender'] = encoder.fit_transform(X['Gender'])

In [6]:
X['Gender'].value_counts()

1    5000
0    3555
Name: Gender, dtype: int64

In [7]:
y.head()

Unnamed: 0,Weight
0,241.893563
1,162.310473
2,212.740856
3,220.04247
4,206.349801


In [8]:
X_cols = X.columns
y_cols = y.columns

### Scalling the features

In [9]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [10]:
X[X_cols] = scaler.fit_transform(X[X_cols])
X['Gender'].value_counts()

 0.843208    5000
-1.185947    3555
Name: Gender, dtype: int64

In [11]:
X

Unnamed: 0,Gender,Height
0,0.843208,1.827233
1,0.843208,0.512039
2,0.843208,1.895545
3,0.843208,1.277788
4,0.843208,0.797634
...,...,...
8550,-1.185947,-1.642587
8551,-1.185947,-0.879344
8552,-1.185947,-0.318306
8553,-1.185947,0.161000


In [12]:
from sklearn.model_selection import train_test_split as tts
X_train, X_test, y_train, y_test = tts(X, y, test_size=.30, random_state= 25)

In [13]:
X_train.head()

Unnamed: 0,Gender,Height
8011,-1.185947,-1.035636
5610,-1.185947,-0.833441
5698,-1.185947,-0.785478
1007,0.843208,1.668782
44,0.843208,0.181863


In [14]:
X_test.head()

Unnamed: 0,Gender,Height
7908,-1.185947,0.140767
3347,0.843208,0.016387
5867,-1.185947,-0.816396
2246,0.843208,-0.41959
394,0.843208,0.258117


In [15]:
y_train.head()

Unnamed: 0,Weight
8011,138.402907
5610,142.682825
5698,134.228371
1007,201.099801
44,164.660277


In [16]:
y_test.head()

Unnamed: 0,Weight
7908,165.354678
3347,173.06134
5867,122.148253
2246,156.286199
394,173.459892


In [17]:
#X_test.head()

In [18]:
from sklearn.linear_model import LinearRegression
reg1 = LinearRegression()
reg1.fit(X_train, y_train)

In [19]:
y_pred = reg1.predict(X_test)
y_pred

array([[157.58206358],
       [173.97438662],
       [135.52811278],
       ...,
       [124.60020343],
       [127.97945134],
       [200.76365877]])

In [20]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
reg1_accuracy= r2_score(y_test, y_pred)
reg1_accuracy

0.9049666376262417

In [21]:
reg1_mse = mean_squared_error(y_test, y_pred)
reg1_mse

96.76738364036194

In [22]:
reg1_mae = mean_absolute_error(y_test, y_pred)
reg1_mae

7.831565729616084

## KNN Regression

In [23]:
from sklearn.neighbors import KNeighborsRegressor
KNN_reg1 = KNeighborsRegressor()

In [24]:
KNN_reg1.fit(X_train, y_train)

In [25]:
y_pred = KNN_reg1.predict (X_test)
y_pred

array([[156.36056284],
       [176.32397272],
       [133.40926498],
       ...,
       [128.05953058],
       [137.48707484],
       [208.88579718]])

In [26]:
KNN_reg1_accuracy= r2_score(y_test, y_pred)
KNN_reg1_accuracy

0.8832118750100906

In [27]:
KNN_reg1_mse = mean_squared_error(y_test, y_pred)
KNN_reg1_mse

118.91909339259313

In [28]:
KNN_reg1_mae = mean_absolute_error(y_test, y_pred)
KNN_reg1_mae

8.654404540905336

## KNN Classification

In [29]:
df2.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


In [30]:
df2.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


In [31]:
XX = df2.drop('Gender', axis=1)
yy = df2[['Gender']]

In [32]:
XX.head()

Unnamed: 0,Height,Weight
0,73.847017,241.893563
1,68.781904,162.310473
2,74.110105,212.740856
3,71.730978,220.04247
4,69.881796,206.349801


In [33]:
print(yy.head()) 
print(yy.columns)

  Gender
0   Male
1   Male
2   Male
3   Male
4   Male
Index(['Gender'], dtype='object')


In [34]:
encoder2 = LabelEncoder()
yy['Gender'] = encoder2.fit_transform(yy['Gender'])
yy

Unnamed: 0,Gender
0,1
1,1
2,1
3,1
4,1
...,...
8550,0
8551,0
8552,0
8553,0


### Scaling Features

In [35]:
XX_cols = XX.columns
yy_cols = yy.columns

In [36]:
XX[XX_cols] = scaler.fit_transform(XX[XX_cols])
XX

Unnamed: 0,Height,Weight
0,1.827233,2.380023
1,0.512039,-0.103684
2,1.895545,1.470197
3,1.277788,1.698073
4,0.797634,1.270738
...,...,...
8550,-1.642587,-1.718593
8551,-0.879344,-1.114506
8552,-0.318306,-0.302417
8553,0.161000,-0.435501


In [37]:
XX_train, XX_test, yy_train, yy_test = tts(XX, yy, test_size=.3, random_state=42)

In [38]:
from sklearn.neighbors import KNeighborsClassifier
KNN_clf1 = KNeighborsClassifier()
KNN_clf1.fit(XX_train, yy_train)

In [39]:
yy_pred = KNN_clf1.predict(XX_test)

In [40]:
KNN_clf_accuracy = r2_score(yy_test, yy_pred)
KNN_clf_accuracy

0.6598865296806506

In [41]:
KNN_clg1_mse = mean_squared_error(y_test, y_pred)
KNN_clg1_mse

118.91909339259313

In [42]:
KNN_clg1_mae = mean_absolute_error(y_test, y_pred)
KNN_clg1_mae

8.654404540905336

### Accuracy from different models:

In [43]:
print(f"Accuracy of Linear Regression Model: {round(reg1_accuracy*100, 2)}%")
print(f"Accuracy of KNN Regression Model: {round(KNN_reg1_accuracy*100, 2)}%")
print(f"Accuracy of KNN Clasification Model: {round(KNN_clf_accuracy*100, 2)}%")

Accuracy of Linear Regression Model: 90.5%
Accuracy of KNN Regression Model: 88.32%
Accuracy of KNN Clasification Model: 65.99%
