# Possum head length prediction with Support Vector Regression (Features: Skull width, Total length, Tail length, Chest length, Belly length)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Load dataset

In [2]:
# Read the possum measurements from the CSV in the working directory,
# then preview the first five rows.
df = pd.read_csv(filepath_or_buffer='possum.csv')
df.head(n=5)

Unnamed: 0,case,site,Pop,sex,age,hdlngth,skullw,totlngth,taill,footlgth,earconch,eye,chest,belly
0,1,1,Vic,m,8.0,94.1,60.4,89.0,36.0,74.5,54.5,15.2,28.0,36.0
1,2,1,Vic,f,6.0,92.5,57.6,91.5,36.5,72.5,51.2,16.0,28.5,33.0
2,3,1,Vic,f,6.0,94.0,60.0,95.5,39.0,75.4,51.9,15.5,30.0,34.0
3,4,1,Vic,f,6.0,93.2,57.1,92.0,38.0,76.1,52.2,15.2,28.0,34.0
4,5,1,Vic,f,2.0,91.5,56.3,85.5,36.0,71.0,53.2,15.1,28.5,33.0


In [3]:
# Select features and target by column name rather than by position, so the
# selection does not silently change if the CSV column order changes.
# Positions 6, 7, 8, 12, 13 correspond to these columns; position 5 is hdlngth.
FEATURE_COLUMNS = ['skullw', 'totlngth', 'taill', 'chest', 'belly']
TARGET_COLUMN = 'hdlngth'

X = df[FEATURE_COLUMNS].to_numpy()   # 2-D array, one column per feature
y = df[TARGET_COLUMN].to_numpy()     # 1-D array of head lengths
print(X)
print(y)

[[60.4 89.  36.  28.  36. ]
 [57.6 91.5 36.5 28.5 33. ]
 [60.  95.5 39.  30.  34. ]
 [57.1 92.  38.  28.  34. ]
 [56.3 85.5 36.  28.5 33. ]
 [54.8 90.5 35.5 30.  32. ]
 [58.2 89.5 36.  30.  34.5]
 [57.6 91.  37.  29.  34. ]
 [56.3 91.5 37.  28.  33. ]
 [58.  89.5 37.5 27.5 32. ]
 [57.2 89.5 39.  31.  34. ]
 [55.6 92.  35.5 28.  33. ]
 [59.9 89.5 36.  27.  32. ]
 [57.6 91.5 36.  28.  31.5]
 [57.6 85.5 34.  28.  35. ]
 [56.  86.  34.5 28.  32. ]
 [67.7 89.5 36.5 29.  31. ]
 [55.7 90.  36.  28.  32. ]
 [55.4 90.5 35.  28.  32. ]
 [56.3 89.  38.  27.  36. ]
 [58.1 96.5 39.5 30.  40. ]
 [58.5 91.  39.5 28.  36. ]
 [56.1 89.  36.  28.  35. ]
 [54.9 84.  34.  27.  32. ]
 [58.5 91.5 35.5 31.  35. ]
 [59.  90.  36.  29.  38. ]
 [54.5 85.  35.  23.  28. ]
 [56.8 87.  34.5 27.  30. ]
 [56.  88.  35.  24.  32. ]
 [54.4 84.  33.5 24.5 33. ]
 [54.1 93.  37.  27.  31. ]
 [56.7 94.  39.  28.  34. ]
 [54.6 89.  37.  24.  30. ]
 [55.7 85.5 36.5 26.  28.5]
 [57.9 85.  35.5 28.  35.5]
 [59.3 88.  35.  25.

## Data Preprocessing - Handle missing data

In [4]:
# Missing-value check for total length: is any value NaN, and how many are.
totlngth_nulls = df['totlngth'].isnull()
print(totlngth_nulls.any())
print(totlngth_nulls.sum())

False
0


In [5]:
# Missing-value check for tail length: is any value NaN, and how many are.
taill_nulls = df['taill'].isnull()
print(taill_nulls.any())
print(taill_nulls.sum())

False
0


In [6]:
# Missing-value check for foot length: is any value NaN, and how many are.
footlgth_nulls = df['footlgth'].isnull()
print(footlgth_nulls.any())
print(footlgth_nulls.sum())

True
1


In [7]:
# Missing-value check for ear conch length: is any value NaN, and how many are.
earconch_nulls = df['earconch'].isnull()
print(earconch_nulls.any())
print(earconch_nulls.sum())

False
0


In [8]:
# Missing-value check for the eye measurement: is any value NaN, and how many are.
eye_nulls = df['eye'].isnull()
print(eye_nulls.any())
print(eye_nulls.sum())

False
0


In [9]:
# Missing-value check for the chest measurement: is any value NaN, and how many are.
chest_nulls = df['chest'].isnull()
print(chest_nulls.any())
print(chest_nulls.sum())

False
0


In [10]:
# Missing-value check for the belly measurement: is any value NaN, and how many are.
belly_nulls = df['belly'].isnull()
print(belly_nulls.any())
print(belly_nulls.sum())

False
0


## Impute missing values in the feature matrix X with the column mean (note: foot length is not one of the selected features)

In [11]:
from sklearn.impute import SimpleImputer

# Replace every NaN in the feature matrix with the mean of its column.
# The result is written back into the existing X array in place.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X[:] = imputer.fit_transform(X)

## Feature Scaling

In [12]:
from sklearn.preprocessing import StandardScaler

# Separate scalers for features and target, so predictions can later be
# mapped back to original units with sc_y.
# NOTE(review): both scalers are fit on the full dataset, before the
# train/test split, so test-set statistics influence the scaling.
sc_X, sc_y = StandardScaler(), StandardScaler()
X = sc_X.fit_transform(X)
# The scaler works on 2-D input, so the target is reshaped into a single column.
y = sc_y.fit_transform(y.reshape(len(y), 1))

In [13]:
# Show the standardized features followed by the standardized target.
for scaled_array in (X, y):
    print(scaled_array)

[[ 1.13488322  0.44560337 -0.51773159  0.49122226  1.24187379]
 [ 0.23119716  1.02838444 -0.26133118  0.73683338  0.15042415]
 [ 1.00578521  1.96083415  1.02067085  1.47366677  0.5142407 ]
 [ 0.06982464  1.14494065  0.50787004  0.49122226  0.5142407 ]
 [-0.18837137 -0.37029013 -0.51773159  0.73683338  0.15042415]
 [-0.67248891  0.79527201 -0.774132    1.47366677 -0.2133924 ]
 [ 0.42484417  0.56215958 -0.51773159  1.47366677  0.69614897]
 [ 0.23119716  0.91182823 -0.00493078  0.98244451  0.5142407 ]
 [-0.18837137  1.02838444 -0.00493078  0.49122226  0.15042415]
 [ 0.36029516  0.56215958  0.25146963  0.24561113 -0.2133924 ]
 [ 0.10209915  0.56215958  1.02067085  1.96488902  0.5142407 ]
 [-0.41429289  1.14494065 -0.774132    0.49122226  0.15042415]
 [ 0.97351071  0.56215958 -0.51773159  0.         -0.2133924 ]
 [ 0.23119716  1.02838444 -0.51773159  0.49122226 -0.39530067]
 [ 0.23119716 -0.37029013 -1.54333322  0.49122226  0.87805724]
 [-0.28519488 -0.25373391 -1.28693281  0.49122226 -0.21

## Splitting the dataset into training set and test set

In [14]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows (the library default, spelled out here) for testing.
# The fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

## Training the SVR model on the training dataset

In [15]:
from sklearn.svm import SVR

# Support Vector Regression with a radial-basis-function kernel.
regressor = SVR(kernel='rbf')
# y_train is an (n, 1) column because of the scaling step; SVR.fit expects a
# 1-D target. Flattening it explicitly avoids the DataConversionWarning
# while fitting on exactly the same values.
regressor.fit(X_train, y_train.ravel())

  return f(*args, **kwargs)


SVR()

## Predicting the test results

In [16]:
# regressor.predict returns a 1-D array of scaled predictions, while the
# scaler was fit on a single column. Reshape to (n, 1) before undoing the
# scaling (recent scikit-learn releases reject 1-D input here), then flatten
# so y_pred stays a 1-D array in original units.
y_pred_scaled = regressor.predict(X_test).reshape(-1, 1)
y_pred = sc_y.inverse_transform(y_pred_scaled).ravel()
print(y_pred)

[90.1017665  92.53138471 96.90567693 91.98847979 89.60095316 93.94036447
 93.6503091  90.12652164 96.45948543 91.83734065 92.77841914 89.24845519
 95.30001412 94.50622767 94.41897149 93.01953447 92.98455034 95.75452144
 91.59372589 93.38431462 86.79378887 91.60109482 93.83476071 92.99118273
 88.01759168 92.02168304]


## Calculate r2-Score

In [17]:
from sklearn.metrics import r2_score

# Score in original units: undo the target scaling on the held-out values
# and compare them with the unscaled predictions.
y_test_unscaled = sc_y.inverse_transform(y_test)
r2_score(y_test_unscaled, y_pred)

0.6241942694736546