# Tarea Desafío KNN


**Desarrollado por:** Orlando Patricio Chacón Molina

**Fecha de creación:** 2022-02-06

**Fecha de actualización:** 2022-02-06

In [1]:
#Importar librerías
import pandas as pd
import numpy as np
# Para escalar datos
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split


In [2]:
# Load the raw abalone file. header=None tells pandas the file has no header
# row, so the columns come back numbered 0..8 until they are renamed.
filename = 'data/abalone.data'
df = pd.read_csv(filename, header=None)
# Preview the first rows to confirm the file parsed as expected.
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [3]:
"""
Sex		        nominal			M, F, and I (infant)
Length		    continuous	mm	Longest shell measurement
Diameter	    continuous	mm	perpendicular to length
Height		    continuous	mm	with meat in shell
Whole weight	continuous	grams	whole abalone
Shucked weight	continuous	grams	weight of meat
Viscera weight	continuous	grams	gut weight (after bleeding)
Shell weight	continuous	grams	after being dried
Rings		    integer			+1.5 gives the age in years
"""
# Give the nine positional columns readable names, in the same order as the
# attribute table in the string above.
# NOTE(review): 'WholeEeight' and 'isceraWeight' look like typos for
# 'WholeWeight' and 'VisceraWeight'. The modelling cells further down select
# columns using these exact strings, so any rename must be applied there too.
df.columns=['Sex','Length','Diameter','Height','WholeEeight','ShuckedWeight','isceraWeight','ShellWeight','Rings']
df.head()



Unnamed: 0,Sex,Length,Diameter,Height,WholeEeight,ShuckedWeight,isceraWeight,ShellWeight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [4]:
# Check dtypes and non-null counts of every column before modelling.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4177 entries, 0 to 4176
Data columns (total 9 columns):
Sex              4177 non-null object
Length           4177 non-null float64
Diameter         4177 non-null float64
Height           4177 non-null float64
WholeEeight      4177 non-null float64
ShuckedWeight    4177 non-null float64
isceraWeight     4177 non-null float64
ShellWeight      4177 non-null float64
Rings            4177 non-null int64
dtypes: float64(7), int64(1), object(1)
memory usage: 293.8+ KB


In [5]:
# Summary statistics of the numeric columns (the non-numeric 'Sex' column is
# left out by describe()).
# NOTE(review): in the output below Height has min 0.0 and max 1.13, far
# outside its quartiles (0.115-0.165) - possibly bad records; confirm before
# relying on distance-based models.
df.describe()

Unnamed: 0,Length,Diameter,Height,WholeEeight,ShuckedWeight,isceraWeight,ShellWeight,Rings
count,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0
mean,0.523992,0.407881,0.139516,0.828742,0.359367,0.180594,0.238831,9.933684
std,0.120093,0.09924,0.041827,0.490389,0.221963,0.109614,0.139203,3.224169
min,0.075,0.055,0.0,0.002,0.001,0.0005,0.0015,1.0
25%,0.45,0.35,0.115,0.4415,0.186,0.0935,0.13,8.0
50%,0.545,0.425,0.14,0.7995,0.336,0.171,0.234,9.0
75%,0.615,0.48,0.165,1.153,0.502,0.253,0.329,11.0
max,0.815,0.65,1.13,2.8255,1.488,0.76,1.005,29.0


In [6]:
# Number of samples per ring count. Selecting [['Length']] (a one-column
# frame) makes count() return a DataFrame directly.
df.groupby('Rings')[['Length']].count()

Unnamed: 0_level_0,Length
Rings,Unnamed: 1_level_1
1,1
2,1
3,15
4,57
5,115
6,259
7,391
8,568
9,689
10,634


## Estimación de la edad utilizando KNN (Regresión)

La edad del abulón se calcula de acuerdo al número de anillos:

$\text{Edad} = \text{Rings} + 1.5$

In [54]:
# Features: the seven continuous measurements. Target: number of rings.
# The misspelled names 'WholeEeight' / 'isceraWeight' are the ones assigned to
# df.columns earlier in the notebook and must match exactly.
feature_cols = ['Length','Diameter','Height','WholeEeight','ShuckedWeight','isceraWeight','ShellWeight']
X = df.loc[:, feature_cols]
y = df.loc[:, ['Rings']]

# 80/20 split; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=9993)

# NOTE(review): the features are used unscaled although StandardScaler is
# imported at the top of the notebook. KNN is distance-based, so scaling would
# likely change the neighbours chosen and hence the predictions - decide
# deliberately.

# KNN regressor
neigh = KNeighborsRegressor(n_neighbors=2)
# Fit on a flat 1-D target (instead of a single-column DataFrame) so that
# predict() returns a 1-D array that maps cleanly onto a DataFrame column.
neigh.fit(X_train, y_train.values.ravel())

# Predict once and reuse the result for both derived columns.
rings_pred = neigh.predict(X_test)

# Work on an explicit copy: pd.DataFrame(y_test) does not copy the data, so
# adding columns to it could alias/mutate y_test and raise
# SettingWithCopyWarning.
TestPredDf = y_test.copy()
TestPredDf['RingsPred'] = rings_pred
# Age = Rings + 1.5, applied as a vectorized addition.
TestPredDf['EdadPred'] = rings_pred + 1.5
TestPredDf.head()


Unnamed: 0,Rings,RingsPred,EdadPred
1619,10,10.0,11.5
59,7,10.5,12.0
3222,18,14.0,15.5
3612,10,11.5,13.0
1887,9,11.0,12.5


## Estimación del sexo utilizando KNN (Clasificación)

In [56]:
# Features: the seven continuous measurements. Target: the 'Sex' label
# (M, F or I). The misspelled names 'WholeEeight' / 'isceraWeight' are the
# ones assigned to df.columns earlier in the notebook and must match exactly.
feature_cols = ['Length','Diameter','Height','WholeEeight','ShuckedWeight','isceraWeight','ShellWeight']
X = df.loc[:, feature_cols]
y = df.loc[:, ['Sex']]

# 80/20 split; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=9993)

# NOTE(review): the features are used unscaled although StandardScaler is
# imported at the top of the notebook. KNN is distance-based, so scaling would
# likely change the predictions - decide deliberately.

# KNN classifier
neigh = KNeighborsClassifier(n_neighbors=2)
# Pass the labels as a flat 1-D array: fitting on a single-column DataFrame
# makes scikit-learn emit a column-vector conversion warning.
neigh.fit(X_train, y_train.values.ravel())

# Put the true and predicted labels side by side. Work on an explicit copy:
# pd.DataFrame(y_test) does not copy the data, so adding a column to it could
# alias/mutate y_test and raise SettingWithCopyWarning.
TestPredDf = y_test.copy()
TestPredDf['SexPred'] = neigh.predict(X_test)

TestPredDf.head()

  import sys


Unnamed: 0,Sex,SexPred
1619,I,I
59,F,F
3222,M,F
3612,M,F
1887,I,I
