In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Collect the full path of every file below the Kaggle input mount,
# then echo them one per line (same order os.walk yields them).
input_file_paths = [
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
]
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

* Adelie: weight 3.6 ~ 6.0 kg
* Chinstrap: weight 3.2 ~ 5.3 kg
* Gentoo: weight 4.2 ~ 8.5 kg

In [None]:
import matplotlib.pyplot as plt 
import seaborn as sns

In [None]:
# Location of the penguin measurements CSV inside the Kaggle input mount.
path = '/kaggle/input/palmer-archipelago-antarctica-penguin-data/penguins_size.csv'
# Parse the CSV into the DataFrame that every later cell works on.
peng = pd.read_csv(filepath_or_buffer=path)

In [None]:
# Show the freshly loaded DataFrame as this cell's output (first look at the raw data).
peng

In [None]:
# Row count for every (sex, species) combination; the last expression is the cell output.
by_sex_and_species = peng.groupby(by=['sex', 'species'])
by_sex_and_species['species'].count()

In [None]:
# Print the mean of each numeric body measurement.
# Fixes the misspelled label 'Aerage culmen depth: ' and replaces four
# copy-pasted print calls with one loop over (label, column) pairs.
measurement_labels = [
    ('Average culmen length: ', 'culmen_length_mm'),
    ('Average culmen depth: ', 'culmen_depth_mm'),
    ('Average flipper length: ', 'flipper_length_mm'),
    ('Average body mass: ', 'body_mass_g'),
]
for label, column in measurement_labels:
    print(label, peng[column].mean())

In [None]:
# Filling NaN
# Numeric measurements: fill missing entries with the column mean computed
# from the data itself. The previous version used hard-coded constants
# (apparently hand-copied from the printed averages), which silently go
# stale if the dataset changes and lose precision through rounding.
numeric_columns = ['culmen_length_mm', 'culmen_depth_mm', 'flipper_length_mm', 'body_mass_g']
for column in numeric_columns:
    # Series.mean() skips NaN, so the mean is taken over the observed values only.
    peng.loc[peng[column].isnull(), column] = peng[column].mean()

# Missing sex is filled with the fixed label 'FEMALE' (unchanged from the original cell).
peng.loc[peng.sex.isnull(), 'sex'] = 'FEMALE'

In [None]:
peng.info()

In [None]:
# Converting String values into Numerics
# The result is assigned back to the column instead of calling
# `peng[col].replace(..., inplace=True)`: that form mutates a temporary
# Series, which pandas warns about and which leaves `peng` unchanged once
# Copy-on-Write is active. Values not listed in a mapping are left as-is,
# exactly as with the original list-based replace.
sex_codes = {'MALE': 0, 'FEMALE': 1}
island_codes = {'Torgersen': 0, 'Biscoe': 1, 'Dream': 2}
species_codes = {'Adelie': 1, 'Chinstrap': 2, 'Gentoo': 3}

# infer_objects() turns a fully-replaced column into a real integer dtype
# even on pandas versions where replace() no longer downcasts by itself.
peng['sex'] = peng['sex'].replace(sex_codes).infer_objects()
peng['island'] = peng['island'].replace(island_codes).infer_objects()
peng['species'] = peng['species'].replace(species_codes).infer_objects()

In [None]:
# Preview the first five rows to confirm the categorical columns are now numeric codes.
peng.head(n=5)

In [None]:
# Scatter plot of culmen length against culmen depth, one colour per species.
# Species are selected by the numeric codes assigned earlier
# (1 = Adelie, 2 = Chinstrap, 3 = Gentoo).
# Changes: the legend label 'Christrap' is corrected to 'Chinstrap', the
# figure is created at its final size up front, and `fig` / `ax` now name
# the Figure and Axes respectively (previously `fig` first held an Axes).
fig, ax = plt.subplots(figsize=(10, 6))
species_styles = [
    (1, 'orange', 'Adelie'),
    (2, 'blue', 'Chinstrap'),
    (3, 'green', 'Gentoo'),
]
for species_code, color, label in species_styles:
    peng[peng.species == species_code].plot(
        kind='scatter', x='culmen_length_mm', y='culmen_depth_mm',
        color=color, label=label, ax=ax,
    )
ax.set_xlabel("Culmen Length(mm)")
ax.set_ylabel("Culmen depth(mm)")
ax.set_title("Culmen Length and depth")
plt.show()

In [None]:
# Scatter plot of flipper length against body mass, one colour per species.
# Species are selected by the numeric codes assigned earlier
# (1 = Adelie, 2 = Chinstrap, 3 = Gentoo).
# Changes: the legend label 'Christrap' is corrected to 'Chinstrap', the
# figure is created at its final size up front, and `fig` / `ax` now name
# the Figure and Axes respectively (previously `fig` first held an Axes).
fig, ax = plt.subplots(figsize=(10, 6))
species_styles = [
    (1, 'orange', 'Adelie'),
    (2, 'blue', 'Chinstrap'),
    (3, 'green', 'Gentoo'),
]
for species_code, color, label in species_styles:
    peng[peng.species == species_code].plot(
        kind='scatter', x='flipper_length_mm', y='body_mass_g',
        color=color, label=label, ax=ax,
    )
ax.set_xlabel("Flipper length(mm)")
ax.set_ylabel("Body mass(g)")
ax.set_title("Flipper length(mm) and Body mass(g)")
plt.show()

In [None]:
# Scatter plot of flipper length against body mass, one colour per species.
# NOTE(review): this cell draws the same flipper-length / body-mass plot as
# the cell directly before it - probably a copy-paste leftover; confirm
# whether a different pair of columns was intended, or remove the cell.
# Changes: the legend label 'Christrap' is corrected to 'Chinstrap', the
# figure is created at its final size up front, and `fig` / `ax` now name
# the Figure and Axes respectively (previously `fig` first held an Axes).
fig, ax = plt.subplots(figsize=(10, 6))
species_styles = [
    (1, 'orange', 'Adelie'),
    (2, 'blue', 'Chinstrap'),
    (3, 'green', 'Gentoo'),
]
for species_code, color, label in species_styles:
    peng[peng.species == species_code].plot(
        kind='scatter', x='flipper_length_mm', y='body_mass_g',
        color=color, label=label, ax=ax,
    )
ax.set_xlabel("Flipper length(mm)")
ax.set_ylabel("Body mass(g)")
ax.set_title("Flipper length(mm) and Body mass(g)")
plt.show()

In [None]:
# Derived feature: culmen length multiplied by culmen depth (both columns are in mm),
# stored under the name 'culmen_vol' that the modelling cells select later.
culmen_product = peng['culmen_length_mm'].mul(peng['culmen_depth_mm'])
peng['culmen_vol'] = culmen_product
peng.head()

In [None]:
# Distribution of the derived culmen feature for each encoded species.
sns.boxplot(data=peng, x='species', y='culmen_vol')
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlations between numeric columns.
# Changes:
#  * Only numeric columns are correlated. Recent pandas versions raise in
#    DataFrame.corr() if any column still holds non-numeric values (e.g. a
#    label that was not covered by the encoding step); older versions
#    dropped such columns silently, which is the behaviour kept here.
#  * `linewidths` is the keyword documented by seaborn.heatmap; the original
#    passed `linewidth`, which is only forwarded to matplotlib via **kwargs.
#  * The figure is created at its final size before drawing.
fig, ax = plt.subplots(figsize=(10, 8))
numeric_corr = peng.select_dtypes(include='number').corr()
sns.heatmap(numeric_corr, annot=True, cmap='RdYlGn', linewidths=0.2, ax=ax)
plt.show()

In [None]:
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier

In [None]:
# (rows, columns) of the prepared DataFrame, shown before it is split into train/test.
peng.shape

In [None]:
# Hold out 30% of the rows for evaluation.
# random_state is fixed so that the split - and therefore every accuracy
# reported below - is reproducible across "Restart & Run All"; without it
# each run shuffles the rows differently.
train, test = train_test_split(peng, test_size=0.3, random_state=42)
print(train.shape)
print(test.shape)

In [None]:
# Predictor columns shared by the train and test design matrices; the target is the encoded species.
feature_columns = ['island', 'culmen_vol', 'flipper_length_mm', 'body_mass_g']

train_X, train_y = train[feature_columns], train['species']
test_X, test_y = test[feature_columns], test['species']

In [None]:
# Peek at the first two training rows to confirm the selected feature columns.
train_X.head(n=2)

In [None]:
# SVM
# Fit a support-vector classifier with default settings and report test accuracy.
# NOTE(review): the features are used unscaled; SVC's default RBF kernel is
# sensitive to feature scale - consider standardising before fitting.
model = svm.SVC()
model.fit(train_X, train_y)
pred = model.predict(test_X)
# accuracy_score expects (y_true, y_pred); the value is the same either way
# for accuracy, but the documented order is used here.
svm_accuracy = metrics.accuracy_score(test_y, pred)
# Built-in round() works whether the metric is returned as a NumPy scalar or
# a plain Python float (a plain float has no .round() method).
print('Accuracy(SVM) : ', round(svm_accuracy, 3))

In [None]:
# Decision Tree
# Fit a decision tree and report test accuracy.
# random_state is fixed because the tree's split selection is randomised;
# without it the reported accuracy can differ between identical runs.
model = DecisionTreeClassifier(random_state=42)
model.fit(train_X, train_y)
pred = model.predict(test_X)
# accuracy_score expects (y_true, y_pred); the value is the same either way
# for accuracy, but the documented order is used here.
tree_accuracy = metrics.accuracy_score(test_y, pred)
# Built-in round() works whether the metric is returned as a NumPy scalar or
# a plain Python float (a plain float has no .round() method).
print('Accuracy(Decision Tree) : ', round(tree_accuracy, 3))

In [None]:
# KNN
# Fit a k-nearest-neighbours classifier and report test accuracy.
# Each prediction is a vote among the 5 closest training points (the old
# inline comment said 3, which did not match n_neighbors=5).
# NOTE(review): distances are computed on unscaled features, so the
# large-valued columns dominate - consider standardising before fitting.
model = KNeighborsClassifier(n_neighbors=5)
model.fit(train_X, train_y)
pred = model.predict(test_X)
# accuracy_score expects (y_true, y_pred); the value is the same either way
# for accuracy, but the documented order is used here.
knn_accuracy = metrics.accuracy_score(test_y, pred)
# Built-in round() works whether the metric is returned as a NumPy scalar or
# a plain Python float (a plain float has no .round() method).
print('Accuracy(KNN) : ', round(knn_accuracy, 3))