# Dogs Intelligence and Size
## 강아지 품종 기반 특성 탐색 

### About this dataset
##### 여기에는 브리티시 컬럼비아 대학의 개 심리학 교수인 Stanley Coren이 수행한 연구 데이터와 American Kennel Club(AKC)의 품종 크기 데이터가 포함됩니다.
##### 이 데이터로 지능과 복종심을 관련해서 더 큰 품종과 더 작은 품종을 비교할 수 있다. 


### 연구 아이디어
##### 다양한 개 품종의 순종과 지능 사이의 상관 관계를 조사합니다.
##### 특정 품종의 개에서 크기가 에너지 수준, 사교성 및 훈련 가능성과 같은 다른 특성과 어떻게 관련되는지 조사합니다.
##### 다양한 품종의 개에 대한 특정 행동 패턴 또는 의학적 문제와 관련된 크기 분석

In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder

In [2]:
AKC = pd.read_csv("AKC_Breed_Info.csv")
dog_in = pd.read_csv("dog_intelligence.csv")

In [3]:
AKC

Unnamed: 0,index,Breed,height_low_inches,height_high_inches,weight_low_lbs,weight_high_lbs
0,0,Akita,26,28,80,120
1,1,Anatolian Sheepdog,27,29,100,150
2,2,Bernese Mountain Dog,23,27,85,110
3,3,Bloodhound,24,26,80,120
4,4,Borzoi,26,28,70,100
...,...,...,...,...,...,...
145,145,Papillon,8,11,5,10
146,146,Pomeranian,12,12,3,7
147,147,Poodle Toy,10,10,10,10
148,148,Toy Fox Terrier,10,10,4,7


#### Columns

* Breed : 견종 ( String )
* height_low_inches : 개의 키의 하한 범위 (인치) (integer)
* height_high_inches : 개의 키의 상한 범위 (인치) (integer)
* weight_low_lbs : 개의 무게의 하한 범위 (파운드) (integer)
* weight_high_lbs : 개의 무게의 상한 범위 (파운드) (integer)

In [4]:
dog_in

Unnamed: 0,index,Breed,Classification,obey,reps_lower,reps_upper
0,0,Border Collie,Brightest Dogs,95%,1,4
1,1,Poodle,Brightest Dogs,95%,1,4
2,2,German Shepherd,Brightest Dogs,95%,1,4
3,3,Golden Retriever,Brightest Dogs,95%,1,4
4,4,Doberman Pinscher,Brightest Dogs,95%,1,4
...,...,...,...,...,...,...
131,131,Borzoi,Lowest Degree of Working/Obedience Intelligence,,81,100
132,132,Chow Chow,Lowest Degree of Working/Obedience Intelligence,,81,100
133,133,Bulldog,Lowest Degree of Working/Obedience Intelligence,,81,100
134,134,Basenji,Lowest Degree of Working/Obedience Intelligence,,81,100


#### Columns (dog_intelligence.csv)

* Breed : 견종 ( String )
* classification : American Kennel Club에 따른 개의 크기 분류. (string)
* obey : 품종이 첫 번째 명령을 따를 확률 (integer)
* eps_lower : 새로운 명령을 이해하기 위한 반복의 하한 (integer)
* reps_upper : 새로운 명령을 이해하기 위한 반복의 상한 (integer)

# EDA - AKC

In [5]:
AKC.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   index               150 non-null    int64 
 1   Breed               150 non-null    object
 2   height_low_inches   150 non-null    object
 3   height_high_inches  150 non-null    object
 4   weight_low_lbs      150 non-null    object
 5   weight_high_lbs     150 non-null    object
dtypes: int64(1), object(5)
memory usage: 7.2+ KB


In [6]:
AKC['Breed'].value_counts()

Akita                          1
Standard Schnauzer             1
Vizsla                         1
Welsh Springer Spaniel         1
Wirehaired Pointing Griffon    1
                              ..
Spinone Italiano               1
Tibetan Mastiff                1
Weimaraner                     1
Airdale Terrier                1
Yorkshire Terrier              1
Name: Breed, Length: 150, dtype: int64

In [7]:
AKC['height_low_inches']

0      26
1      27
2      23
3      24
4      26
       ..
145     8
146    12
147    10
148    10
149     8
Name: height_low_inches, Length: 150, dtype: object

In [8]:
AKC['height_high_inches']

0      28
1      29
2      27
3      26
4      28
       ..
145    11
146    12
147    10
148    10
149     8
Name: height_high_inches, Length: 150, dtype: object

## EDA - dog_intelligence

In [9]:
dog_in.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 136 entries, 0 to 135
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   index           136 non-null    int64 
 1   Breed           136 non-null    object
 2   Classification  136 non-null    object
 3   obey            125 non-null    object
 4   reps_lower      136 non-null    int64 
 5   reps_upper      136 non-null    int64 
dtypes: int64(3), object(3)
memory usage: 6.5+ KB


In [10]:
(100 * (dog_in['obey'].isnull().sum() / len(dog_in))).round(2)

8.09

In [11]:
dog_in['obey'] # %가 포함되어 있는 object 데이터 타입이라 int로 바꿔줘야함.

0      95%
1      95%
2      95%
3      95%
4      95%
      ... 
131    NaN
132    NaN
133    NaN
134    NaN
135    NaN
Name: obey, Length: 136, dtype: object

In [15]:
dog_in_without_Nan = dog_in['obey'][:-11]
dog_in_without_Nan

0      95%
1      95%
2      95%
3      95%
4      95%
      ... 
120    30%
121    30%
122    30%
123    30%
124    30%
Name: obey, Length: 125, dtype: object

In [34]:
dog_in_without_Nan = dog_in_without_Nan.astype(str).str.replace("%","") # %가 포함되어 있어서 제거 후 평균값으로 Nan 대체
dog_in_without_Nan = dog_in_without_Nan.astype(int)
dog_in_without_Nan

0      95
1      95
2      95
3      95
4      95
       ..
120    30
121    30
122    30
123    30
124    30
Name: obey, Length: 125, dtype: int64

In [41]:
dog_in_without_Nan_mean = dog_in_without_Nan.mean()
dog_in_without_Nan_mean = round(dog_in_without_Nan_mean) # 평균값

In [53]:
dog_in['obey'] = dog_in_without_Nan
dog_in['obey'] = dog_in['obey'].fillna(dog_in_without_Nan_mean)

dog_in['obey'] = dog_in['obey'].astype(int)

In [54]:
dog_in

Unnamed: 0,index,Breed,Classification,obey,reps_lower,reps_upper
0,0,Border Collie,Brightest Dogs,95,1,4
1,1,Poodle,Brightest Dogs,95,1,4
2,2,German Shepherd,Brightest Dogs,95,1,4
3,3,Golden Retriever,Brightest Dogs,95,1,4
4,4,Doberman Pinscher,Brightest Dogs,95,1,4
...,...,...,...,...,...,...
131,131,Borzoi,Lowest Degree of Working/Obedience Intelligence,61,81,100
132,132,Chow Chow,Lowest Degree of Working/Obedience Intelligence,61,81,100
133,133,Bulldog,Lowest Degree of Working/Obedience Intelligence,61,81,100
134,134,Basenji,Lowest Degree of Working/Obedience Intelligence,61,81,100


## AKC 와 dog_in 두 데이터 프레임 합치기

In [73]:
df = pd.merge(AKC, dog_in)
df.drop(columns="index")

Unnamed: 0,Breed,height_low_inches,height_high_inches,weight_low_lbs,weight_high_lbs,Classification,obey,reps_lower,reps_upper
0,Akita,26,28,80,120,Average Working/Obedience Intelligence,50,26,40
1,Bernese Mountain Dog,23,27,85,110,Excellent Working Dogs,85,5,15
2,Bloodhound,24,26,80,120,Lowest Degree of Working/Obedience Intelligence,61,81,100
3,Borzoi,26,28,70,100,Lowest Degree of Working/Obedience Intelligence,61,81,100
4,Bullmastiff,25,27,100,130,Fair Working/Obedience Intelligence,30,41,80
...,...,...,...,...,...,...,...,...,...
100,Japanese Chin,8,11,4,11,Fair Working/Obedience Intelligence,30,41,80
101,Maltese,8,10,4,6,Fair Working/Obedience Intelligence,30,41,80
102,Papillon,8,11,5,10,Brightest Dogs,95,1,4
103,Pomeranian,12,12,3,7,Excellent Working Dogs,85,5,15


## column 이름 변경

In [72]:
columnRename = {
    'height_low_inches' : 'MinHeight',
    'height_high_inches' : 'MaxHeight',
    'weight_low_lbs' : 'MinWeight',
    'weight_high_lbs' : 'MaxWeight',
    'reps_lower' : 'MinReps',
    'reps_upper' : 'MaxReps'
}
df.rename(columns = columnRename, inplace = True)
df.head()

Unnamed: 0,Breed,MinHeight,MaxHeight,MinWeight,MaxWeight,index,Classification,obey,MinReps,MaxReps
0,Akita,26,28,80,120,102,Average Working/Obedience Intelligence,50,26,40
1,Bernese Mountain Dog,23,27,85,110,26,Excellent Working Dogs,85,5,15
2,Bloodhound,24,26,80,120,130,Lowest Degree of Working/Obedience Intelligence,61,81,100
3,Borzoi,26,28,70,100,131,Lowest Degree of Working/Obedience Intelligence,61,81,100
4,Bullmastiff,25,27,100,130,124,Fair Working/Obedience Intelligence,30,41,80
