In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns

%matplotlib inline
sns.set()

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# Any results you write to the current directory are saved as output.

In [1]:
# Load both tables of the park-biodiversity dataset; only `species` is previewed here.
species = pd.read_csv("../input/park-biodiversity/species.csv")
parks = pd.read_csv("../input/park-biodiversity/parks.csv")
species.head()

# Exploration

In [1]:
# Visualise where values are missing: one heatmap cell per (row, column) of `species`.
missing_mask = species.isna()
sns.heatmap(missing_mask)

In [1]:
# Count species records for every (Category, Conservation Status) pair.
df = pd.crosstab(
    index=species['Category'],
    columns=species['Conservation Status'],
)
df

In [1]:
# Stacked horizontal bars: one bar per row of `df`, one segment per column.
style.use('default')
df.plot(
    kind='barh',
    stacked=True,
    width=1,
    figsize=(15, 8),
    edgecolor='white',
    colormap='viridis_r',
)

<i>Species of concern</i> is an informal term that refers to those species which Region 3 believes might be in need of concentrated conservation actions. Such conservation actions vary depending on the health of the populations and degree and types of threats. At one extreme, there may only need to be periodic monitoring of populations and threats to the species and its habitat. At the other extreme, a species may need to be listed as a Federal threatened or endangered species. Species of concern receive no legal protection and the use of the term does not necessarily mean that the species will eventually be proposed for listing as a threatened or endangered species.

The stacked bar chart above shows that the species of greatest concern are the endangered ones. Birds, mammals, and vascular plants have many endangered species (I couldn't reverse the colors of the chart to make this easier to see). For now, I focus on the data provided for mammals.

I am new to data analysis, so for now I focus on determining which families and orders have endangered species.

In [1]:
# Keep only the mammal records, then count them per (Order, Conservation Status).
# margins=True adds an 'All' row and column holding the totals.
is_mammal = species['Category'] == 'Mammal'
mammal = species.loc[is_mammal]
mammal_df = pd.crosstab(
    index=mammal['Order'],
    columns=mammal['Conservation Status'],
    margins=True,
)
mammal_df.head()

In [1]:
# Remove the margin totals (the 'All' row and column) and the
# 'Species of Concern' column from the mammal table.
# errors='ignore' keeps this cell re-runnable: on a second run the labels are
# already gone, and a plain drop would raise KeyError.
mammal_df = mammal_df.drop(
    index=['All'],
    columns=['All', 'Species of Concern'],
    errors='ignore',
)

In [1]:
# Stacked horizontal bars: one bar per row of `mammal_df`, one segment per column.
style.use('default')
mammal_df.plot(
    kind='barh',
    stacked=True,
    width=0.9,
    figsize=(20, 5),
    edgecolor='white',
    colormap='viridis',
    title='Conservation Status of Mammals',
)

I deleted the <i>Breeder and Resident</i> column because it does not provide any useful information.

In [1]:
def endangered_by_family(mammals, order):
    """Count the endangered records of one mammal order, per family.

    Parameters
    ----------
    mammals : pandas.DataFrame
        Frame with 'Order', 'Family' and 'Conservation Status' columns.
    order : str
        Value of the 'Order' column to keep (e.g. 'Carnivora').

    Returns
    -------
    pandas.DataFrame
        One row per family with the columns 'Family', 'Endangered' (record
        count) and 'specie' (the order name, repeated on every row).
    """
    keep = (mammals['Order'] == order) & (mammals['Conservation Status'] == 'Endangered')
    endangered = mammals[keep]
    # No margins requested: the totals row/column would only be dropped again.
    counts = pd.crosstab(endangered['Family'], endangered['Conservation Status'])
    counts = counts.reset_index()
    counts['specie'] = order
    return counts


# One table per order. The label written to 'specie' is always the order name
# itself, so Cetacea is labelled 'Cetacea' like the other three orders.
carn = endangered_by_family(mammal, 'Carnivora')
ceta = endangered_by_family(mammal, 'Cetacea')
arti = endangered_by_family(mammal, 'Artiodactyla')
chir = endangered_by_family(mammal, 'Chiroptera')

In [1]:
# Stack the four per-order tables. Each one carries its own 0..k index, so
# ignore_index=True renumbers the rows and avoids duplicate index labels.
df_specie = pd.concat([ceta, carn, arti, chir], axis=0, ignore_index=True)

In [1]:
# Keep only the three columns used for plotting, in this order.
# rename_axis clears the name of the column index, matching a frame rebuilt
# from the individual columns.
plot_columns = ['Family', 'Endangered', 'specie']
df_specie = df_specie[plot_columns].rename_axis(columns=None)
df_specie

In [1]:
# Grouped bars: one group per 'specie' value, one bar per family.
style.use('default')
plt.figure(figsize=(10, 4.5))
sns.barplot(
    data=df_specie,
    x='specie',
    y='Endangered',
    hue='Family',
    dodge=True,
    palette='viridis',
    edgecolor='None',
)
plt.title('Endangered Mammals', fontsize=15)
plt.xlabel('Specie')
plt.ylabel('Count')

# Place the legend outside the axes, to the right of the plot.
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1.0))

In [1]:
# Mammal records only, restricted to the descriptive columns analysed below.
descriptive_columns = ['Category', 'Family', 'Abundance', 'Nativeness', 'Occurrence', 'Seasonality']
new_df = species.loc[species['Category'] == 'Mammal', descriptive_columns]

In [1]:
# One Family-indexed count table per descriptive attribute.
df_ab, df_nat, df_occ, df_seo = (
    pd.crosstab(new_df['Family'], new_df[attribute])
    for attribute in ('Abundance', 'Nativeness', 'Occurrence', 'Seasonality')
)

In [1]:
# Conservation-status counts per mammal family. A Family-indexed table is
# needed here: `df` is indexed by Category, so concatenating it with the
# Family-indexed tables would append Category rows and leave the status
# columns empty for every family.
mammal_rows = species[species['Category'] == 'Mammal']
df_status = pd.crosstab(mammal_rows['Family'], mammal_rows['Conservation Status'])

# Join all per-family count tables side by side (outer join on Family).
# NOTE(review): the joined tables may share column labels, which would give
# duplicate column names in df_add - confirm against the data.
df_add = pd.concat([df_ab, df_status, df_nat, df_occ, df_seo], axis=1)
df_add

In [1]:
# Replace missing counts with 0. Reassigning rather than using inplace=True
# avoids mutating the frame object that the previous cell displayed.
df_add = df_add.fillna(0)

In [1]:
# Pairwise correlation between the columns of `df_add`, shown as a heatmap.
correlations = df_add.corr()
sns.heatmap(correlations)