In [None]:
#import libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import scipy.stats

#load the bite records and pull out the two columns analysed in this notebook
bites = pd.read_csv("../input/Health_AnimalBites.csv")
animal, location = bites["SpeciesIDDesc"], bites["WhereBittenIDDesc"]

#bar chart of how many records there are per animal species
#(value_counts() leaves missing values out by default)
species_counts = animal.value_counts()
species_fig, species_ax = plt.subplots(figsize=(10, 7))
species_counts.plot.bar(
    ax=species_ax,
    title="Animal Species Distribution",
    color="orange",
    edgecolor="black",
)
plt.show()

In [None]:
#bar chart of how many records there are per bite location
#(value_counts() leaves missing values out by default)
location_counts = location.value_counts()
location_fig, location_ax = plt.subplots(figsize=(10, 7))
location_counts.plot.bar(
    ax=location_ax,
    title="Bite Location Distribution",
    color="green",
    edgecolor="black",
)
plt.show()

In [None]:
#draw a grouped bar chart of bite counts per animal species, one bar per bite location
#crosstab rows are species and columns are bite locations; records where either
#value is missing are not counted
table = pd.crosstab(animal, location)

#take the species labels from the crosstab itself so that the tick labels always
#match the plotted rows in both number and order
animalslist = list(table.index)

#bite locations to plot (must be columns of `table`) and the colour used for each
bite_locations = ['HEAD', 'BODY', 'UNKNOWN']
bar_colors = ['#EE3224', '#F78F1E', '#FFC222']

pos = list(range(len(table)))
width = 0.25

fig, ax = plt.subplots(figsize=(13,8))

#one bar series per bite location, shifted right by one bar width each time;
#each series is labelled with its own location so the legend is built from the bars
for offset, (bite_location, bar_color) in enumerate(zip(bite_locations, bar_colors)):
    ax.bar([p + width * offset for p in pos],
           table[bite_location],
           width,
           alpha=0.5,
           color=bar_color,
           label=bite_location)

ax.set_ylabel('Bite count')
ax.set_title('Location of Bites by Animal Species')
#bars are centre-aligned at p, p + width and p + 2*width, so the middle of each
#group of three is at p + width
ax.set_xticks([p + width for p in pos])
ax.set_xticklabels(animalslist)

ax.set_xlim(min(pos)-width, max(pos)+width*4)
#upper limit is the largest per-species total over the plotted locations, which
#leaves headroom above the tallest single bar
ax.set_ylim([0, table[bite_locations].sum(axis=1).max()])

ax.legend(loc='upper left')
ax.grid()
plt.show()

The grouped bar chart code is adapted from [Chris Albon's grouped bar plot example](https://chrisalbon.com/python/matplotlib_grouped_bar_plot.html).

In [None]:
#one-way chi-square test on animal species
#(with no expected frequencies given, all categories are assumed equally likely)
species_counts = animal.value_counts()
scipy.stats.chisquare(f_obs=species_counts)

In [None]:
#one-way chi-square test on bite location
#(with no expected frequencies given, all categories are assumed equally likely)
location_counts = location.value_counts()
scipy.stats.chisquare(f_obs=location_counts)

* The chi-square value for both tests is very high, indicating that both data sets are significantly different from a uniform distribution.
* The p-value for both one-way chi-square tests is returned as 0.0. The true p-value is not exactly zero; it is so small that it cannot be represented as a floating-point number, which is consistent with the very high chi-square values.

In [None]:
#two-way chi-square test of independence between animal species and bite location
#rows of the contingency table are species, columns are bite locations
contingencyTable = pd.crosstab(index=animal, columns=location)

#result holds the test statistic, p-value, degrees of freedom and expected frequencies
scipy.stats.chi2_contingency(observed=contingencyTable)

* Chi-squared value: 279.09688498443319
* P-value: 5.3420252124139959e-50
* Degrees of freedom: 16

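From the values above, the p-value is far below any conventional significance level (e.g. 0.05), so we reject the null hypothesis that animal species and bite location are independent. In other words, bite location is associated with species: different animal species do tend to bite different parts of the body.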