# Get started with data in Pandas

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
# Install Pandas and Matplotlib from requirements.txt
# in the terminal run...
# pip install -r requirements.txt

In [None]:
# Load the koala sightings CSV into a Pandas data frame
koala_data = pd.read_csv('koala_sightings_data.csv')

# Show the size of the table as (rows, columns)
print(koala_data.shape)

# Tell us about our data.
# info() writes its report straight to the output and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line.
koala_data.info()

# Explore your data

In [None]:
# Show the labels of every column in the table
column_names = koala_data.columns
print(column_names)

# How many locations are we looking at? List each distinct LGA value.
lga_names = koala_data["LGA"].unique()
print(lga_names)

In [None]:
# Build a True/False mask marking the rows whose LGA is "Gold Coast",
# then keep only those rows
is_gold_coast = koala_data["LGA"] == "Gold Coast"
golden_koalas = koala_data.loc[is_gold_coast]
print(golden_koalas)

In [None]:
# Save the Gold Coast rows for later.
# index=False leaves out the data frame's row index, which would otherwise be
# written as an extra, unnamed first column in the CSV file.
golden_koalas.to_csv("golden_koalas.csv", index=False)

# Visualise data

## Do koalas like tall trees?

In [6]:
# Pull out the two height columns we want to compare:
# every row (:) of one named column each

tree_heights = koala_data.loc[:, 'HeightOfTree_m']
koala_heights = koala_data.loc[:, 'HeightOfKoalaInTree_m']

print(koala_heights)

In [None]:
# Create a scatter plot to relate tree height to koala height in tree
# (`plt` is matplotlib.pyplot, imported in the notebook's first cell)

# x axis: how high the koala was in the tree, y axis: how tall the tree was
plt.scatter(koala_heights, tree_heights)

plt.title('Koala tree position vs Tree height')
plt.xlabel('HeightOfKoalaInTree_m')
plt.ylabel('HeightOfTree_m')

# Draw the finished figure, as the other plotting cells do, so the cell shows
# the chart instead of the text repr of the last call
plt.show()

## Where are the koalas?

In [None]:
# Group the sightings by LGA, then count the "_id" entries in each group
# (count() skips missing values)
sightings_by_lga = koala_data.groupby('LGA')
koala_sighting_count = sightings_by_lga["_id"].count()

print(koala_sighting_count)

In [None]:
# Make the bar chart
# (`plt` is matplotlib.pyplot, imported in the notebook's first cell)

# Separate the data for the X and Y axes:
# the index holds the LGA names, the values hold the sighting counts
lgas = koala_sighting_count.index
counts = koala_sighting_count.values

# Make it a vertical bar chart
plt.bar(lgas, counts)

plt.title('Koala Sightings By LGA')
plt.xlabel('LGAs')
plt.ylabel('Koala Sightings')

# Slant the LGA names and right-align them so each label ends under its own bar
plt.xticks(rotation=45, ha='right')

plt.show()

## How tall are the trees in each LGA?

In [None]:
# Get the series of data: the mean tree height for each LGA
tree_heights_mean = koala_data.groupby('LGA')["HeightOfTree_m"].mean()

# Plot a column graph - height of trees
fig, ax = plt.subplots() # This is a different way to set up your plots to do fancier stuff!
x = tree_heights_mean.index
y = tree_heights_mean.values
ax.bar(x, y, color="#99926d")

# Axis labels, etc.
# Only tree heights are drawn in this chart, so the y label names just that column.
ax.tick_params(axis='x', labelrotation=90)
ax.set_title('Mean tree height by LGA')
ax.set_xlabel('LGAs')
ax.set_ylabel('Mean HeightOfTree_m')

plt.show()

## How high are the koalas in each LGA?

In [None]:
# Get the 2 series of data: mean tree height and mean koala height per LGA
tree_heights_mean = koala_data.groupby('LGA')["HeightOfTree_m"].mean()
koala_in_tree_heights_mean = koala_data.groupby('LGA')["HeightOfKoalaInTree_m"].mean()

# Plot a column graph - height of trees
fig, ax = plt.subplots()
x = tree_heights_mean.index
y = tree_heights_mean.values
ax.bar(x, y, color="#99926d", label='Mean tree height')

# Plot a scatter plot on the same axes - height of koalas.
# zorder=2 draws the markers above the bars so they are not hidden behind them.
k_x = koala_in_tree_heights_mean.index
k_y = koala_in_tree_heights_mean.values
ax.scatter(k_x, k_y, color="#a0a2b0", s=200, zorder=2, label='Mean koala height in tree')

# Optional: for an emoji marker, try marker="$\U0001F428$" (koala) with s=300
# in the scatter call above.

# Axis labels, etc.
ax.tick_params(axis='x', labelrotation=90)
ax.set_title('Koala and tree heights by LGA')
ax.set_xlabel('LGAs')
ax.set_ylabel('Tree/Koala in tree height')

# The legend tells the reader which series is the bars and which is the markers
ax.legend()

plt.show()