In [37]:
# This notebook is going to introduce the basics of the Pandas library,
# how to import data, and how to use Pandas to create cool data visualizations

# In order to work with our data set (we will be using a .csv file), we first
# need to upload the file to the notebook environment:
# 1) Click the folder icon to the left
# 2) Click on the folder with the upwards arrow
# 3) In the dialog box, select your desired file. In this notebook we will be
#    working with the 'pokemon.csv' file. I will attempt to upload this notebook
#    and the dataset to my GitHub. It will be linked here: https://github.com/mgonzalez12000
#    under the 'Pandas-Intro-for-ACSC' repo

# Just like matplotlib, we are going to import Pandas

In [1]:
import pandas as pd

In [3]:
# To read 'pokemon.csv', we use the pandas function pd.read_csv().
# We store the value it returns in a new variable named df_pokemon.
# That value is our DataFrame, and it is essential for the rest of the
# notebook: the cells below all read from df_pokemon.
# NOTE: the path is relative, so 'pokemon.csv' must be in the notebook's
# current working directory.
df_pokemon = pd.read_csv('pokemon.csv')

In [None]:
# To look at the dataset we could simply evaluate df_pokemon on its own.
# However, our dataset is pretty big, so let's show only the first 5 rows.
# .head() takes the number of rows to return (its parameter is named n).
df_pokemon.head(5)

In [None]:
# Pandas can also sort a DataFrame by the values in one of its columns.
# .sort_values() takes the column name to sort by, plus ascending=True/False.
# Here we order the rows from highest to lowest "base_total" and show the
# top 5 rows of the sorted result. df_pokemon itself is left unchanged.
(
    df_pokemon
    .sort_values(by='base_total', ascending=False)
    .head(5)
)

In [None]:
# Just like .head() returns the first n number of rows in a data set, we also
# have another function that will allow us to return the last n number of rows
# in the data set that we are working with

# That function is known as .tail() and works similar to how .head() works

In [None]:
# .tail() mirrors .head(): it returns the last n rows of the dataset.
# Here we show the last 5 rows.
df_pokemon.tail(5)

In [None]:
# Now we are going to use the Pandas library to create cool data visualizations.
# Pandas lets you plot a Series or DataFrame through 'pandas.DataFrame.plot'
# (for example df.plot.scatter(...) or df.plot.bar(...)).

In [None]:
# We are now going to create a scatter plot to show the relationship between
# each Pokemon's offensive and defensive capabilities. To create a scatter
# plot we use .plot.scatter(x=..., y=...).
# NOTICE: the x and y arguments are the names of columns in our data set.
# The trailing semicolon keeps Jupyter from echoing the returned Axes object
# as text above the chart.
df_pokemon.plot.scatter(x='attack', y='defense');

In [None]:
# Using Pandas, we can also add a "color bar" that helps us differentiate our
# data points. The line below colors each point by its capture rate, which
# lets us see whether Pokemon with lower offensive and defensive capabilities
# are more easily captured.

# To do so, all we have to add is the 'c' parameter (shorthand for color) and
# set it to the name of the column whose values should drive the color.

# NOTE: once again, 'capture_rate' is the name of a column in our data set.
# NOTE(review): color-mapping by column assumes 'capture_rate' holds numbers -
# confirm the column's dtype if this cell raises an error.
# The trailing semicolon keeps Jupyter from echoing the returned Axes object
# as text above the chart.
df_pokemon.plot.scatter(x='attack', y='defense', c='capture_rate');

In [None]:
# Instead of a column name, the c parameter also accepts a color given as a
# string. This does not change the data; it paints every point that color.
# In this example we change the scatter plot to the color green.
# The trailing semicolon keeps Jupyter from echoing the returned Axes object
# as text above the chart.
df_pokemon.plot.scatter(x='attack', y='defense', c='green');

In [None]:
# We can also create bar graphs by using Pandas. To create a bar graph, we use
# .plot.bar(x=..., y=...): x is the column used to label each bar and y is the
# column used for the bar heights. One bar is drawn per row of df_pokemon.
# The trailing semicolon keeps Jupyter from echoing the returned Axes object
# as text above the chart.
df_pokemon.plot.bar(x='name', y='speed');

In [None]:
# As you can see from above, our bar chart is crowded... how can we fix this?
# Row slicing!!!
# By slicing the rows of our DataFrame, we will be able to plot only a certain
# "chunk" of the data set.

In [None]:
# The following line slices the DataFrame's rows so that only the first 5 are
# plotted. .iloc selects rows by position, so 0:5 means rows 0, 1, 2, 3 and 4
# (the end of a slice is not included).
# The trailing semicolon keeps Jupyter from echoing the returned Axes object
# as text above the chart.
df_pokemon.iloc[0:5].plot.bar(x='name', y='speed');

In [None]:
# Sometimes your chart does not provide appropriate labels, so you will need
# to name them yourself. This can be done easily.

# Create a reference variable. .plot.bar() returns the chart's Axes object, so
# this variable lets us keep working with the chart after it is drawn.
# .iloc[0:5] selects the first 5 rows by position.
barGraph = df_pokemon.iloc[0:5].plot.bar(x='name', y='speed')
# Call .set_xlabel() and .set_ylabel() on our reference variable to set the
# x and y label names respectively.
barGraph.set_xlabel('Name')
# The trailing semicolon keeps Jupyter from echoing the returned Text object.
barGraph.set_ylabel('Speed');

In [None]:
# We can change the bar color by passing the color argument, with the desired
# color given as a string. In this example we are changing our blue bar chart
# into a red one.
# .iloc[0:5] selects the first 5 rows by position.
barGraph = df_pokemon.iloc[0:5].plot.bar(x='name', y='speed', color='red')
barGraph.set_xlabel('Name')
# The trailing semicolon keeps Jupyter from echoing the returned Text object.
barGraph.set_ylabel('Speed');

In [None]:
# Try to replicate the same bar graph above and change it to the color yellow

In [None]:
# Let's output our data set (DataFrame) by evaluating the variable on its own.
# NOTE: for a large DataFrame, pandas displays a shortened view (the first and
# last few rows) rather than every single row.
df_pokemon

In [None]:
# If you want to print the items in a specific row, you can use .loc[]
# (note the square brackets: .loc is an indexer, not a function call).
# This prints the row with index label 0 and all of its corresponding columns.
# The first value inside the brackets is the row's index label, and the ":"
# stands for "every column".
df_pokemon.loc[0,:]

In [None]:
# If you want to print all the items in a specific column, you can use bracket
# notation and pass in the name of the column.
df_pokemon['name']

In [12]:
# To read a single value, give .loc[] both a row and a column.
# The first value is the row's index label, and the second is the column name.
df_pokemon.loc[0,'name']

'Bulbasaur'