# Week 03: Data Viz

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the penguins dataset from the `palmerpenguins` package into `df`.
# Later cells use `df` as a pandas DataFrame (`.describe()`, `.iloc`, `.to_csv`, ...).
from palmerpenguins import load_penguins
df = load_penguins()
print(type(df))  # show what kind of object load_penguins() returned
df  # bare last expression: the notebook renders it as the cell output

In [None]:
# Three quick overviews of the frame, printed one after another:
# summary statistics, the dtype of each column, and the column labels.
for overview in (df.describe(), df.dtypes, df.columns):
    print(overview)

#### Indexing

In [None]:
# Row label / column position reused by the lookups below.
i, j = 1, 0

print(df.values)                          # the underlying data as a NumPy array
print(df.loc[i])                          # label-based: the row whose index label is i
print(df.iloc[i, j])                      # position-based: row i, column j
print(df[["bill_length_mm", "island"]])   # a list of column names selects those columns
print(df.query("year > 2007"))            # rows matching a condition written as a string

In [None]:
# Slicing the frame with integers selects rows by position: here the first two.
df[:2]

In [None]:
# Purely positional lookup: first row, first column.
df.iloc[0, 0]

In [None]:
# Take the row labelled 0, then pick its "body_mass_g" entry by key
# (equivalent to attribute-style access on the row).
df.loc[0]["body_mass_g"]

In [None]:
# Square brackets, first use: a plain Python list holding the column names we want.
columns = ["species", "island"]

# Square brackets, second use: indexing. Select those columns, then the first two rows.
df[columns][:2]

In [None]:
# A notebook cell only renders its *last* expression automatically, so the
# query result is passed to display() explicitly; otherwise it would be
# computed and silently thrown away.
display(df.query("year > 2007"))

# Multiple conditions: combine boolean masks with `&`; each comparison needs
# its own parentheses because `&` binds tighter than `==` and `>`.
df[(df.sex == "female") & (df.year > 2007)]

### Plotting

In [None]:
# Two equal-length lists: plt.plot pairs x[k] with y[k] position by position.
x = [1, 2, 3]
y = [4, 5, 6]

# Draw a line through the (x, y) points
plt.plot(x, y)

# Title and axis names
plt.title('X-Y')
plt.xlabel('x - axis')
plt.ylabel('y - axis')

# Render the figure
plt.show()

In [None]:
a = [1, 2, 3, 4, 5]
b = [0, 0.6, 0.2, 15, 10, 8, 16, 21]

# With a single data argument, plt.plot uses the list positions (0, 1, 2, ...)
# as the x values.
# Each series carries its own label, so a legend entry cannot get out of step
# with the artist it describes (a bare list of names is matched purely by
# drawing order).
plt.plot(a, label='Line')
plt.plot(b, "o", label='Dots')  # "o" is for circles

# Get current axes and build the legend from the labels set above
ax = plt.gca()
ax.legend()

plt.show()

In [None]:
# One figure holding two stacked panels; plt.subplot(rows, cols, position).
fig = plt.figure(figsize=(10, 5))

# Top panel (2 rows, 1 column, position 1): `a` drawn as blue squares.
sub1 = plt.subplot(2, 1, 1)
sub1.plot(a, 'sb')
sub1.annotate("Squares", (1, 1))

# Bottom panel (2 rows, 1 column, position 2): `b` drawn as red circles.
sub2 = plt.subplot(2, 1, 2)
sub2.plot(b, 'or')
sub2.annotate("Circles", (1, 1))

plt.show()

#### Seaborn plots

In [None]:
# `island` is a categorical column, so this draws one bar per island (a count plot).
# A KDE curve and a bin count are only meaningful for numeric data, so neither
# is requested here.
sns.histplot(df['island'], color="pink")

#### Joint plots

In [None]:
# Bill length against bill depth in the centre, with the distribution of each
# variable in the margins (20 bins per marginal histogram).
sns.jointplot(
    data=df,
    x="bill_length_mm",
    y="bill_depth_mm",
    marginal_kws={"bins": 20},
)

In [None]:
# Grid of pairwise plots across the numeric columns of the frame.
sns.pairplot(data=df)

In [None]:
# Earlier plots were handed a column object directly (e.g. `df.island`).
# Here the whole dataframe goes in as the `data` argument, and x / y / hue are
# given separately as the *names* of the columns to use (plain strings).
g = sns.boxplot(
    data=df,
    x='island',
    y='body_mass_g',
    hue='species',  # can change this
    palette=['#FF8C00', '#159090', '#A034F0'],  # custom, could use inbuilt ones
    linewidth=0.3,
)

# sns.boxplot hands back the axes it drew on; label them there.
g.set_xlabel('Island')
g.set_ylabel('Body Mass')
plt.show()

In [None]:
# Scatter plus fitted regression line per species. Because all the data is
# passed as `df`, the colouring can be switched just by naming a different
# column in `hue`.
g = sns.lmplot(
    data=df,
    x="flipper_length_mm",
    y="body_mass_g",
    hue="species",
    palette=['#FF8C00', '#159090', '#A034F0'],
    height=7,
)

# sns.lmplot returns a grid object, hence the plural set_xlabels / set_ylabels.
g.set_xlabels('Flipper Length')
g.set_ylabels('Body Mass')
plt.show()

#### Multiple figures

In [None]:
# `pltt` is a second alias for the same module as `plt` from the first cell;
# the import is kept so that any later cell referencing `pltt` keeps working.
import matplotlib.pyplot as pltt

# make a 2x2 grid; `ax` is a 2-D array of axes indexed as ax[row, col]
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 12))

# Same x / y in every panel, one plot type per panel.
sns.swarmplot(data=df, x='species', y='body_mass_g', hue='species', ax=ax[0, 0])
sns.violinplot(data=df, x='species', y='body_mass_g', ax=ax[0, 1])
sns.boxplot(data=df, x='species', y='body_mass_g', ax=ax[1, 0])
sns.barplot(data=df, x='species', y='body_mass_g', ax=ax[1, 1])

plt.show()

### Saving file

In [None]:
# A trailing `?` is IPython help syntax (not plain Python): it shows the
# documentation for `df.to_csv`.
df.to_csv?

In [None]:
# index=False keeps the row index out of the file. Without it the index is
# written as an unnamed first column and comes back from read_csv as an extra
# "Unnamed: 0" column, so the round trip would not reproduce the original frame.
df.to_csv("my_penguins.csv", index=False)

# Read the file back to check what was written.
df_penguins = pd.read_csv("my_penguins.csv")
df_penguins.head()

In [None]:
# Tab character as delimiter; index=False so the row index is not written as
# an unnamed leading column.
df.to_csv("my_penguins.tsv", sep="\t", index=False)

#### Test yourself

In [None]:
# Load seaborn's "iris" example dataset and peek at the first two rows.
iris = sns.load_dataset("iris")
iris[:2]

In [None]:
# 1. Distribution plot of petal_length
#    (histogram with a kernel density estimate drawn on top)
sns.histplot(
    data=iris,
    x="petal_length",
    kde=True,
)

In [None]:
# Pairwise plots between all variables of the iris frame
sns.pairplot(data=iris)

In [None]:
# Column labels of the iris frame as a plain Python list.
[column for column in iris.columns]

In [None]:

# 2x2 grid of petal_length by species, one plot type per panel.
# Uses `plt` from the notebook's import cell, so this cell does not depend on
# an alias that is only defined in an earlier, unrelated cell.
fig, ax = plt.subplots(figsize=(15, 12), ncols=2, nrows=2)
sns.swarmplot(data=iris, x='species', y='petal_length', ax=ax[0, 0], hue='species')
sns.violinplot(data=iris, x='species', y='petal_length', ax=ax[0, 1], hue="species")
sns.boxplot(data=iris, x='species', y='petal_length', ax=ax[1, 0], hue="species")
sns.barplot(data=iris, x='species', y='petal_length', ax=ax[1, 1], hue="species")
plt.show()