# Week 8 - Visualizing Data

matplotlib과 seaborn을 이용하여 데이터를 시각화해보자.

## Weather Data Visualization

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb

In [None]:
# Load the weather dataset from a CSV file (path is relative to the working directory).
data = pd.read_csv("data/weather-mod.csv")

In [None]:
# Promote the "date" column to the row index; set_index also removes it
# from the regular columns.
data = data.set_index("date")

In [None]:
# Show the column labels of the weather DataFrame.
data.columns

In [None]:
# Preview the first rows of the weather DataFrame.
data.head()

### mean_temp와 mean_humidity로 scatterplot을 그려보자

In [None]:
# Scatter plot: a line plot with the connecting line disabled and circle
# markers; the two strings are column names looked up in `data`.
plt.plot('mean_temp', 'mean_humidity', data=data, linestyle='none', marker='o')
# Label the axes explicitly so the plotted variables are identifiable.
plt.xlabel('mean_temp')
plt.ylabel('mean_humidity')
plt.show()

### cloud_cover와 mean_temp는 상관관계가 있을까?

In [None]:
# Scatter of cloud_cover vs. mean_temp with a fitted regression line;
# both columns are looked up by name in `data`.
sb.regplot(x="cloud_cover", y="mean_temp", data=data)

In [None]:
# Scatter of mean_humidity vs. mean_temp with a fitted regression line;
# both columns are looked up by name in `data`.
sb.regplot(x="mean_humidity", y="mean_temp", data=data)

### 모든 변인들의 상관관계 그래프를 그려보자

In [None]:
# Select the weather variables to compare pairwise.
sub_data = data.loc[:, [
    'mean_temp', 'mean_dew', 'mean_humidity', 'min_humidity',
    'mean_pressure', 'mean_visibility', 'mean_wind', 'precipitation',
    'cloud_cover',
]]

In [None]:
# Grid of pairwise plots for the selected variables, with a regression fit
# drawn for each pair (kind="reg").
sb.pairplot(sub_data, kind="reg")
plt.show()

In [None]:
# Pairwise grid of the selected variables drawn as plain scatter plots.
sb.pairplot(sub_data, kind="scatter")
plt.show()

## Gapminder Data Visualization

In [None]:
def read_gapminder_data(filename, colname, data_dir="data/gapminder"):
    """Load one headerless two-column Gapminder CSV, indexed by country.

    Args:
        filename: Name of the CSV file inside ``data_dir``.
        colname: Label to give the value (second) column.
        data_dir: Directory holding the Gapminder CSV files. Defaults to
            ``"data/gapminder"``, the location this notebook reads from.

    Returns:
        A DataFrame whose index is named ``"country"`` and whose single
        column is ``colname``.

    Raises:
        ValueError: If the file does not have exactly two columns (raised
            by pandas when the two column labels are assigned).
    """
    table = pd.read_csv("{}/{}".format(data_dir, filename), header=None)
    # The files carry no header row, so name the columns explicitly.
    table.columns = ["country", colname]
    # set_index moves "country" out of the columns and into the index.
    return table.set_index("country")

In [None]:
# Load each Gapminder indicator into its own country-indexed table.
employment = read_gapminder_data(filename="employment.csv", colname="employment")
life_exp = read_gapminder_data(filename="life_expectancy.csv", colname="life_exp")
gdp = read_gapminder_data(filename="gdp_per_capita.csv", colname="gdp")

In [None]:
# Place the three indicator tables side by side, aligned on their index.
frames = [employment, life_exp, gdp]
gm_data = pd.concat(frames, axis="columns")

In [None]:
# Preview the first rows of the combined Gapminder table.
gm_data.head()

### 변인들 간의 상관관계를 그려보자

In [None]:
# Grid of pairwise plots for the Gapminder indicators, with a regression fit
# drawn for each pair (kind="reg").
sb.pairplot(gm_data, kind="reg")
plt.show()

### Bubble Graph
Bubble Graph는 세 개의 변인을 이용한다.

x, y 이외에 z는 점의 크기로 표현된다.

In [None]:
# Keep only countries that have both plotted values; a row with a missing
# life_exp or gdp cannot be drawn as a bubble.
gm_subset = gm_data.dropna(subset=["life_exp", "gdp"])
# Sample at most 50 countries (sample() raises if n exceeds the row count)
# with a fixed seed so the chart is reproducible across runs.
gm_subset = gm_subset.sample(n=min(50, len(gm_subset)), random_state=0)
x = gm_subset.index        # country names -> x axis
y = gm_subset.life_exp     # life expectancy -> y axis
z = gm_subset.gdp          # GDP per capita -> bubble size

In [None]:
from matplotlib.pyplot import figure

# Open a large figure to leave room for the rotated country labels.
figure(num=None, figsize=(12, 10), dpi=80, facecolor='w', edgecolor='k')

# Bubble chart: marker size (s) is scaled from GDP; alpha makes the bubbles
# semi-transparent so overlapping ones remain visible.
plt.scatter(x, y, s=z/10, alpha=0.5)

# Axis labels and title; country tick labels are rotated to vertical.
plt.xticks(rotation=90)
plt.xlabel("Country")
plt.ylabel("Life Expectancy")
plt.title("Gapminder - Life Expectancy and GDP")
plt.show()

### gdp를 기준으로 sorting 한 후 그래프를 다시 그려보자.

In [None]:
# Order the sampled countries from lowest to highest GDP, then display them.
gm_subset_sorted = gm_subset.sort_values(by="gdp")
gm_subset_sorted

In [None]:
# Plot inputs taken from the GDP-sorted sample.
x = gm_subset_sorted.index          # country names -> x axis
y = gm_subset_sorted["life_exp"]    # life expectancy -> y axis
z = gm_subset_sorted["gdp"]         # GDP per capita -> bubble size

In [None]:
from matplotlib.pyplot import figure

# Open a large figure to leave room for the rotated country labels.
figure(num=None, figsize=(12, 10), dpi=80, facecolor='w', edgecolor='k')

# Bubble chart: marker size (s) is scaled from GDP; alpha makes the bubbles
# semi-transparent so overlapping ones remain visible.
plt.scatter(x, y, s=z/10, alpha=0.5)

# Axis labels and title; country tick labels are rotated to vertical.
plt.xticks(rotation=90)
plt.xlabel("Country")
plt.ylabel("Life Expectancy")
plt.title("Gapminder - Life Expectancy and GDP")
plt.show()

### Seaborn을 이용해서 위의 그래프를 다시 그려보자.

In [None]:
# The "country" column that was dropped earlier is needed again as a regular
# column, so copy it back from the index.
gm_subset_sorted = gm_subset_sorted.assign(country=gm_subset_sorted.index)

In [None]:
# Bubble chart of life expectancy per country: hue colours each country and
# marker size encodes GDP (sizes gives the smallest and largest marker size).
bubble = sb.relplot(x="country", y="life_exp", hue="country", size="gdp",
            sizes=(40, 1500), alpha=.5, palette="muted",
            height=8, data=gm_subset_sorted)
# Rotate the country tick labels to vertical.
bubble.set_xticklabels(rotation=90)
