In [None]:
import numpy as np
import pandas as pd
from PIL import Image

In [None]:
# Load the 'Canada by Citizenship' sheet, skipping the first 20 rows and the
# last 2 rows of the sheet.
df_can = pd.read_excel(
    './Canada.xlsx',
    sheet_name='Canada by Citizenship',
    skiprows=range(20),
    skipfooter=2,
)
# Only the last expression of a cell is rendered automatically, so the preview
# is displayed explicitly and the shape remains the cell's result.
display(df_can.head())
df_can.shape

In [None]:
# Drop the columns that are not used and give the remaining descriptive
# columns clearer names.
df_can = (
    df_can
    .drop(columns=['AREA', 'REG', 'DEV', 'Type', 'Coverage'])
    .rename(columns={'OdName': 'Country', 'AreaName': 'Continent', 'RegName': 'Region'})
)

# Turn every column label into a string so the year columns can be selected
# with the labels in `years`.
df_can.columns = list(map(str, df_can.columns))

df_can = df_can.set_index('Country')

years = list(map(str, range(1980, 2014)))

# Add up only the year columns. A row-wise sum over the whole frame would also
# include the non-year columns (e.g. Continent, Region); recent pandas versions
# no longer silently skip such columns.
df_can['Total'] = df_can[years].sum(axis=1)

df_can.head()

In [None]:
# Call the method: the bare attribute only shows the bound-method repr instead
# of the summary statistics.
df_can.describe()

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt 
import matplotlib.patches as mpatches
# Switch matplotlib to the 'ggplot' style sheet.
mpl.style.use('ggplot')

In [None]:
# Waffle charts: keep only the rows for Denmark, Sweden and Norway.

df_dsn = df_can.loc[['Denmark', 'Sweden', 'Norway'], :]

df_dsn

In [None]:
# Step 1
# Work out the combined total of the three countries and each country's share of it.

total_values = sum(df_dsn['Total'])

category_proportions = list(
    map(lambda country_total: float(country_total / total_values), df_dsn['Total'])
)

category_proportions

In [None]:
# Step 2
# Size of the waffle chart grid (columns x rows).

width, height = 40, 10

# One tile per grid position.
total_num_tiles = height * width

total_num_tiles

In [None]:
# Step 3
# Number of tiles for each country: its share of the grid, rounded to whole tiles.
tiles_per_category = list(
    map(lambda share: round(share * total_num_tiles), category_proportions)
)

print(tiles_per_category)

In [None]:
# Step 4: build the matrix
waffle_chart = np.zeros((height, width))

# tile_bounds[k] is the (1-based) number of the last tile belonging to
# category k + 1; computing it once avoids re-summing the list for every tile.
tile_bounds = np.cumsum(tiles_per_category)
last_category = len(tiles_per_category)

category_index = 1
tile_index = 0

# Fill the grid column by column, top to bottom.
for col in range(width):
    for row in range(height):
        tile_index += 1
        # Advance past every category whose tiles are used up (this also skips
        # categories with zero tiles), but never beyond the last category: if
        # rounding left the counts short of the grid size, the remaining tiles
        # go to the last category instead of inventing new category numbers.
        while category_index < last_category and tile_index > tile_bounds[category_index - 1]:
            category_index += 1
        waffle_chart[row, col] = category_index



In [None]:
# Step 5: visualise the matrix
colormap = plt.cm.coolwarm
n_categories = len(df_dsn.index)

# matshow opens its own figure, so a preceding plt.figure() would only leave
# an empty extra figure behind. Fixing vmin/vmax maps category k to the same
# colour no matter which values happen to occur in the matrix.
plt.matshow(waffle_chart, cmap=colormap, vmin=1, vmax=n_categories)
fig = plt.gcf()
plt.colorbar()
plt.title('Waffle chart Negara Denmark, Sweden, Norway')
ax = plt.gca()

# Put minor ticks on the tile borders so the grid lines separate the tiles.
ax.set_xticks(np.arange(-.5, (width), 1), minor=True)
ax.set_yticks(np.arange(-.5, (height), 1), minor=True)

ax.grid(which='minor', color='w', linestyle='-', linewidth=2)

plt.xticks([])
plt.yticks([])

# Step 6: add the legend

legend_handles = []
for i, category in enumerate(df_dsn.index.values):
    # .iloc makes the positional lookup explicit; the Series is indexed by
    # country name, not by position.
    label_str = category + ' (' + str(df_dsn['Total'].iloc[i]) + ')'
    # Category i + 1 normalised over [1, n_categories], i.e. exactly the colour
    # the tiles of that category get in the chart.
    color_val = colormap(i / max(n_categories - 1, 1))
    legend_handles.append(mpatches.Patch(color=color_val, label=label_str))
plt.legend(handles=legend_handles, loc='lower center', ncol=n_categories, bbox_to_anchor=(0., -0.2, 0.95, .1))

In [None]:
# Step 7 Wrap to function

def create_waffle_chart(categories, values, height, width, colormap, value_sign='', title=None):
    """Draw a waffle chart showing each category's share of the total.

    Parameters
    ----------
    categories : sequence of str
        Category names, in the order they are laid out on the chart.
    values : sequence of numbers
        One value per category (list, array or pandas Series).
    height, width : int
        Number of tile rows and tile columns of the chart.
    colormap : matplotlib colormap
        Called with a float in [0, 1] to pick the colours of tiles and legend.
    value_sign : str, optional
        Unit shown with each value in the legend. '%' is placed after the
        value, any other sign in front of it.
    title : str, optional
        Chart title. Defaults to 'Waffle chart Negara ' followed by the
        comma-separated category names.

    Raises
    ------
    ValueError
        If there are no categories, `categories` and `values` differ in length,
        the grid has no tiles, or the values do not add up to a positive total.
    """
    categories = [str(category) for category in categories]
    values = list(values)
    n_categories = len(categories)

    # Validate up front, before anything is printed or drawn.
    if n_categories == 0:
        raise ValueError('at least one category is required')
    if len(values) != n_categories:
        raise ValueError('categories and values must have the same length')
    if height < 1 or width < 1:
        raise ValueError('height and width must be at least 1')
    total_values = sum(values)
    if total_values <= 0:
        raise ValueError('values must add up to a positive total')

    # compute the proportion of each category with respect to the total
    category_proportions = [float(value) / total_values for value in values]

    # compute the total number of tiles
    total_num_tiles = width * height
    print('Total number of tiles is', total_num_tiles)

    # compute the number of tiles for each category
    tiles_per_category = [round(proportion * total_num_tiles) for proportion in category_proportions]

    # print out number of tiles per category
    for category, tiles in zip(categories, tiles_per_category):
        print(category + ': ' + str(tiles))

    # initialize the waffle chart as an empty matrix
    waffle_chart = np.zeros((height, width))

    # tile_bounds[k] is the (1-based) number of the last tile of category k + 1
    tile_bounds = np.cumsum(tiles_per_category)
    category_index = 1
    tile_index = 0

    # populate the waffle chart column by column
    for col in range(width):
        for row in range(height):
            tile_index += 1
            # move on once the current category has used up its tiles, but
            # never past the last category: tiles left over by rounding are
            # given to the last category rather than to a non-existent one
            while category_index < n_categories and tile_index > tile_bounds[category_index - 1]:
                category_index += 1
            # set the class value to an integer, which increases with class
            waffle_chart[row, col] = category_index

    # matshow creates the figure itself; vmin/vmax pin category k to a fixed
    # colour so the legend below can reproduce it
    plt.matshow(waffle_chart, cmap=colormap, vmin=1, vmax=n_categories)
    plt.colorbar()

    if title is None:
        title = 'Waffle chart Negara ' + ', '.join(categories)
    plt.title(title)
    ax = plt.gca()
    # set minor ticks on the tile borders
    ax.set_xticks(np.arange(-.5, (width), 1), minor=True)
    ax.set_yticks(np.arange(-.5, (height), 1), minor=True)
    # add gridlines based on minor ticks
    ax.grid(which='minor', color='w', linestyle='-', linewidth=2)

    plt.xticks([])
    plt.yticks([])

    # create legend
    legend_handles = []
    for i, category in enumerate(categories):
        if value_sign == '%':
            label_str = category + ' (' + str(values[i]) + value_sign + ')'
        else:
            label_str = category + ' (' + value_sign + str(values[i]) + ')'
        # category i + 1 normalised over [1, n_categories]: the tile colour
        color_val = colormap(i / max(n_categories - 1, 1))
        legend_handles.append(mpatches.Patch(color=color_val, label=label_str))
    # add legend
    plt.legend(handles=legend_handles, loc='lower center', ncol=n_categories, bbox_to_anchor=(0., -0.2, 0.95, .1))

# Chart settings for the Denmark / Sweden / Norway example.
width, height = 40, 10
categories, values = df_dsn.index.values, df_dsn['Total']
colormap = plt.cm.coolwarm

create_waffle_chart(categories, values, height, width, colormap)

In [None]:
!python3 -m pip install pywaffle

In [None]:
!python3 -m pip install wordcloud

import wordcloud



In [None]:
from wordcloud import WordCloud, STOPWORDS
!wget --quiet https://raw.githubusercontent.com/ardhiraka/PFDS_sources/master/alice_novel.txt -O alice_novel.txt
alice_novel = open('./alice_novel.txt', 'r').read()


# alice_novel
# unique stopwords
stopwords = set(STOPWORDS)
stopwords.add('said')

alice_wc = WordCloud(
    background_color='white',
    max_words=2000,
    stopwords=stopwords,
)

alice_wc.generate(alice_novel)

# # Visualize most words from 2000

fig = plt.figure()
fig.set_figwidth(14)
fig.set_figheight(18)
plt.imshow(alice_wc, interpolation='bilinear')
plt.axis('off')
plt.show()

In [None]:
!wget --quiet https://github.com/ardhiraka/PFDS_sources/raw/master/alice_mask.png -O alice_mask.png

alice_mask = np.array(Image.open('./alice_mask.png'))

fig = plt.figure()
fig.set_figwidth(14)
fig.set_figheight(18)

plt.imshow(alice_mask, cmap=plt.cm.gray, interpolation='bilinear')
plt.axis('off')
plt.show()

In [None]:
# Word cloud that uses the mask image loaded above.
alice_wc = WordCloud(
    background_color='white',
    max_words=2000,
    mask=alice_mask,
    stopwords=stopwords,
)
alice_wc.generate(alice_novel)

fig = plt.figure(figsize=(14, 18))

plt.imshow(alice_wc, interpolation='bilinear')
plt.axis('off')
plt.show()

In [None]:
!python3 -m pip install seaborn

import seaborn as sns

In [None]:

years = [str(year) for year in range(1980, 2014)]
# Sum the immigrant counts of every row of df_can for each year.
df_tot = pd.DataFrame(df_can[years].sum(axis=0))

# Year labels become floats, then move from the index into a regular column.
df_tot.index = [float(year_label) for year_label in df_tot.index]
df_tot = df_tot.reset_index()

df_tot.columns = ['year', 'total']

df_tot

In [None]:
# Seaborn regression plot of the yearly totals, drawn on an explicit axes.
figure, axis = plt.subplots(figsize=(18, 8))

ax = sns.regplot(
    data=df_tot,
    x='year',
    y='total',
    color='green',
    marker='x',
    ax=axis,
)

In [None]:
# Same regression plot with larger fonts, a white grid and bigger markers.
plt.figure(figsize=(15, 10))
sns.set(font_scale=1.5)
sns.set_style('whitegrid')
ax = sns.regplot(
    data=df_tot,
    x='year',
    y='total',
    color='green',
    marker='+',
    scatter_kws={'s': 200},
)
ax.set(xlabel='Year', ylabel='Total Immigration')
ax.set_title('Total Immigration to Canada from 1980 - 2013')

In [None]:
# Regression plot for the combined yearly totals of three countries.
# Rows are the years, columns the countries.
df_countries = df_can.loc[['Denmark', 'Norway', 'Sweden'], years].T

# One total per year, with the year as an integer column.
df_total = pd.DataFrame(df_countries.sum(axis=1)).reset_index()
df_total.columns = ['year', 'total']
df_total['year'] = df_total['year'].astype(int)

plt.figure(figsize=(15, 10))
sns.set(font_scale=1.5)
sns.set_style('whitegrid')
ax = sns.regplot(
    data=df_total,
    x='year',
    y='total',
    color='green',
    marker='+',
    scatter_kws={'s': 200},
)
ax.set(xlabel='Year', ylabel='Total Immigration')
ax.set_title('Total Immigration from Denmark, Sweden, and Norway to Canada from 1980 - 2013')

In [None]:
# !python3 -m pip install folium

import folium

# dir(folium)

# A map created without arguments uses folium's defaults for everything.
world_map = folium.Map()

# Last expression of the cell, so the notebook renders the map.
world_map

In [None]:
# HACKTIV8
# Only the map shown at the end of the cell is rendered, so only that one is
# built. For the wider view use location=[-6.992620, 110.428009] with
# zoom_start=8 instead.
world_map = folium.Map(location=[-6.2607187, 106.7794275], zoom_start=15)

world_map

In [None]:
# A. Stamen Toner map
# NOTE(review): newer folium releases may no longer ship the built-in Stamen
# tile sets — confirm against the installed version.
world_map = folium.Map(
    tiles='Stamen Toner',
    location=[-6.9902851, 110.4207485],
    zoom_start=13,
)
world_map

In [None]:
# B. Stamen Terrain map
# NOTE(review): newer folium releases may no longer ship the built-in Stamen
# tile sets — confirm against the installed version.
world_map = folium.Map(
    tiles='Stamen Terrain',
    location=[-6.9902851, 110.4207485],
    zoom_start=13,
)
world_map

In [None]:
# Load the police incident records straight from the hosted CSV file.
incidents_url = 'https://raw.githubusercontent.com/ardhiraka/PFDS_sources/master/Police_Department_Incidents_-_Previous_Year__2016_.csv'
df_incidents = pd.read_csv(incidents_url)
df_incidents.head()

In [None]:
# Keep only the first 100 rows (all columns) for the map examples.
df_incidents100 = df_incidents.iloc[0:100]
df_incidents100.columns

In [None]:
# Centre of the map as latitude / longitude.
latitude, longitude = 37.77, -122.42

sanfran_map = folium.Map(zoom_start=12, location=[latitude, longitude])
sanfran_map

In [None]:
# Collect one circle marker per incident in a feature group.
incidents = folium.map.FeatureGroup()

# Appearance shared by every circle marker.
marker_style = dict(
    radius=5,
    color='yellow',
    fill=True,
    fill_color='blue',
    fill_opacity=0.6,
)

for lat, lng in zip(df_incidents100.Y, df_incidents100.X):
    folium.CircleMarker([lat, lng], **marker_style).add_to(incidents)

# Attach the whole group to the map; the returned map is rendered.
sanfran_map.add_child(incidents)

In [None]:
# Fresh map whose circle markers carry a popup with category and description.
sanfran_map = folium.Map(location=[latitude, longitude], zoom_start=12)

incident_fields = zip(
    df_incidents100.Y,
    df_incidents100.X,
    df_incidents100.Category,
    df_incidents100.Descript,
)
for lat, lng, label, descrip in incident_fields:
    popup_message = 'Category: ' + label + ' ' + 'Description: ' + descrip
    circle = folium.CircleMarker(
        [lat, lng],
        popup=popup_message,
        radius=5,
        color='yellow',
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
    )
    sanfran_map.add_child(circle)

sanfran_map

In [None]:
# Start from a clean map so that re-running the cell does not stack duplicate
# markers on top of the ones added earlier.
sanfran_map = folium.Map(location=[latitude, longitude], zoom_start=12)
incidents = folium.map.FeatureGroup()

for lat, lng, label in zip(df_incidents100.Y, df_incidents100.X, df_incidents100.Category):
    # The circle must be attached to the feature group; otherwise it is created
    # and thrown away and the group added below stays empty.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='yellow',
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
    ).add_to(incidents)
    # Pin with the incident category as popup, placed directly on the map.
    folium.Marker([lat, lng], popup=label).add_to(sanfran_map)

sanfran_map.add_child(incidents)

In [None]:
# Marker clustering
from folium import plugins

# Fresh map for the clustered view
sanfran_map = folium.Map(location=[latitude, longitude], zoom_start=12)

# Create the container that holds the clustered markers and attach it to the map
incidents = plugins.MarkerCluster()
incidents.add_to(sanfran_map)

for lat, lng, label in zip(df_incidents100.Y, df_incidents100.X, df_incidents100.Category):
    marker = folium.Marker(location=[lat, lng], icon=None, popup=label)
    incidents.add_child(marker)

sanfran_map

In [None]:
# !wget --quiet https://raw.githubusercontent.com/ardhiraka/PFDS_sources/master/world_countries.json

world_geo = r'world_countries.json'

# 'Country' is the index of df_can, but the choropleth looks up both names in
# `columns` as regular columns, so it gets a copy with the index restored as a
# column. df_can itself is left untouched.
df_can_countries = df_can.reset_index()

# NOTE(review): the 'Mapbox Bright' tile set appears to have been dropped from
# folium's built-in tiles, so the default tiles are used — confirm against the
# installed version.
world_map = folium.Map(location=[0, 0], zoom_start=2)

# folium.Choropleth is the layer class that replaces the deprecated
# Map.choropleth method.
folium.Choropleth(
    geo_data=world_geo,
    data=df_can_countries,
    columns=['Country', 'Total'],
    key_on='feature.properties.name',
    fill_color='YlOrRd', # Yellow Orange Red
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Immigration to Canada',
).add_to(world_map)
world_map