# Data Visualization with Plotly (HTML output)

## Die

In [9]:
"""Die"""
from random import randint

from plotly.graph_objs import Bar, Layout
from plotly import offline

class Die:
    """A class representing a single die.

    Attributes:
        num_sides: Number of faces; a roll yields a value in 1..num_sides.
        numbers_roll: Number of rolls requested in the latest mult_roll() call.
        results: Outcomes of the latest mult_roll() call, in roll order.
        frequencies: Per-face counts computed by the latest frequency() call.
    """

    def __init__(self, num_sides=6):
        """Initialize attributes of the die."""
        self.num_sides = num_sides
        # Start from an empty (but correctly typed) state so frequency() and
        # one_die_histogram() can be called before any roll has been made.
        self.numbers_roll = 0
        self.results = []
        self.frequencies = []

    def roll(self):
        """Return a random value between 1 and number of sides."""
        return randint(1, self.num_sides)

    def mult_roll(self, numbers_roll=1):
        """Roll the die numbers_roll times and return the results in a list."""
        self.numbers_roll = numbers_roll
        self.results = [self.roll() for _ in range(numbers_roll)]
        return self.results

    def frequency(self):
        """Count the frequency of each side in the stored results.

        Returns a list in which index i holds how often face i + 1 occurs
        in self.results; the list is also stored in self.frequencies.
        """
        self.frequencies = [
            self.results.count(value)
            for value in range(1, self.num_sides + 1)
        ]
        return self.frequencies

    def one_die_histogram(self):
        """Visualize the results of the latest rolls as a bar chart.

        Writes the chart to "d<num_sides>.html" through plotly's offline
        plotting.
        """
        x_values = list(range(1, self.num_sides + 1))
        # Recompute the counts here so the chart always matches the current
        # results, even when frequency() was not called beforehand.
        data = [Bar(x=x_values, y=self.frequency())]

        x_axis_config = {'title': 'Result'}
        y_axis_config = {'title': 'Frequencies'}
        my_layout = Layout(
            title=f"Results of rolling one D{self.num_sides} {self.numbers_roll} times",
            xaxis=x_axis_config, yaxis=y_axis_config
            )
        offline.plot(
            {'data': data, 'layout': my_layout},
            filename=f"d{self.num_sides}.html"
            )

# Plotting
# D6 = Die(6)
# results = D6.mult_roll(100)
# print(results)
# freq = D6.frequency()
# print(freq)
# D6.one_die_histogram()

[5, 2, 4, 4, 2, 5, 4, 5, 4, 5, 1, 3, 6, 5, 2, 4, 1, 2, 5, 4, 6, 1, 5, 1, 6, 1, 2, 1, 5, 6, 1, 2, 3, 1, 5, 6, 6, 4, 3, 1, 6, 2, 4, 3, 5, 4, 3, 6, 6, 4, 4, 4, 4, 3, 4, 6, 6, 5, 4, 2, 2, 1, 2, 2, 3, 4, 3, 4, 2, 3, 1, 3, 3, 2, 3, 5, 1, 1, 3, 6, 6, 1, 4, 6, 3, 5, 6, 4, 5, 6, 5, 3, 6, 4, 5, 5, 1, 1, 6, 6]
[16, 13, 15, 20, 17, 19]


## Dice

In [17]:
"""Dice"""
from random import randint

from plotly.graph_objs import Bar, Layout
from plotly import offline

#from die import Die

class Dice:
    """A class representing multiple dice that are rolled together.

    Attributes:
        num_dice: Number of dice in the set.
        num_sides: Number of sides of each die, one entry per die.
        dice: The Die instances, in the same order as num_sides.
        numbers_roll: Number of rolls requested in the latest multi_roll_all().
        results: Sum of all dice for each roll of the latest multi_roll_all().
        frequencies: Counts per possible sum, from the latest frequency() call.
        max_results: Largest possible sum (every die showing its top face).
    """

    def __init__(self, num_sides):
        """Initialize attributes of multiple dice.

        Args:
            num_sides: Sequence with the number of sides of each die.
        """
        self.num_dice = len(num_sides)  # number of dice.
        self.num_sides = num_sides  # number of sides for each die in a list.
        self.dice = self._create_dice()  # a set of dice in a list.

        self.numbers_roll = 0
        self.results = []
        self.frequencies = []
        self.max_results = sum(self.num_sides)

    def _create_dice(self):
        """Create one Die instance per entry in num_sides."""
        return [Die(sides) for sides in self.num_sides]

    def roll_all(self):
        """Return the sum of the values obtained by rolling every die once."""
        return sum(die.roll() for die in self.dice)

    def multi_roll_all(self, numbers_roll=2):
        """Roll all the dice together numbers_roll times.

        Returns:
            A tuple (numbers_roll, results) where results is the list holding
            the sum of all dice for each roll.
        """
        self.numbers_roll = numbers_roll
        self.results = [self.roll_all() for _ in range(numbers_roll)]
        return self.numbers_roll, self.results

    def frequency(self):
        """Count the frequency of each possible sum in the stored results.

        The smallest possible sum is num_dice (every die showing 1) and the
        largest is max_results, so the returned list has one entry per value
        in that inclusive range. The list is also stored in self.frequencies.
        """
        self.frequencies = [
            self.results.count(value)
            for value in range(self.num_dice, self.max_results + 1)
            ]
        return self.frequencies

    def dice_histogram(self):
        """Visualize the results of the dice rolls as a bar chart.

        Writes the chart to "results.html" through plotly's offline plotting.
        """
        x_values = list(range(self.num_dice, self.max_results + 1))
        # Recompute the counts here so the chart always matches the current
        # results, even when frequency() was not called beforehand.
        data = [Bar(x=x_values, y=self.frequency())]

        x_axis_config = {'title': 'Result', 'dtick': 1}
        y_axis_config = {'title': 'Frequencies'}
        my_layout = Layout(
            title=f"Results of rolling D{self.num_sides} {self.numbers_roll} times",
            xaxis=x_axis_config, yaxis=y_axis_config
            )
        offline.plot(
            {'data': data, 'layout': my_layout},
            filename="results.html"
            )

## Earthquake Explore

In [None]:
"""Earthquake Explore"""
import json
import re

from plotly.graph_objs import Scattergeo, Layout
from plotly import offline

def main(input_file_path, output_file_path):
    """Visualize earthquake data with geoJson data.

    Reads the GeoJSON file, writes an indented copy of it to
    'earthquake/data/readable_eq_data.json' for manual inspection, and plots
    every earthquake on a world map.

    Args:
        input_file_path: Path of the GeoJSON file to read.
        output_file_path: Path of the HTML file written by plotly.
    """
    # JSON text is UTF-8; being explicit avoids depending on the platform's
    # default encoding.
    with open(input_file_path, encoding='utf-8') as fhand:
        all_eq_data = json.load(fhand)

    # Explore the structure of the data through an indented copy.
    readable_file = 'earthquake/data/readable_eq_data.json'
    with open(readable_file, 'w', encoding='utf-8') as fhand:
        json.dump(all_eq_data, fhand, indent=4)

    title = all_eq_data['metadata']['title']

    # Collect the plotted fields in a single pass over the records.
    hover_texts, mags, lons, lats = [], [], [], []
    for eq_dict in all_eq_data['features']:
        mag = eq_dict['properties']['mag']
        if mag is None:
            # A missing magnitude cannot be turned into a marker size
            # (5*None raises TypeError), so the record is left out.
            continue
        coordinates = eq_dict['geometry']['coordinates']
        hover_texts.append(eq_dict['properties']['title'])
        mags.append(mag)
        lons.append(coordinates[0])
        lats.append(coordinates[1])

    # Map the earthquakes.
    data = [{
        'type': 'scattergeo',
        'lon': lons,
        'lat': lats,
        'text': hover_texts,
        'marker': {
            'size': [5*mag for mag in mags],  # marker size scales with mag
            'color': mags,
            'colorscale': 'Agsunset',
            'reversescale': True,
            'colorbar': {'title': 'Magnitude'},
        },
    }]
    my_layout = Layout(title=f'{title}')

    fig = {'data': data, 'layout': my_layout}
    offline.plot(fig, filename=output_file_path)

# input_file_path = 'earthquake/data/m4.5+_eq_data_past_month.json'
# output_file_path = 'earthquake/output/global_m4.5+_earthquakes_past_month.html'
# main(input_file_path, output_file_path)

## Github Python Project Visualization

In [None]:
"""Github Python Project Visualization"""
import requests

from plotly.graph_objs import Bar
from plotly import offline

import hidden # import personal access token

def main(search_url, headers,
         output_file_path='github_visual/python_repos.html', timeout=10):
    """Visualize the Most-Starred Python Projects on Github.

    Args:
        search_url: GitHub search API URL to query.
        headers: HTTP headers sent with the request.
        output_file_path: Path of the HTML file written by plotly.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive server cannot block the script indefinitely.
    """
    # Make an API call to store the response.
    r = requests.get(search_url, headers=headers, timeout=timeout)
    print(f"Status code: {r.status_code}")

    # Store API response in a variable
    response_dict = r.json()

    # Explore information about the repositories.
    repo_dicts = response_dict['items']
    print(f"Repositories returned: {len(repo_dicts)}")

    repo_links, stars, labels = [], [], []
    for repo_dict in repo_dicts:
        # x_axis: repository name rendered as a link to the repository.
        repo_name = repo_dict['name']
        repo_url = repo_dict['html_url']
        repo_links.append(f"<a href='{repo_url}'>{repo_name}</a>")
        # y_axis
        stars.append(repo_dict['stargazers_count'])
        # hover_text: a repository without a description would otherwise
        # show the literal text "None".
        owner = repo_dict['owner']['login']
        description = repo_dict['description'] or 'No description provided.'
        labels.append(f"{owner}<br />{description}")

    # Make Visualization
    data = [{
        'type': 'bar',
        'x': repo_links,
        'y': stars,
        'hovertext': labels,
        'marker': {
            'color': 'rgb(60, 100, 150)',
            'line': {'width': 1.5, 'color': 'rgb(25, 25, 25)'}
        },
        'opacity': 0.6,
    }]

    my_layout = {
        'title': 'Most-Starred Python Projects on Github',
        'titlefont': {'size': 28},
        'xaxis': {
            'title': 'Repository',
            'titlefont': {'size': 24},
            'tickfont': {'size': 14},
        },
        'yaxis': {
            'title': 'Stars',
            'titlefont': {'size': 24},
            'tickfont': {'size': 14},
        },
    }

    fig = {'data': data, 'layout': my_layout}
    offline.plot(fig, filename=output_file_path)

def rate_limit(rate_url, headers):
    """Print how many search requests are still allowed by the rate limit."""
    response = requests.get(rate_url, headers=headers)
    search_quota = response.json()['resources']['search']
    remaining = search_quota['remaining']
    print(f"Remaining Rate: {remaining}")

# Search endpoint: Python repositories, sorted by number of stars.
search_url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
rate_url = 'https://api.github.com/rate_limit'
headers = {
    # Ask for version 3 of the GitHub API ('vs' was a typo for 'v3').
    'Accept': 'application/vnd.github.v3+json',
    # The personal access token is kept out of the source in `hidden`.
    'Authorization': f'token {hidden.token_secret()}',
    }

main(search_url, headers)
rate_limit(rate_url, headers)

## World Fire Visualization

In [None]:
"""World Fire"""
import csv
from datetime import datetime

from plotly.graph_objs import Scattergeo, Layout
from plotly import offline

def main(input_file_path, output_file_path):
    """Visualize global fire data with csv files.

    Rows whose latitude, longitude or brightness cannot be parsed as a
    number are reported on stdout and left out of the map.

    Args:
        input_file_path: Path of the CSV file to read; its header row must
            contain the columns 'latitude', 'longitude', 'brightness',
            'acq_date', 'acq_time' and 'daynight'.
        output_file_path: Path of the HTML file written by plotly.
    """
    lats, lons, brightnesses, hover_texts = [], [], [], []

    # The context manager closes the file even if parsing fails part-way;
    # newline='' lets the csv module handle line endings itself.
    with open(input_file_path, newline='') as fhand:
        reader = csv.reader(fhand)
        header_row = next(reader)

        # Map each column name to its position in a row.
        header = {
            column_header: index
            for index, column_header in enumerate(header_row)
        }

        for row in reader:
            if not row:
                # Blank lines come back as empty rows; nothing to read.
                continue
            try:
                lat = float(row[header['latitude']])
                lon = float(row[header['longitude']])
                brightness = float(row[header['brightness']])
            except ValueError:
                print(f"Missing data for {row[header['acq_date']]}")
                continue

            hover_text = f"{row[header['acq_date']]}, {row[header['acq_time']]}, "
            hover_text += f" {row[header['daynight']]}"

            lats.append(lat)
            lons.append(lon)
            brightnesses.append(brightness)
            hover_texts.append(hover_text)

    # Map the fires.
    data = [{
        'type': 'scattergeo',
        'lon': lons,
        'lat': lats,
        'text': hover_texts,
        'marker': {
            #'size': [0.1*brightness for brightness in brightnesses],
            'color': brightnesses,
            'colorscale': 'Agsunset',
            'reversescale': True,
            'colorbar': {'title': 'Brightness'},
        },
    }]
    my_layout = Layout(title='Global Active Fire Past Day')

    fig = {'data': data, 'layout': my_layout}
    offline.plot(fig, filename=output_file_path)

# input_file_path = 'global_fire/data/MODIS_C6_Global_24h.csv'
# output_file_path = 'global_fire/output/global_fire_past_day.html'
# main(input_file_path, output_file_path)

## Hacker News API data visualization

### API
Explore the structure of the data returned by the API.

In [None]:
"""Hacker News API"""
import requests
import json

# Fetch a single Hacker News item so that its JSON layout can be inspected.
url = 'https://hacker-news.firebaseio.com/v0/item/19155826.json'
r = requests.get(url)
print(f"Status Code: {r.status_code}")

# Save an indented copy of the payload, which is easier to read.
response_dict = r.json()
readable_file = 'hacker_news_visual/data/readable_hn_data.json'
with open(readable_file, 'w') as fhand:
    fhand.write(json.dumps(response_dict, indent=4))

### Scraping

In [None]:
"""Hacker News API"""
from operator import itemgetter
import json

import requests

def main(url, saved_file='hacker_news/data/submissions.json'):
    """Save the most active discussion on Hacker News in json.

    Fetches the list of submission ids from `url`, looks up the first 50 of
    them one by one, prints them ordered by number of comments (highest
    first) and stores that ordered list as JSON.

    Args:
        url: API URL returning a JSON list of submission ids.
        saved_file: Path of the JSON file the submissions are written to.
    """
    # Make an API call to store the response.
    response_obj = requests.get(url)
    print(f"Status Code: {response_obj.status_code}")

    # Process information about each submission.
    submission_ids = response_obj.json()
    print(f"Retrieved ids: {len(submission_ids)}\n")
    submission_dicts = []

    for submission_id in submission_ids[:50]:
        # Make a separate API call for each submission.
        item_url = f"https://hacker-news.firebaseio.com/v0/item/{submission_id}.json"
        response_obj = requests.get(item_url)
        print(f"id: {submission_id}\tStatus Code: {response_obj.status_code}")
        response_dict = response_obj.json()

        try:
            # Build a dictionary for each article.
            submission_dict = {
                'title': response_dict['title'],
                'hn_link': f"https://news.ycombinator.com/item?id={submission_id}",
                'comments': response_dict['descendants'],
            }
        except (KeyError, TypeError):
            # KeyError: the item lacks a title or a comment count.
            # TypeError: the response body was JSON null, so response_dict
            # is None and cannot be indexed.
            print(f"Failure to Find the corresponding key: {submission_id}")
        else:
            submission_dicts.append(submission_dict)

    submission_dicts = sorted(
        submission_dicts, key=itemgetter('comments'), reverse=True)

    for submission_dict in submission_dicts:
        print(f"\nTitle: {submission_dict['title']}")
        print(f"Discussion Links: {submission_dict['hn_link']}")
        print(f"Comments: {submission_dict['comments']}")

    # Save the retrieved data in json file.
    with open(saved_file, 'w') as fhand:
        json.dump(submission_dicts, fhand)

# The 'topstories' endpoint supplies the submission ids; main() looks up the
# first 50 of them and saves the result as JSON.
URL = 'https://hacker-news.firebaseio.com/v0/topstories.json'
main(URL)


### Visualization

In [None]:
"""Visualize the Hacker News Data"""
import json

from plotly.graph_objs import Bar
from plotly import offline

def main(input_file_path, output_file_path):
    """Visualize retrieved Hacker News data.

    Draws one bar per submission: the label is the title linked to its
    discussion page and the height is the number of comments.

    Args:
        input_file_path: Path of a JSON file holding a list of dictionaries
            with the keys 'title', 'hn_link' and 'comments'.
        output_file_path: Path of the HTML file written by plotly.
    """
    with open(input_file_path) as fhand:
        submission_dicts = json.load(fhand)

    labels, comments = [], []
    for submission_dict in submission_dicts:
        # x_axis: title rendered as a link to the discussion.
        title = submission_dict['title']
        hn_link = submission_dict['hn_link']
        labels.append(f"<a href='{hn_link}'>{title}</a>")
        # y_axis
        comments.append(submission_dict['comments'])

    # Make Visualization
    data = [{
        'type': 'bar',
        'x': labels,
        'y': comments,
        'marker': {
            'color': 'rgb(60, 100, 150)',
            'line': {'width': 1.5, 'color': 'rgb(25, 25, 25)'}
        },
        'opacity': 0.6,
    }]

    # The axis titles describe what is actually plotted: submissions on the
    # x axis and their comment counts on the y axis.
    my_layout = {
        'title': 'Most-Active Discussion on Hacker News',
        'titlefont': {'size': 28},
        'xaxis': {
            'title': 'Submission',
            'titlefont': {'size': 24},
            'tickfont': {'size': 10},
        },
        'yaxis': {
            'title': 'Comments',
            'titlefont': {'size': 24},
            'tickfont': {'size': 14},
        },
    }

    fig = {'data': data, 'layout': my_layout}
    offline.plot(fig, filename=output_file_path)

# Input: the JSON file written by the scraping step above.
# Output: the HTML page containing the bar chart.
INPUT_FILE_PATH = 'hacker_news/data/submissions.json'
OUTPUT_FILE_PATH = 'hacker_news/hacker_news.html'
main(INPUT_FILE_PATH, OUTPUT_FILE_PATH)

### Building Database

In [None]:
"""Hacker News API"""
from operator import itemgetter

import requests
import json
import sqlite3

# Open the SQLite database file that will hold the submissions; `cur` is the
# cursor used for every statement below.
conn = sqlite3.connect('hacker_news/data/submissions.sqlite')
cur = conn.cursor()

# Make some fresh tables using executescript()
# NOTE: the table is dropped first, so rows from a previous run are discarded.
cur.executescript('''
DROP TABLE IF EXISTS Submissions;

CREATE TABLE Submissions (
    id  INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
    submission_id  TEXT UNIQUE,
    title TEXT  UNIQUE,
    discussion_link TEXT UNIQUE,
    comments INTEGER
);
''')

# Make an API call to store the response.
id_url = 'https://hacker-news.firebaseio.com/v0/topstories.json'
r = requests.get(id_url)
print(f"Status Code: {r.status_code}")

# Process information about each submission.
submission_ids = r.json()
print(f"Retrieved ids: {len(submission_ids)}\n")
submission_dicts = []
# Progress counter: printed with each request and used to decide when to
# commit.
count = 1

# Look up the first 50 submissions one by one and store each usable one in
# the Submissions table.
for submission_id in submission_ids[:50]:
    # Make a separate API call for each submission.
    url = f"https://hacker-news.firebaseio.com/v0/item/{submission_id}.json"
    r = requests.get(url)
    print(f"{count} - id: {submission_id}\tStatus Code: {r.status_code}")
    response_dict = r.json()

    try:
        # Build a dictionary for each article.
        submission_dict = {
            'title': response_dict['title'],
            'hn_link': f"https://news.ycombinator.com/item?id={submission_id}",
            'comments': response_dict['descendants'],
        }
    except (KeyError, TypeError):
        # KeyError: the item lacks a title or a comment count.
        # TypeError: the response body was JSON null, so response_dict is
        # None and cannot be indexed.
        print(f"Failure to Find the corresponding key: {submission_id}")
        continue

    submission_dicts.append(submission_dict)
    # Values are bound through placeholders rather than string formatting.
    cur.execute(
        '''INSERT OR REPLACE INTO Submissions
        (submission_id, title, discussion_link, comments)
        VALUES ( ?, ?, ?, ? )''',
        (
            submission_id,
            submission_dict['title'],
            submission_dict['hn_link'],
            submission_dict['comments'],
        ))

    # Commit in batches instead of after every insert.
    # NOTE(review): rows inserted after the last batch commit stay
    # uncommitted unless a final conn.commit() follows this loop - confirm
    # one exists.
    count += 1
    if count % 5 == 0:
        conn.commit()