# Reference:

- https://ehmatthes.github.io/pcc_2e/solutions/solutions/

- [GitHub API Documentation](https://developer.github.com/v3/)

Plotly:
- [Plotly: Python Figure Reference](https://plotly.com/python/reference/)
- [Plotly User Guide in Python](https://plotly.com/python/creating-and-updating-figures/)
- [Graphs and Plots Using Plotly via pitt.edu](https://www.pitt.edu/~naraehan/presentation/Graphs_and_Plots_using_Plotly.html)
- [Plotly Filled Area Plots](https://plotly.com/python/filled-area-plots/)

# P2: Data Visualization

## CH15 - Generating Data

### Plotting a Simple Line Graph

- `plt.subplots()` is a function that returns a tuple containing a figure and axes object(s). Thus when using `fig, ax = plt.subplots()` you unpack this tuple into the variables `fig` and `ax`.

- The `subplots()` function can generate one or more plots in the same figure. The variable `fig` represents the entire figure or collection of plots that are generated. The variable `ax` represents a single plot in the figure and is the variable we’ll use most of the time. The function `plt.show()` opens Matplotlib’s viewer and displays the plot.

In [None]:
# Plotting a Simple Line Graph

import matplotlib.pyplot as plt

# The first five perfect squares. No x-values are supplied, so Matplotlib
# plots them against their list indexes (0 through 4).
squares = [n * n for n in range(1, 6)]

# One figure containing a single set of axes.
fig, ax = plt.subplots()
ax.plot(squares)

# Open the viewer and display the chart.
plt.show()

In [None]:
# Changing the Label Type and Line Thickness

import matplotlib.pyplot as plt

# The squares of 1 through 5.
squares = [1, 4, 9, 16, 25]

fig, ax = plt.subplots()
# Draw a thicker line than the default.
ax.plot(squares, linewidth=3)

# Title the chart and name each axis.
label_font = {'fontsize': 14}
ax.set_title("Square Numbers", fontsize=20)
ax.set_xlabel("Value", **label_font)
ax.set_ylabel("Square of Value", **label_font)

# Enlarge the tick labels on both axes.
ax.tick_params(axis='both', labelsize=14)

plt.show()

In [None]:
# Correcting the Plot

import matplotlib.pyplot as plt

# Pair every square with the value that produced it, so the x-axis shows
# 1 through 5 rather than the list indexes 0 through 4.
input_values = range(1, 6)
squares = [value * value for value in input_values]

fig, ax = plt.subplots()
ax.plot(input_values, squares, linewidth=3)

# Title the chart and name each axis.
label_font = {'fontsize': 14}
ax.set_title("Square Numbers", fontsize=20)
ax.set_xlabel("Value", **label_font)
ax.set_ylabel("Square of Value", **label_font)

# Enlarge the tick labels on both axes.
ax.tick_params(axis='both', labelsize=14)

plt.show()

In [None]:
# Using Built-in Styles

import matplotlib.pyplot as plt
# Show the names of the style sheets this Matplotlib installation provides;
# as the last expression in the cell, the list is echoed as the cell output.
plt.style.available

In [None]:
import matplotlib.pyplot as plt

input_values = range(1, 6)
squares = [x**2 for x in input_values]

# The 'seaborn' style sheet was renamed 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name was removed in 3.8, so use whichever name this install offers.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
fig, ax = plt.subplots()
ax.plot(input_values, squares, linewidth=3)

# Set chart title and label axes.
ax.set_title("Square Numbers", fontsize=20)
ax.set_xlabel("Value", fontsize=14)
ax.set_ylabel("Square of Value", fontsize=14)

# Set size of tick labels.
ax.tick_params(axis='both', labelsize=14)

plt.show()

In [None]:
# Plotting and Styling Individual Points with scatter()

import matplotlib.pyplot as plt

# The 'seaborn' style sheet was renamed 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name was removed in 3.8, so use whichever name this install offers.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
fig, ax = plt.subplots()

# Plot a single point at (2, 4).
ax.scatter(2, 4)

plt.show()

In [None]:
import matplotlib.pyplot as plt

# The 'seaborn' style sheet was renamed 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name was removed in 3.8, so use whichever name this install offers.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
fig, ax = plt.subplots()

# Plot a single point at (2, 4); the s argument sets the size of the dot.
ax.scatter(2, 4, s=200)

# Set chart title and label axes.
ax.set_title("Square Numbers", fontsize=20)
ax.set_xlabel("Value", fontsize=14)
ax.set_ylabel("Square of Value", fontsize=14)

# Set size of tick labels.
ax.tick_params(axis='both', which='major', labelsize=14)

plt.show()

In [None]:
# Plotting a Series of Points with scatter()

import matplotlib.pyplot as plt

x_values = range(1, 6)
y_values = [x**2 for x in x_values]

# The 'seaborn' style sheet was renamed 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name was removed in 3.8, so use whichever name this install offers.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
fig, ax = plt.subplots()

# Plot one dot per (value, square) pair.
ax.scatter(x_values, y_values, s=100)

# Set chart title and label axes.
ax.set_title("Square Numbers", fontsize=20)
ax.set_xlabel("Value", fontsize=14)
ax.set_ylabel("Square of Value", fontsize=14)

# Set size of tick labels.
ax.tick_params(axis='both', which='major', labelsize=14)

plt.show()

In [None]:
# Calculating Data Automatically

import matplotlib.pyplot as plt

# Generate 1000 points automatically instead of typing them in.
x_values = range(1, 1001)
y_values = [x**2 for x in x_values]

# The 'seaborn' style sheet was renamed 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name was removed in 3.8, so use whichever name this install offers.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
fig, ax = plt.subplots()
ax.scatter(x_values, y_values, s=10)

# Set chart title and label axes.
ax.set_title("Square Numbers", fontsize=20)
ax.set_xlabel("Value", fontsize=14)
ax.set_ylabel("Square of Value", fontsize=14)

# Set the range for each axis: [x_min, x_max, y_min, y_max].
ax.axis([0, 1100, 0, 1100000])

# Set size of tick labels.
ax.tick_params(axis='both', which='major', labelsize=14)

plt.show()

In [None]:
# Defining Custom Colors

import matplotlib.pyplot as plt

x_values = range(1, 1001)
y_values = [x**2 for x in x_values]

# The 'seaborn' style sheet was renamed 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name was removed in 3.8, so use whichever name this install offers.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
fig, ax = plt.subplots()

# A single color for every point can be given by name (e.g. color='red') or
# as an RGB tuple of three values between 0 and 1 (red, green, blue).
# Use the color keyword rather than c for a bare RGB tuple: c also accepts
# per-point values to be color-mapped, and Matplotlib warns that a plain
# RGB sequence passed as c is ambiguous.
ax.scatter(x_values, y_values, color=(0, 0.8, 0), s=10)

# Set chart title and label axes.
ax.set_title("Square Numbers", fontsize=20)
ax.set_xlabel("Value", fontsize=14)
ax.set_ylabel("Square of Value", fontsize=14)

# Set the range for each axis: [x_min, x_max, y_min, y_max].
ax.axis([0, 1100, 0, 1100000])

# Set size of tick labels.
ax.tick_params(axis='both', which='major', labelsize=14)

plt.show()

In [None]:
# Using a Colormap

import matplotlib.pyplot as plt

x_values = range(1, 1001)
y_values = [x**2 for x in x_values]

# The 'seaborn' style sheet was renamed 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name was removed in 3.8, so use whichever name this install offers.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
fig, ax = plt.subplots()

# Color each point by its y-value using the 'autumn' colormap.
# https://matplotlib.org/gallery/color/colormap_reference.html#colormap-reference
ax.scatter(x_values, y_values, c=y_values, cmap=plt.cm.autumn, s=10)

# Set chart title and label axes.
ax.set_title("Square Numbers", fontsize=20)
ax.set_xlabel("Value", fontsize=14)
ax.set_ylabel("Square of Value", fontsize=14)

# Set the range for each axis: [x_min, x_max, y_min, y_max].
ax.axis([0, 1100, 0, 1100000])

# Set size of tick labels.
ax.tick_params(axis='both', which='major', labelsize=14)

# Save the plot to disk; bbox_inches='tight' trims extra whitespace.
plt.savefig('resources/squares_plot.png', bbox_inches='tight')

plt.show()

#### Exercise: Cubes

In [None]:
import matplotlib.pyplot as plt

input_values = range(1, 5001)
cubes = [x**3 for x in input_values]

# The 'seaborn' style sheet was renamed 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name was removed in 3.8, so use whichever name this install offers.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
fig, ax = plt.subplots()

# Color each point by its cube using the 'winter' colormap.
ax.scatter(input_values, cubes, c=cubes, cmap=plt.cm.winter, s=10)

# Set chart title and label axes.
ax.set_title('Cubic Numbers', fontsize=24)
ax.set_xlabel('Value', fontsize=14)
ax.set_ylabel('Cube of Value', fontsize=14)

# Set the range for each axis: [x_min, x_max, y_min, y_max].
ax.axis([0, 6000, 0, 1.4e11])

# Set size of tick labels.
ax.tick_params(axis='both', labelsize=14)

# Save the chart; bbox_inches='tight' trims extra whitespace.
plt.savefig('resources/cubes_plot.png', bbox_inches='tight')

plt.show()

### Random Walks

- fig
    - plot styles = ['scatter', 'plot']
    - built-in styles
    - save plots
    - size
    - chart title
    - label axes
    - dpi
    
- points
    - nums
    - colors
        - style A = ['color name', 'rgb', 'colormap']
        - style B = ['color', 'edge colors']
    - size
    - start / end point

In [None]:
# Creating the RandomWalk() Class

from random import choice

class RandomWalk:
    """Generate the points of a two-dimensional random walk."""

    def __init__(self, num_points=5000):
        """Record the target number of points and start the walk at the origin."""
        self.num_points = num_points

        # Every walk begins at (0, 0).
        self.x_values = [0]
        self.y_values = [0]

    def fill_walk(self):
        """Append random steps until the walk holds num_points points."""
        directions = [1, -1]
        distances = [0, 1, 2, 3, 4]

        while len(self.x_values) < self.num_points:
            # Each axis independently gets a direction and then a distance.
            x_step = choice(directions) * choice(distances)
            y_step = choice(directions) * choice(distances)

            # Record the move only when the walk actually goes somewhere;
            # a (0, 0) step is discarded and another one is drawn.
            if x_step != 0 or y_step != 0:
                self.x_values.append(self.x_values[-1] + x_step)
                self.y_values.append(self.y_values[-1] + y_step)

In [None]:
# Plotting the Random Walk

import matplotlib.pyplot as plt

# Select the plot style, then build one walk with the default number of points.
plt.style.use('classic')
rw = RandomWalk()
rw.fill_walk()

# Draw every point of the walk as a small dot.
fig, ax = plt.subplots()
ax.scatter(rw.x_values, rw.y_values, s=10)
plt.show()

In [None]:
# Generating Multiple Random Walks

import matplotlib.pyplot as plt

# The style sheet never changes between walks, so select it once up front.
plt.style.use('classic')

# Keep making new walks, as long as the program is active.
while True:
    # Make a random walk.
    rw = RandomWalk()
    rw.fill_walk()

    # Plot the points in the walk.
    fig, ax = plt.subplots()
    ax.scatter(rw.x_values, rw.y_values, s=10)
    plt.show()

    # Stop on any form of "n" (e.g. "N" or " n "); anything else continues.
    keep_running = input("Make another walk? (y/n): ")
    if keep_running.strip().lower() == 'n':
        break

In [None]:
# Styling the Walk

from random_walk import RandomWalk
import matplotlib.pyplot as plt

# The style sheet never changes between walks, so select it once up front.
plt.style.use('classic')

while True:
    # Make a random walk.
    rw = RandomWalk()
    rw.fill_walk()

    # Plot the points, colored by the order in which they were generated.
    fig, ax = plt.subplots()
    point_numbers = range(rw.num_points)
    ax.scatter(rw.x_values, rw.y_values, c=point_numbers, cmap=plt.cm.spring,
               edgecolors='none', s=15)
    plt.show()

    # Stop on any form of "n" (e.g. "N" or " n "); anything else continues.
    keep_running = input("Make another walk? (y/n): ")
    if keep_running.strip().lower() == 'n':
        break

In [None]:
# Plotting the Starting and Ending Points

from random_walk import RandomWalk
import matplotlib.pyplot as plt

# The style sheet never changes between walks, so select it once up front.
plt.style.use('classic')

while True:
    # Make a random walk.
    rw = RandomWalk()
    rw.fill_walk()

    # Plot the points, colored by the order in which they were generated.
    fig, ax = plt.subplots()
    point_numbers = range(rw.num_points)
    ax.scatter(rw.x_values, rw.y_values, c=point_numbers, cmap=plt.cm.spring,
               edgecolors='none', s=15)

    # Emphasize the first and last points; they are drawn after the walk so
    # they sit on top of it.
    ax.scatter(0, 0, c='green', edgecolors='white', s=100)
    ax.scatter(rw.x_values[-1], rw.y_values[-1], c='red', edgecolors='white', s=100)

    plt.show()

    # Stop on any form of "n" (e.g. "N" or " n "); anything else continues.
    keep_running = input("Make another walk? (y/n): ")
    if keep_running.strip().lower() == 'n':
        break

In [None]:
# Cleaning Up the Axes

from random_walk import RandomWalk
import matplotlib.pyplot as plt

# The style sheet never changes between walks, so select it once up front.
plt.style.use('classic')

while True:
    # Make a random walk.
    rw = RandomWalk()
    rw.fill_walk()

    # Plot the points, colored by the order in which they were generated.
    fig, ax = plt.subplots()
    point_numbers = range(rw.num_points)
    ax.scatter(rw.x_values, rw.y_values, c=point_numbers, cmap=plt.cm.spring,
               edgecolors='none', s=15)

    # Emphasize the first and last points; they are drawn after the walk so
    # they sit on top of it.
    ax.scatter(0, 0, c='green', edgecolors='white', s=100)
    ax.scatter(rw.x_values[-1], rw.y_values[-1], c='red', edgecolors='white', s=100)

    # Remove the axes.
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    plt.show()

    # Stop on any form of "n" (e.g. "N" or " n "); anything else continues.
    keep_running = input("Make another walk? (y/n): ")
    if keep_running.strip().lower() == 'n':
        break

In [None]:
# Adding Plot Points

from random_walk import RandomWalk
import matplotlib.pyplot as plt

# The style sheet never changes between walks, so select it once up front.
plt.style.use('classic')

while True:
    # Make a random walk with 50,000 points.
    rw = RandomWalk(50_000)
    rw.fill_walk()

    # Plot the points, colored by the order in which they were generated.
    # With this many points each dot is drawn very small (s=1).
    fig, ax = plt.subplots()
    point_numbers = range(rw.num_points)
    ax.scatter(rw.x_values, rw.y_values, c=point_numbers, cmap=plt.cm.spring,
               edgecolors='none', s=1)

    # Emphasize the first and last points; they are drawn after the walk so
    # they sit on top of it.
    ax.scatter(0, 0, c='green', edgecolors='white', s=100)
    ax.scatter(rw.x_values[-1], rw.y_values[-1], c='red', edgecolors='white', s=100)

    # Remove the axes.
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    plt.show()

    # Stop on any form of "n" (e.g. "N" or " n "); anything else continues.
    keep_running = input("Make another walk? (y/n): ")
    if keep_running.strip().lower() == 'n':
        break

In [None]:
# Altering the Size to Fill the Screen

from random_walk import RandomWalk
import matplotlib.pyplot as plt

# The style sheet never changes between walks, so select it once up front.
plt.style.use('classic')

while True:
    # Make a random walk with 50,000 points.
    rw = RandomWalk(50_000)
    rw.fill_walk()

    # Plot the points on a 16x9-inch figure, colored by the order in which
    # they were generated.
    fig, ax = plt.subplots(figsize=(16, 9))
    point_numbers = range(rw.num_points)
    ax.scatter(rw.x_values, rw.y_values, c=point_numbers, cmap=plt.cm.spring,
               edgecolors='none', s=1)

    # Emphasize the first and last points; they are drawn after the walk so
    # they sit on top of it.
    ax.scatter(0, 0, c='green', edgecolors='white', s=100)
    ax.scatter(rw.x_values[-1], rw.y_values[-1], c='red', edgecolors='white', s=100)

    # Remove the axes.
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    plt.show()

    # Stop on any form of "n" (e.g. "N" or " n "); anything else continues.
    keep_running = input("Make another walk? (y/n): ")
    if keep_running.strip().lower() == 'n':
        break

#### Exercise: Molecular Motion

In [None]:
# Creating the RandomWalk() Class

from random import choice

class RandomWalk:
    """Generate a random walk whose vertical steps can be far longer than its horizontal ones."""

    def __init__(self, num_points=5000):
        """Record the target number of points and start the walk at the origin."""
        self.num_points = num_points

        # Every walk begins at (0, 0).
        self.x_values = [0]
        self.y_values = [0]

    @staticmethod
    def _random_step(distances):
        """Return a signed step: a random direction times a random distance."""
        # The direction is drawn before the distance.
        return choice([-1, 1]) * choice(distances)

    def fill_walk(self):
        """Append random steps until the walk holds num_points points."""
        while len(self.x_values) < self.num_points:
            # Horizontal steps span 0-4 units; vertical steps span 0-99.
            x_step = self._random_step(range(5))
            y_step = self._random_step(range(100))

            # Record the move only when the walk actually goes somewhere;
            # a (0, 0) step is discarded and another one is drawn.
            if x_step != 0 or y_step != 0:
                self.x_values.append(self.x_values[-1] + x_step)
                self.y_values.append(self.y_values[-1] + y_step)

In [None]:
# from random_walk import RandomWalk
import matplotlib.pyplot as plt

# The style sheet never changes between walks, so select it once up front.
plt.style.use('classic')

while True:
    # Make a random walk.
    rw = RandomWalk()
    rw.fill_walk()

    # Connect the points with a thin line instead of drawing separate dots.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(rw.x_values, rw.y_values, c=(0.5, 0.5, 0.8), linewidth=1)

    # Set chart title.
    ax.set_title('Molecular Motion', fontsize=18)

    # Remove the axes.
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    # Emphasize the first and last points.
    ax.scatter(0, 0, c='green', edgecolors='gray', s=100)
    ax.scatter(rw.x_values[-1], rw.y_values[-1], c='red', edgecolors='gray', s=100)

    # Save the plot; each new walk overwrites the previous image.
    plt.savefig('resources/molecular_motion_plot.png', bbox_inches='tight')

    plt.show()

    # Stop on any form of "n" (e.g. "N" or " n "); anything else continues.
    keep_running = input("Make another motion? (y/n): ")
    if keep_running.strip().lower() == 'n':
        break

### Rolling Dice with Plotly

- In this project, we’ll analyze the results of rolling dice. When you roll one regular, six-sided die, you have an equal chance of rolling any of the numbers from 1 through 6. However, when you use two dice, you’re more likely to roll certain numbers rather than others. We’ll try to determine which numbers are most likely to occur by generating a data set that represents rolling dice. Then we’ll plot the results of a large number of rolls to determine which results are more likely than others.

- Plotly is particularly useful when you’re creating visualizations that will be displayed in a browser, because the visualizations will scale automatically to fit the viewer’s screen. Visualizations that Plotly generates are also interactive.

In [None]:
# Creating the Die Class

from random import randint

class Die:
    """Model one die with a configurable number of sides."""

    def __init__(self, num_sides=6):
        """Create the die; it has six sides unless told otherwise."""
        self.num_sides = num_sides

    def roll(self):
        """Roll the die: return a random integer from 1 through num_sides, inclusive."""
        lowest, highest = 1, self.num_sides
        return randint(lowest, highest)

In [None]:
# Rolling the Die

from die import Die

# Create a D6.
die = Die()

# Roll the die 100 times, collecting every result in a list.
results = [die.roll() for _ in range(100)]

print(results)

In [None]:
# Analyzing the Results

from die import Die

# Create a D6.
die = Die()

# Roll the die 1000 times, collecting every result in a list.
results = [die.roll() for _ in range(1000)]

# Map each face (1 through num_sides) to the number of times it came up.
frequencies = {
    value: results.count(value)
    for value in range(1, die.num_sides + 1)
}

print(frequencies)

In [None]:
# Making a Histogram

from plotly.graph_objs import Bar, Layout
from plotly import offline

# One bar per die face: faces on the x-axis, their counts on the y-axis.
faces, counts = list(frequencies), list(frequencies.values())
data = [Bar(x=faces, y=counts)]

# Axis titles and the chart title.
x_axis_config = dict(title='Result')
y_axis_config = dict(title='Frequency of Result')
my_layout = Layout(
    title='Results of rolling one D6 1000 times',
    xaxis=x_axis_config,
    yaxis=y_axis_config,
)

# Render the chart inline in the notebook.
offline.iplot(dict(data=data, layout=my_layout), filename='d6.html')

In [None]:
# Rolling Two Dice

from plotly.graph_objs import Bar, Layout
from plotly import offline

from die import Die

# Create two D6 dice.
die_1, die_2 = Die(), Die()

# Roll both dice 50,000 times, recording the sum of each roll.
results = [die_1.roll() + die_2.roll() for _ in range(50_000)]

# Map each possible sum (2 through the largest total) to how often it came up.
max_result = die_1.num_sides + die_2.num_sides
frequencies = {
    value: results.count(value)
    for value in range(2, max_result + 1)
}

# Visualize the results as a bar chart.
data = [Bar(x=list(frequencies), y=list(frequencies.values()))]

# dtick=1 asks for a tick mark at every integer result.
x_axis_config = dict(title='Result', dtick=1)
y_axis_config = dict(title='Frequency of Result')
my_layout = Layout(
    title='Results of rolling two D6 50000 times',
    xaxis=x_axis_config,
    yaxis=y_axis_config,
)

offline.plot(dict(data=data, layout=my_layout), filename='d6_d6.html')

In [None]:
# Rolling Dice of Different Sizes

from plotly.graph_objs import Bar, Layout
from plotly import offline

from die import Die

# Create a D6 and a D10.
die_1, die_2 = Die(), Die(10)

# Roll both dice 50,000 times, recording the sum of each roll.
results = [die_1.roll() + die_2.roll() for _ in range(50_000)]

# Map each possible sum (2 through the largest total) to how often it came up.
max_result = die_1.num_sides + die_2.num_sides
frequencies = {
    value: results.count(value)
    for value in range(2, max_result + 1)
}

# Visualize the results as a bar chart.
data = [Bar(x=list(frequencies), y=list(frequencies.values()))]

# dtick=1 asks for a tick mark at every integer result.
x_axis_config = dict(title='Result', dtick=1)
y_axis_config = dict(title='Frequency of Result')
my_layout = Layout(
    title='Results of rolling a D6 and D10 50000 times',
    xaxis=x_axis_config,
    yaxis=y_axis_config,
)

offline.iplot(dict(data=data, layout=my_layout), filename='d6_d10.html')

#### Exercise: Rolling Dice

In [None]:
# Two D8s

from plotly.graph_objs import Bar, Layout
from plotly import offline

from die import Die

# Create two D8s.
die_1, die_2 = Die(8), Die(8)

# Roll both dice 1,000 times, recording the sum of each roll.
results = [die_1.roll() + die_2.roll() for _ in range(1_000)]

# Map each possible sum (2 through the largest total) to how often it came up.
max_result = die_1.num_sides + die_2.num_sides
frequencies = {
    value: results.count(value)
    for value in range(2, max_result + 1)
}

# Visualize the results as a bar chart.
data = [Bar(x=list(frequencies), y=list(frequencies.values()))]

# dtick=1 asks for a tick mark at every integer result.
x_axis_config = dict(title='Result', dtick=1)
y_axis_config = dict(title='Frequency of Result')
my_layout = Layout(
    title='Results of rolling two D8s 1000 times',
    xaxis=x_axis_config,
    yaxis=y_axis_config,
)

offline.iplot(dict(data=data, layout=my_layout), filename='d8_d8.html')

In [None]:
# Three Dice

from plotly.graph_objs import Bar, Layout
from plotly import offline

from die import Die

# Create 3 D6s.
die_1 = Die()
die_2 = Die()
die_3 = Die()

# Roll all three dice 1,000 times, recording the sum of each roll.
results = [die_1.roll() + die_2.roll() + die_3.roll() for _ in range(1_000)]

# Calculate the frequencies of each number.
# Three dice can total no less than 3 (every die showing 1), so counting
# starts there; starting at 2 would add a bar that is always empty.
min_result = 3
max_result = die_1.num_sides + die_2.num_sides + die_3.num_sides
frequencies = {
    value: results.count(value)
    for value in range(min_result, max_result + 1)
}

# Visualize the results.
data = [Bar(x=list(frequencies.keys()),
            y=list(frequencies.values()))]

# dtick=1 asks for a tick mark at every integer result.
x_axis_config = {'title': 'Result', 'dtick': 1}
y_axis_config = {'title': 'Frequency of Result'}
my_layout = Layout(title='Results of rolling three D6s 1000 times',
                   xaxis=x_axis_config, yaxis=y_axis_config)

offline.iplot({'data': data, 'layout': my_layout}, filename='d6_d6_d6.html')

In [None]:
# Multiplication:

from plotly.graph_objs import Bar, Layout
from plotly import offline

from die import Die

# Create two D8s.
die_1 = Die(8)
die_2 = Die(8)

# Roll both dice 50,000 times, recording the product of each roll.
results = [die_1.roll() * die_2.roll() for _ in range(50_000)]

# Calculate the frequencies of each number.
# Products range from 1 (1 x 1) up to the product of the side counts
# (8 x 8 = 64); using the sum of the sides as the upper bound would discard
# every product above 16, and starting at 2 would discard the product 1.
min_result = 1
max_result = die_1.num_sides * die_2.num_sides
frequencies = {
    value: results.count(value)
    for value in range(min_result, max_result + 1)
}

# Visualize the results. Values that cannot be produced (for example,
# primes greater than 8) appear as empty bars.
data = [Bar(x=list(frequencies.keys()),
            y=list(frequencies.values()))]

# dtick=1 asks for a tick mark at every integer result.
x_axis_config = {'title': 'Result', 'dtick': 1}
y_axis_config = {'title': 'Frequency of Result'}
my_layout = Layout(title='Results of multiplying two D8s 50000 times',
                   xaxis=x_axis_config, yaxis=y_axis_config)

offline.iplot({'data': data, 'layout': my_layout}, filename='d8_d8_multiplication.html')

In [None]:
# Die Comprehensions:

from plotly.graph_objs import Bar, Layout
from plotly import offline

from die import Die

# Create two D6s.
die_1 = Die()
die_2 = Die()

# Roll both dice 1,000 times, recording the sum of each roll.
results = [die_1.roll() + die_2.roll() for _ in range(1_000)]

# Every possible sum, from 2 up to the largest total the dice can show.
max_result = die_1.num_sides + die_2.num_sides
x_values = list(range(2, max_result + 1))

# How often each possible sum came up, in the same order as x_values.
frequencies = [results.count(total) for total in x_values]

# Visualize the results as a bar chart.
data = [Bar(x=x_values, y=frequencies)]

# dtick=1 asks for a tick mark at every integer result.
x_axis_config = dict(title='Result', dtick=1)
y_axis_config = dict(title='Frequency of Result')
my_layout = Layout(
    title='Results of rolling two D6s 1000 times',
    xaxis=x_axis_config,
    yaxis=y_axis_config,
)

offline.iplot(dict(data=data, layout=my_layout), filename='d6_d6_with_list_comprehensions.html')

#### Exercise: Matplotlib method

In [None]:
import matplotlib.pyplot as plt

from die import Die

# Create two D6s.
die_1, die_2 = Die(), Die()

# Make some rolls, and store results in a list.
results = [die_1.roll() + die_2.roll() for roll_num in range(1000)]

# Analyze the results.
max_result = die_1.num_sides + die_2.num_sides
frequencies = [results.count(value) for value in range(2, max_result+1)]

# Visualize the results with Matplotlib.
# Select the style before creating the figure: a style sheet only fully
# applies to figures and axes created after it is set.
plt.style.use('ggplot')
fig, ax = plt.subplots(figsize=(10, 6))
x_values = list(range(2, max_result+1))
ax.bar(x_values, frequencies, color='green')

# Set chart title and label axes.
ax.set_title('Results of rolling two D6s 1000 times', fontsize=18)
ax.set_xlabel('Value', fontsize=14)
ax.set_ylabel('Frequency of Result', fontsize=14)

plt.show()

In [None]:
import matplotlib.pyplot as plt

from die import Die

# Create two D6s.
die_1, die_2 = Die(), Die()

# Make some rolls, and store results in a list.
results = [die_1.roll() + die_2.roll() for roll_num in range(1000)]

# Analyze the results.
max_result = die_1.num_sides + die_2.num_sides
frequencies = [results.count(value) for value in range(2, max_result+1)]

# Visualize the results with Matplotlib.
# The 'seaborn' style sheet was renamed 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name was removed in 3.8, so use whichever name this install offers.
# The style is selected before the figure is created, because a style sheet
# only fully applies to figures and axes created after it is set.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
fig, ax = plt.subplots(figsize=(10, 6))

x_values = list(range(2, max_result+1))
ax.bar(x_values, frequencies, color='green', alpha=0.7, edgecolor='blue')

# Set chart title, label axes, and size the tick labels.
ax.set_title('Results of rolling two D6s 1000 times', fontsize=18)
ax.set_xlabel('Value', fontsize=14)
ax.set_ylabel('Frequency of Result', fontsize=14)
ax.tick_params(axis='both', labelsize=14)

plt.show()

## CH16 - Downloading Data

Downloading Your Own Data: [NOAA Climate Data Online](https://www.ncdc.noaa.gov/cdo-web/)


### The CSV File Format

comma-separated values

In [None]:
# Parsing the CSV File Headers

import csv

filename = 'data/sitka_weather_07-2018_simple.csv'

# The csv module expects its file to be opened with newline='' so that it
# can handle line endings (including newlines inside quoted fields) itself.
with open(filename, newline='') as f:
    reader = csv.reader(f)

    # The first row of the file holds the column headers.
    header_row = next(reader)
    print(header_row)

In [None]:
# Printing the Headers and Their Positions

# Show each column header next to its position in the header row.
for position, name in enumerate(header_row):
    print(f"{position} {name}")

In [None]:
# Extracting and Reading Data

# The csv module expects its file to be opened with newline='' so that it
# can handle line endings (including newlines inside quoted fields) itself.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # Consume the header so only data rows remain.

    # Get high temperatures from this file: column 5 of every data row,
    # converted from text to an integer.
    highs = [int(row[5]) for row in reader]

print(highs)

In [None]:
# Plotting Data in a Temperature Chart

import matplotlib.pyplot as plt

# The 'seaborn' style sheet was renamed 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name was removed in 3.8, so use whichever name this install offers.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
fig, ax = plt.subplots()

# With no x-values given, the highs are plotted against their list indexes.
ax.plot(highs, c='red')

# Format plot.
plt.title('Daily High Temperatures, July 2018', fontsize=20)
plt.ylabel('Temperature(F)', fontsize=14)
plt.tick_params(axis='both', labelsize=14)

plt.show()

#### The datetime Module

- We can construct an object representing July 1, 2018 using the `strptime()` method from the datetime module.

- The strptime() method can take a variety of arguments to determine how to interpret the date.

| Argument | Meaning |
| :--- | :--- |
| %A | Weekday name, such as Monday |
| %B | Month name, such as January |
| %m | Month, as a number (01 to 12) |
| %d | Day of the month, as a number (01 to 31) |
| %Y | Four-digit year, such as 2019 |
| %y | Two-digit year, such as 19 |
| %H | Hour, in 24-hour format (00 to 23) |
| %I | Hour, in 12-hour format (01 to 12) |
| %p | am or pm |
| %M | Minutes (00 to 59) |
| %S | Seconds (00 to 61) |

In [None]:
from datetime import datetime
# Parse a year-month-day string into a datetime object and display it.
date_format = '%Y-%m-%d'
first_date = datetime.strptime('2020-04-14', date_format)
print(first_date)

In [None]:
# Plotting Dates

import matplotlib.pyplot as plt
from datetime import datetime
import csv

filename = 'data/sitka_weather_07-2018_simple.csv'

# The csv module expects its file to be opened with newline='' so that it
# can handle line endings (including newlines inside quoted fields) itself.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # Consume the header so only data rows remain.

    # Get dates and high temperatures from this file. Column 2 is parsed as
    # a year-month-day date; column 5 is converted from Fahrenheit to
    # Celsius and rounded to two decimal places.
    dates, highs = [], []
    for row in reader:
        date = datetime.strptime(row[2], '%Y-%m-%d')
        high = round((int(row[5]) - 32) * 5/9, 2)
        dates.append(date)
        highs.append(high)

# The 'seaborn' style sheet was renamed 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name was removed in 3.8, so use whichever name this install offers.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(dates, highs, c='red')

# Format plot.
plt.title('Daily High Temperatures, July 2018', fontsize=24)
plt.xlabel('', fontsize=14)
fig.autofmt_xdate() # draw the date labels diagonally to prevent them from overlapping.
plt.ylabel('Temperature(C)', fontsize=14)
plt.tick_params(axis='both', labelsize=14)

plt.show()

In [None]:
# Plotting a Longer Timeframe

import matplotlib.pyplot as plt
from datetime import datetime
import csv

filename = 'data/sitka_weather_2018_simple.csv'

# The csv module expects its file to be opened with newline='' so that it
# can handle line endings (including newlines inside quoted fields) itself.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # Consume the header so only data rows remain.

    # Get dates and high temperatures from this file. Column 2 is parsed as
    # a year-month-day date; column 5 is converted from Fahrenheit to
    # Celsius and rounded to two decimal places.
    dates, highs = [], []
    for row in reader:
        date = datetime.strptime(row[2], '%Y-%m-%d')
        high = round((int(row[5]) - 32) * 5/9, 2)
        dates.append(date)
        highs.append(high)

# The 'seaborn' style sheet was renamed 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name was removed in 3.8, so use whichever name this install offers.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(dates, highs, c='red')

# Format plot.
plt.title('Daily High Temperatures - 2018', fontsize=24)
plt.xlabel('', fontsize=14)
fig.autofmt_xdate() # draw the date labels diagonally to prevent them from overlapping.
plt.ylabel('Temperature(C)', fontsize=14)
plt.tick_params(axis='both', labelsize=14)

plt.show()

In [None]:
# Plotting a Second Data Series

import matplotlib.pyplot as plt
from datetime import datetime
import csv

filename = 'data/sitka_weather_2018_simple.csv'

# The csv module expects its file to be opened with newline='' so that it
# can handle line endings (including newlines inside quoted fields) itself.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # Consume the header so only data rows remain.

    # Get dates plus high and low temperatures from this file. Column 2 is
    # parsed as a year-month-day date; columns 5 (high) and 6 (low) are
    # converted from Fahrenheit to Celsius and rounded to two decimal places.
    dates, highs, lows = [], [], []
    for row in reader:
        date = datetime.strptime(row[2], '%Y-%m-%d')
        high = round((int(row[5]) - 32) * 5/9, 2)
        low = round((int(row[6]) - 32) * 5/9, 2)
        dates.append(date)
        highs.append(high)
        lows.append(low)

# Plot the high and low temperatures.
# The 'seaborn' style sheet was renamed 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name was removed in 3.8, so use whichever name this install offers.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(dates, highs, c='red')
ax.plot(dates, lows, c='green')

# Format plot. The title names both series, since both are drawn.
plt.title('Daily High and Low Temperatures - 2018', fontsize=24)
plt.xlabel('', fontsize=14)
fig.autofmt_xdate() # draw the date labels diagonally to prevent them from overlapping.
plt.ylabel('Temperature(C)', fontsize=14)
plt.tick_params(axis='both', labelsize=14)

plt.show()

#### Shading an Area in the Chart

In [None]:
import matplotlib.pyplot as plt
from datetime import datetime
import csv

filename = 'data/sitka_weather_2018_simple.csv'

# The csv module expects its file to be opened with newline='' so that it
# can handle line endings (including newlines inside quoted fields) itself.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # Consume the header so only data rows remain.

    # Get dates plus high and low temperatures from this file. Column 2 is
    # parsed as a year-month-day date; columns 5 (high) and 6 (low) are
    # converted from Fahrenheit to Celsius and rounded to two decimal places.
    dates, highs, lows = [], [], []
    for row in reader:
        date = datetime.strptime(row[2], '%Y-%m-%d')
        high = round((int(row[5]) - 32) * 5/9, 2)
        low = round((int(row[6]) - 32) * 5/9, 2)
        dates.append(date)
        highs.append(high)
        lows.append(low)

# Plot the high and low temperatures.
# The 'seaborn' style sheet was renamed 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name was removed in 3.8, so use whichever name this install offers.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
fig, ax = plt.subplots(figsize=(14, 7))

# Draw both lines semi-transparent, then lightly shade the band between them.
ax.plot(dates, highs, c='red', alpha=0.5)
ax.plot(dates, lows, c='green', alpha=0.5)
ax.fill_between(dates, highs, lows, facecolor='green', alpha=0.1)

# Format plot.
plt.title('Daily High and Low Temperatures - 2018', fontsize=24)
plt.xlabel('', fontsize=14)
fig.autofmt_xdate() # draw the date labels diagonally to prevent them from overlapping.
plt.ylabel('Temperature(C)', fontsize=14)
plt.tick_params(axis='both', labelsize=14)

plt.show()

In [None]:
# Error Checking

import matplotlib.pyplot as plt
import csv

filename = 'data/death_valley_2018_simple.csv'

# The csv module expects its file to be opened with newline='' so that it
# can handle line endings (including newlines inside quoted fields) itself.
with open(filename, newline='') as f:
    reader = csv.reader(f)

    # Print each column header with its index to see how this file is laid out.
    header_row = next(reader)
    for index, column_header in enumerate(header_row):
        print(index, column_header)

    # Also show the first data row, which follows the header.
    print("\nPrint the first row after removing header row:")
    print(next(reader))

The date is in the same position at index 2. But the high and low temperatures
are at indexes 4 and 5, so we’d need to change the indexes in our code to reflect these new positions.

In [None]:
import matplotlib.pyplot as plt
from datetime import datetime
import csv

filename = 'data/death_valley_2018_simple.csv'
with open(filename) as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain

    # Get high and low temperatures and dates from this file.
    # Column 2 is parsed as the date; columns 4 and 5 are read as the high and
    # low and converted with (x - 32) * 5/9, i.e. Fahrenheit to Celsius.
    dates, highs, lows = [], [], []
    for row in reader:
        crt_date = datetime.strptime(row[2], '%Y-%m-%d')
        try:
            high = round((int(row[4]) - 32) * 5/9, 2)
            low = round((int(row[5]) - 32) * 5/9, 2)
        except ValueError:
            # int('') raises ValueError: report the day and leave it out so
            # the three lists stay aligned.
            print(f"Missing data for {crt_date}.")
        else:
            dates.append(crt_date)
            highs.append(high)
            lows.append(low)

# Plot the high and low temperatures.
# Matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and
# newer releases no longer accept the old name, so use whichever is available.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(dates, highs, c='red', alpha=0.5)
ax.plot(dates, lows, c='green', alpha=0.5)
# Shade the band between the two curves on the same axes they are drawn on.
ax.fill_between(dates, highs, lows, facecolor='green', alpha=0.1)

# Format plot.
title = "Daily high and low temperatures - 2018\nDeath Valley, CA"
ax.set_title(title, fontsize=20)
ax.set_xlabel('', fontsize=14)
fig.autofmt_xdate()  # draw the date labels diagonally to prevent them from overlapping.
ax.set_ylabel('Temperature(C)', fontsize=14)
ax.tick_params(axis='both', labelsize=14)

plt.show()

#### Daily Temperatures: Plotly Method

In [None]:
import plotly.graph_objs as go
from plotly import offline
from datetime import datetime
import csv

filename = 'data/death_valley_2018_simple.csv'
with open(filename) as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain

    # Get high and low temperatures and dates from this file.
    # Column 2 is parsed as the date; columns 4 and 5 are read as the high and
    # low and converted with (x - 32) * 5/9, i.e. Fahrenheit to Celsius.
    dates, highs, lows = [], [], []
    for row in reader:
        crt_date = datetime.strptime(row[2], '%Y-%m-%d')
        try:
            high = round((int(row[4]) - 32) * 5/9, 2)
            low = round((int(row[5]) - 32) * 5/9, 2)
        except ValueError:
            # int('') raises ValueError: report the day and leave it out so
            # the three lists stay aligned.
            print(f"Missing data for {crt_date}.")
        else:
            dates.append(crt_date)
            highs.append(high)
            lows.append(low)

# Interior Filling for Area Chart
# The second trace uses fill='tonexty', which fills towards the trace added
# just before it, so the highs must come first in `data`.
track_highs = go.Scatter(x=dates, y=highs,
                         name='High temperatures',
                         mode='lines',
                         fill=None)
track_lows = go.Scatter(x=dates, y=lows,
                        name='Low temperatures',
                        mode='lines',
                        fill='tonexty')
data = [track_highs, track_lows]

# Format plots.
# Plotly text ignores '\n'; a line break has to be written as '<br>'.
title = 'Daily high and low temperatures - 2018<br>Death Valley, CA'
y_axis_config = {'title': 'Temperatures(C)'}
my_layout = go.Layout(title=title, yaxis=y_axis_config)
fig = go.Figure({'data': data, 'layout': my_layout})

offline.iplot(fig, filename='Daily high and low temperatures - 2018.html')

#### Exercise: Sitka Rainfall

In [None]:
#  Sitka Rainfall

import plotly.graph_objects as go
from plotly import offline
import csv

filename = 'data/sitka_weather_2018_simple.csv'
with open(filename) as f:
    contents = csv.reader(f)
    # Print each column name together with its position in the header row
    header_row = next(contents)
    for index, column_header in enumerate(header_row):
        print(f"{index} {column_header}")

In [None]:
with open(filename) as f:
    contents = csv.reader(f)
    next(contents)  # skip the header row

    # Get the rainfall amounts and dates from the file.
    # Column 2 is used as the date and column 3 as the rainfall amount.
    rainfalls, dates = [], []
    for row in contents:
        try:
            # csv yields text; convert so the y-axis is plotted numerically.
            rainfall = float(row[3])
        except ValueError:
            # A blank cell cannot be converted: report the day and skip it so
            # both lists stay the same length.
            print(f"Missing data for {row[2]}.")
        else:
            rainfalls.append(rainfall)
            dates.append(row[2])

# Plot the rainfall
trace = go.Scatter(x=dates, y=rainfalls, mode='lines')
data = [trace]

# Format the plot
title = 'Daily rainfall in Sitka - 2018'
y_axis_config = {'title': 'PRCP'}
layout = go.Layout(title=title, yaxis=y_axis_config)
fig = go.Figure(data=data, layout=layout)

# iplot()'s second positional parameter is show_link, not a name, so the
# name has to be passed by keyword.
offline.iplot(fig, filename='Daily rainfall in Sitka - 2018')

In [None]:
# Make a comparision between the rainfall amounts in Sitka and Death Valley

class Rainfall:
    """Represent the rainfall amounts in a place.

    Attributes:
        filename: path of the CSV file to read.
        rainfalls: rainfall amounts as floats, filled by get_rainfall().
        dates: the date text matching each entry of ``rainfalls``.
    """

    def __init__(self, filename):
        """Store the CSV path and start with empty result lists."""
        self.filename = filename
        self.rainfalls, self.dates = [], []

    def get_rainfall(self):
        """Read the rainfall amounts and dates from the file.

        Column 2 of each row is kept as the date and column 3 is converted to
        a float. Rows whose rainfall cell cannot be converted (for example a
        blank cell) are reported with a message and skipped.
        """
        # Start from scratch so calling this twice does not duplicate the data.
        self.rainfalls, self.dates = [], []

        with open(self.filename) as f:
            contents = csv.reader(f)
            next(contents)  # skip the header row

            for row in contents:
                try:
                    # csv yields text; plotting needs numbers.
                    rainfall = float(row[3])
                except ValueError:
                    print(f"Missing data for {row[2]}.")
                else:
                    self.rainfalls.append(rainfall)
                    self.dates.append(row[2])

# Create two rainfalls
sitka_rainfall = Rainfall('data/sitka_weather_2018_simple.csv')
death_valley_rainfall = Rainfall('data/death_valley_2018_simple.csv')

# Get the rainfall amounts in both places
sitka_rainfall.get_rainfall()
death_valley_rainfall.get_rainfall()

# Plot the rainfall
# The second trace uses fill='tonexty', which fills towards the trace added
# just before it (the Sitka trace).
sitka_track = go.Scatter(x=sitka_rainfall.dates,
                         y=sitka_rainfall.rainfalls,
                         name='Sitka Rainfalls',
                         mode='lines',
                         fill=None)
death_valley_track = go.Scatter(x=death_valley_rainfall.dates,
                                y=death_valley_rainfall.rainfalls,
                                name='Death Valley Rainfalls',
                                mode='lines',
                                fill='tonexty')
data = [sitka_track, death_valley_track]

# Format the plot
# Plotly text ignores '\n'; a line break has to be written as '<br>'.
chart_title = 'Comparison of the rainfall amounts:<br>Sitka vs Death Valley, 2018'
y_axis_config = {'title': 'PRCP'}
layout = go.Layout(title=chart_title, yaxis=y_axis_config)
fig = go.Figure(data=data, layout=layout)

# iplot()'s second positional parameter is show_link, not a name, so the
# name has to be passed by keyword.
offline.iplot(fig, filename='Rainfall comparison - Sitka vs Death Valley 2018')

#### Exercise: Sitka–Death Valley Comparison

In [None]:
import plotly.graph_objects as go
from plotly import offline
from plotly.subplots import make_subplots
import csv

def check_header(filename):
    """Print each column name of a CSV file's header row with its index."""

    with open(filename) as f:
        # Only the first row is needed
        column_names = next(csv.reader(f))
        print(f"\n{filename} :")
        for position, name in enumerate(column_names):
            print(f"{position} {name}")
            
# Show the header layout of both weather files, Sitka first
for weather_file in ('data/sitka_weather_2018_simple.csv',
                     'data/death_valley_2018_simple.csv'):
    check_header(weather_file)

In [None]:
# Make a comparision between the temperatures in Sitka and Death Valley

class Temperature:
    """Represent the temperatures in a place.

    Attributes:
        filename: path of the CSV file to read.
        tmax_index: column index of the maximum temperature.
        tmin_index: column index of the minimum temperature (tmax_index + 1).
        tmax, tmin, dates: parallel lists filled by get_temperatures().
    """

    def __init__(self, filename, tmax_index):
        """Store the CSV path and the column positions of the temperatures."""
        self.filename = filename
        self.tmax_index = tmax_index
        # The minimum is read from the column right after the maximum.
        self.tmin_index = tmax_index + 1
        self.tmax, self.tmin, self.dates = [], [], []

    def get_temperatures(self):
        """Read the maximum and minimum temperatures and dates from the file.

        Column 2 is parsed as a '%Y-%m-%d' date. Temperatures are converted
        with (x - 32) * 5/9 (Fahrenheit to Celsius) and rounded to 2 decimals.
        Rows whose temperature cells are not integers are reported and skipped.
        """
        # Start from scratch so calling this twice does not duplicate the data.
        self.tmax, self.tmin, self.dates = [], [], []

        with open(self.filename) as f:
            contents = csv.reader(f)
            next(contents)  # skip the header row

            for row in contents:
                crt_date = datetime.strptime(row[2], '%Y-%m-%d')
                try:
                    high = round((int(row[self.tmax_index]) - 32) * 5/9, 2)
                    low = round((int(row[self.tmin_index]) - 32) * 5/9, 2)
                except ValueError:
                    print(f"Missing data for {crt_date}.")
                else:
                    self.tmax.append(high)
                    self.tmin.append(low)
                    self.dates.append(crt_date)

    def _chart_title(self):
        """Return the chart title chosen from the file name."""
        if 'sitka' in self.filename:
            return 'Temperature range of Sitka in 2018'
        if 'death' in self.filename:
            return 'Temperature range of Death Valley in 2018'
        # The file name lives on the instance; a bare `filename` is undefined here.
        return f'Temperature range of {self.filename} place.'

    def get_temp_range(self):
        """Visualize the temperature range as a filled area chart."""

        tmax_trace = go.Scatter(x=self.dates,
                                y=self.tmax,
                                name='TMAX',
                                mode='lines',
                                fill=None)
        # fill='tonexty' fills towards the trace added just before this one.
        tmin_trace = go.Scatter(x=self.dates,
                                y=self.tmin,
                                name='TMIN',
                                mode='lines',
                                fill='tonexty')

        data = [tmax_trace, tmin_trace]
        chart_title = self._chart_title()

        # Format the plot
        y_axis_config = {'title': 'Temperature', 'range': [-10, 60]}
        layout = go.Layout({'title': chart_title,
                            'yaxis': y_axis_config})
        fig = go.Figure(data=data, layout=layout)

        # iplot()'s second positional parameter is show_link, not a name, so
        # the name has to be passed by keyword.
        offline.iplot(fig, filename=chart_title)

# One Temperature instance per place; the number is the TMAX column index
sitka_temp = Temperature('data/sitka_weather_2018_simple.csv', 5)
death_valley_temp = Temperature('data/death_valley_2018_simple.csv', 4)

# Load the temperatures for both places first, then draw both charts
places = (sitka_temp, death_valley_temp)
for place in places:
    place.get_temperatures()
for place in places:
    place.get_temp_range()

#### Exercise: Automatic Indexes

In [None]:
import plotly.graph_objects as go
from plotly import offline
from datetime import datetime
import csv

class Station:
    """A weather station whose readings live in a CSV file."""

    def __init__(self, filename):
        """Remember the CSV path and the column names to look up."""
        self.filename = filename
        self.headers = ['DATE', 'TMAX', 'TMIN']
        self.header_index = {}

    def get_index(self):
        """Record the column position of every name in ``self.headers``.

        Names that do not occur in the header row are left out of
        ``self.header_index``.
        """
        with open(self.filename) as f:
            header_row = next(csv.reader(f))

        # Later columns overwrite earlier ones, so a repeated column name
        # resolves to its last position.
        last_position = {name: position
                         for position, name in enumerate(header_row)}
        for wanted in self.headers:
            if wanted in last_position:
                self.header_index[wanted] = last_position[wanted]

class Temperature(Station):
    """Represent aspects of a station, specific to the temperatures.

    Attributes (in addition to those of Station):
        dates, tmaxs, tmins: parallel lists filled by get_temperatures().
    """

    def __init__(self, filename):
        """Initialize the station and start with empty result lists."""
        super().__init__(filename)
        self.dates, self.tmaxs, self.tmins = [], [], []

    def get_temperatures(self):
        """Read the maximum and minimum temperatures and dates from the file.

        Columns are located through ``self.header_index``. Dates are parsed as
        '%Y-%m-%d'; temperatures are converted from Fahrenheit to Celsius and
        rounded to 2 decimals. Rows whose temperature cells are not integers
        are reported and skipped.
        """
        # Look the columns up on demand so the method also works when
        # get_index() has not been called yet.
        if not self.header_index:
            self.get_index()

        # Start from scratch so calling this twice does not duplicate the data.
        self.dates, self.tmaxs, self.tmins = [], [], []

        with open(self.filename) as f:
            contents = csv.reader(f)
            next(contents)  # skip the header row

            for row in contents:
                crt_date = datetime.strptime(row[self.header_index['DATE']], '%Y-%m-%d')
                try:
                    # Convert F to C
                    high = round((int(row[self.header_index['TMAX']]) - 32) * 5/9, 2)
                    low = round((int(row[self.header_index['TMIN']]) - 32) * 5/9, 2)
                except ValueError:
                    print(f"Missing data for {crt_date}.")
                else:
                    self.tmaxs.append(high)
                    self.tmins.append(low)
                    self.dates.append(crt_date)

    def get_temp_range(self):
        """Visualize the temperature range as a filled area chart."""

        tmax_trace = go.Scatter(x=self.dates,
                                y=self.tmaxs,
                                name='TMAX',
                                mode='lines',
                                fill=None)
        # fill='tonexty' fills towards the trace added just before this one.
        tmin_trace = go.Scatter(x=self.dates,
                                y=self.tmins,
                                name='TMIN',
                                mode='lines',
                                fill='tonexty')

        data = [tmax_trace, tmin_trace]

        # Customize the chart title from the file name
        if 'sitka' in self.filename:
            chart_title = 'Temperature range of Sitka in 2018'
        elif 'death' in self.filename:
            chart_title = 'Temperature range of Death Valley in 2018'
        else:
            chart_title = f'Temperature range of {self.filename}.'

        # Format the plot
        y_axis_config = {'title': 'Temperature (C)', 'range': [-10, 60]}
        layout = go.Layout({'title': chart_title,
                            'yaxis': y_axis_config})
        fig = go.Figure(data=data, layout=layout)

        # iplot()'s second positional parameter is show_link, not a name, so
        # the name has to be passed by keyword.
        offline.iplot(fig, filename=chart_title)

# One Temperature instance per place
sk = Temperature('data/sitka_weather_2018_simple.csv')
dv = Temperature('data/death_valley_2018_simple.csv')
stations = (sk, dv)

# Each step is run for both places before moving on to the next step:
# locate the columns, read the temperatures, then draw the charts.
for station in stations:
    station.get_index()
for station in stations:
    station.get_temperatures()
for station in stations:
    station.get_temp_range()

In [None]:
# Quiz: Get the index by list comprehension

class Station:
    """A weather station whose readings live in a CSV file."""

    def __init__(self, filename):
        """Remember the CSV path and the column names to look up."""
        self.filename = filename
        self.headers = ['DATE', 'TMAX', 'TMIN']
        self.header_index = []

    def get_index(self):
        """Store the column positions of ``self.headers`` as a list.

        The positions follow the order of ``self.headers``; names missing
        from the header row contribute nothing.
        """
        with open(self.filename) as f:
            header_row = next(csv.reader(f))

        # Nested comprehension: the outer loop walks the wanted names, the
        # inner loop scans the header row for each of them.
        self.header_index = [
            position
            for wanted in self.headers
            for position, name in enumerate(header_row)
            if name == wanted
        ]
                        
# Build a station for the Sitka file and look up its column positions.
sk = Station('data/sitka_weather_2018_simple.csv')
sk.get_index()
# Bare expression: as the last line of a notebook cell its value is displayed.
sk.header_index

#### Exercise: Pandas method

In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly import offline
from datetime import datetime

def temp_f2c(temp):
    """Convert a Fahrenheit temperature to Celsius, rounded to 2 decimals."""
    celsius = (temp - 32) * 5/9
    return round(celsius, 2)

def viz_temp_range(filename):
    """Draw the TMAX/TMIN range of a station's CSV file as a filled area chart."""

    # Keep only the three needed columns and drop rows with a missing value
    readings = pd.read_csv(filename)[['DATE', 'TMAX', 'TMIN']].dropna(axis=0)

    def build_trace(column, fill):
        """Return the line trace for one temperature column, in Celsius."""
        return go.Scatter(x=list(readings['DATE']),
                          y=list(temp_f2c(readings[column])),
                          name=column,
                          mode='lines',
                          fill=fill)

    # fill='tonexty' on TMIN fills towards the TMAX trace added before it
    tmax_trace = build_trace('TMAX', None)
    tmin_trace = build_trace('TMIN', 'tonexty')

    # Pick the chart title from the file name
    if 'sitka' in filename:
        chart_title = 'Temperature range of Sitka in 2018'
    elif 'death' in filename:
        chart_title = 'Temperature range of Death Valley in 2018'
    else:
        chart_title = f'Temperature range of {filename}.'

    # Fixed y-axis range so charts of different stations are comparable
    y_axis_config = {'title': 'Temperature (C)', 'range': [-10, 60]}
    layout = go.Layout(title=chart_title, yaxis=y_axis_config)
    fig = go.Figure(data=[tmax_trace, tmin_trace], layout=layout)

    # Show the plot inline
    offline.iplot(fig)
    
# Draw one chart per station file, Sitka first
for station_file in ('data/sitka_weather_2018_simple.csv',
                     'data/death_valley_2018_simple.csv'):
    viz_temp_range(station_file)

#### Exercise: China Temperatures

In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly import offline

filename = 'data/china_2018_simple.csv'
# Load the whole CSV into a DataFrame.
data_full = pd.read_csv(filename)

# Show five randomly chosen rows as a quick look at the data.
data_full.sample(5)

In [None]:
# Keep only the columns relevant to temperatures
temp_fields = ['NAME', 'TAVG', 'DATE', 'TMAX', 'TMIN']

# Keep only the rows dated before April 1st (DATE is compared as text)
before_april = data_full['DATE'] < '2020-04-01'
data = data_full[temp_fields][before_april]

data.sample(5)

In [None]:
# Summary statistics of the filtered temperature data.
data.describe()

In [None]:
# Count the missing values in each column:
# isnull() marks every missing cell and sum() adds the marks up per column.
data.isnull().sum()

In [None]:
class City:
    """Temperature records of one city, taken from a shared DataFrame.

    Attributes:
        data: DataFrame with at least the NAME, DATE, TMAX and TMIN columns
            (TAVG is also read by code outside this class).
        city: text searched for in the NAME column.
        df: the city's cleaned rows; empty until get_dataframe() has run.
    """

    def __init__(self, city, data=data):
        """Store the city name and the DataFrame to search in."""

        self.data = data
        self.city = city
        self.df = pd.DataFrame()

    def _city_mask(self):
        """Return a boolean Series marking the rows whose NAME contains the city."""
        # na=False: a missing NAME counts as "no match" instead of producing
        # a NaN that cannot be used for boolean indexing.
        return self.data['NAME'].str.contains(self.city, na=False)

    def get_dataframe(self):
        """Build ``self.df`` from the rows of this city.

        Rows where both TMAX and TMIN are missing are dropped; remaining gaps
        are filled with the preceding row's value. Prints a message and leaves
        ``self.df`` untouched when the city does not occur in the data.
        """
        mask = self._city_mask()
        if not mask.any():
            print(f"Your city {self.city} is not included yet.")
            return

        city_rows = self.data[mask]

        # Keep the rows that have at least one of TMAX / TMIN
        has_temperature = city_rows[['TMAX', 'TMIN']].notnull().any(axis=1)

        # ffill() returns a new frame, so nothing is modified in place on a
        # slice of self.data.
        self.df = city_rows[has_temperature].ffill()

    def viz_temp_range(self):
        """Plot the daily TMAX/TMIN range of the city as a filled area chart."""

        if not self._city_mask().any():
            print(f"Your city {self.city} is not included yet.")
            return

        # Build the city's rows on demand so plotting also works when
        # get_dataframe() has not been called yet.
        if self.df.empty:
            self.get_dataframe()

        tmaxs_trace = go.Scatter(x=self.df['DATE'],
                                 y=self.df['TMAX'],
                                 name='TMax',
                                 line_color='red',
                                 fill=None,
                                 mode='lines')
        # fill='tonexty' fills towards the trace added just before this one.
        tmins_trace = go.Scatter(x=self.df['DATE'],
                                 y=self.df['TMIN'],
                                 name='TMin',
                                 line_color='violet',
                                 fill='tonexty',
                                 mode='lines')

        # Format plots
        title = f"Daily temperature range in {self.city} - Q1 2020"
        y_axis_config = {'title': 'Temperature (C)', 'range': [-15, 35]}
        my_layout = go.Layout({'title': title,
                               'yaxis': y_axis_config})
        fig = go.Figure(data=[tmaxs_trace, tmins_trace], layout=my_layout)

        offline.iplot(fig)

In [None]:
# List the cities you want to access
city_names = ['beijing', 'shanghai', 'guangzhou']

# The name is upper-cased before it is searched for in the NAME column.
for city_name in city_names:
    city = City(city_name.upper())
    city.get_dataframe()
    city.viz_temp_range()

In [None]:
# Compare the average temperatures of several cities in one chart

# Cities to include
city_names = ['beijing', 'shanghai', 'guangzhou']

# One line trace per city, collected for a single shared figure
data_plot = []
for city_name in city_names:
    # The name is upper-cased before it is searched for in the NAME column
    city = City(city_name.upper())
    city.get_dataframe()

    city_days = list(city.df['DATE'])
    city_averages = list(city.df['TAVG'])
    trace = go.Scatter(x=city_days, y=city_averages,
                       name=city_name.capitalize())
    data_plot.append(trace)

# Format plots
title = "Comparison between average temperatures - Q1 2020"
y_axis_config = {'title': 'Avg Temperature (C)', 'range': [-10, 30]}
my_layout = go.Layout(title=title, yaxis=y_axis_config)
fig = go.Figure(data=data_plot, layout=my_layout)

offline.iplot(fig)

### Mapping Global Data Sets: JSON Format

- The `json.load()` function converts the data into a format Python can work with: in this case, a giant dictionary.

- <span class="mark">The `json.dump()` function takes a JSON data object and a file object, and writes the data to that file.</span>

- The GeoJSON format follows the `(longitude, latitude)` convention, so the first coordinate of each point is the longitude and the second is the latitude.

In [None]:
# Examining JSON Data

import json

# Parse the raw earthquake file into Python objects
filename = 'data/eq_data_1_day_m1.json'
with open(filename) as f:
    all_eq_data = json.loads(f.read())

# Write the same data back out, indented so it is easy to read
readable_file = 'data/readable_eq_data.json'
with open(readable_file, 'w') as f:
    f.write(json.dumps(all_eq_data, indent=4))

In [None]:
# Making a List of All Earthquakes

# The value stored under the 'features' key is used as the list of earthquakes.
all_eq_dicts = all_eq_data['features']

# Print the number of eq records
print(len(all_eq_dicts))

In [None]:
# Extracting Magnitudes

# Magnitude of every earthquake, in file order
mags = [eq_dict['properties']['mag'] for eq_dict in all_eq_dicts]

print(mags[:10])

In [None]:
# Extracting Location Data

# One list per quantity, all in the same earthquake order
mags = [eq_dict['properties']['mag'] for eq_dict in all_eq_dicts]
# The first coordinate is used as the longitude, the second as the latitude
lons = [eq_dict['geometry']['coordinates'][0] for eq_dict in all_eq_dicts]
lats = [eq_dict['geometry']['coordinates'][1] for eq_dict in all_eq_dicts]

for values, shown in ((mags, 10), (lons, 5), (lats, 5)):
    print(values[:shown])

In [None]:
# Building a World Map

import plotly.graph_objects as go
from plotly import offline

# Every earthquake becomes one point on the map
trace = go.Scattergeo(lon=lons, lat=lats)

my_layout = go.Layout({'title': 'Global Earthquakes'})
fig = go.Figure({'data': [trace], 'layout': my_layout})

offline.iplot(fig)

In [None]:
# A Different Way of Specifying Chart Data

import plotly.graph_objects as go
from plotly import offline

# Describe the trace as a plain dictionary instead of a go.Scattergeo object
eq_trace = {'type': 'scattergeo', 'lon': lons, 'lat': lats}
data = [eq_trace]

my_layout = go.Layout({'title': 'Global Earthquakes'})
fig = go.Figure({'data': data, 'layout': my_layout})

offline.iplot(fig)

In [None]:
# Customizing Marker Size

import plotly.graph_objects as go
from plotly import offline

# Marker size grows with the magnitude (5 units per magnitude point)
marker_sizes = [5*magnitude for magnitude in mags]
eq_trace = {
    'type': 'scattergeo',
    'lon': lons,
    'lat': lats,
    'marker': {'size': marker_sizes},
}
data = [eq_trace]

my_layout = go.Layout({'title': 'Global Earthquakes'})
fig = go.Figure({'data': data, 'layout': my_layout})

offline.iplot(fig)

In [None]:
# Customizing Marker Size

import json
import plotly.graph_objects as go
from plotly import offline

# Load the json file
filename = 'data/eq_data_30_day_m1.json'
with open(filename) as f:
    all_eq_data = json.loads(f.read())

# Make a list of all eq
all_eq_dicts = all_eq_data['features']

# Collect the magnitudes and the location data, one list per quantity
mags = [eq_dict['properties']['mag'] for eq_dict in all_eq_dicts]
lons = [eq_dict['geometry']['coordinates'][0] for eq_dict in all_eq_dicts]
lats = [eq_dict['geometry']['coordinates'][1] for eq_dict in all_eq_dicts]

# Build a world map: size and colour of each marker both follow the magnitude
eq_markers = {
    'size': [3*magnitude for magnitude in mags],
    'color': mags,
    'colorscale': 'Rainbow',
    'colorbar': {'title': 'Magnitude'},
}
data = [{
    'type': 'scattergeo',
    'lon': lons,
    'lat': lats,
    'marker': eq_markers,
}]

my_layout = go.Layout({'title': 'Global Earthquakes'})
fig = go.Figure({'data': data, 'layout': my_layout})

offline.iplot(fig)

In [None]:
# the available colorscales

from plotly import colors

# Iterating a dictionary yields its keys, here the colorscale names
for key in colors.PLOTLY_SCALES:
    print(key)

In [None]:
# Adding Hover Text

import json
import plotly.graph_objects as go
from plotly import offline

# Load the json file
filename = 'data/eq_data_30_day_m1.json'
with open(filename) as f:
    all_eq_data = json.loads(f.read())

# Make a list of all eq
all_eq_dicts = all_eq_data['features']

# Pull the magnitudes, the location data and the hover texts
mags, lons, lats, hover_texts = [], [], [], []
for eq_dict in all_eq_dicts:
    properties = eq_dict['properties']
    mags.append(properties['mag'])
    coordinates = eq_dict['geometry']['coordinates']
    lons.append(coordinates[0])
    lats.append(coordinates[1])
    hover_texts.append(eq_dict['properties']['title'])

# Build a world map: size and colour of each marker both follow the magnitude
eq_markers = {
    'size': [3*magnitude for magnitude in mags],
    'color': mags,
    'colorscale': 'Rainbow',
    'reversescale': True,
    'colorbar': {'title': 'Magnitude'},
}
data = [{
    'type': 'scattergeo',
    'lon': lons,
    'lat': lats,
    'text': hover_texts,
    'marker': eq_markers,
}]

# The chart title comes from the data file's own metadata
my_layout = go.Layout({'title': all_eq_data['metadata']['title']})
fig = go.Figure({'data': data, 'layout': my_layout})

offline.iplot(fig)

#### Exercise: World Fires

In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly import offline

# Load the fire data from the CSV file into a DataFrame
filename = 'data/world_fires_1_day.csv'
df = pd.read_csv(filename)
# Display the first rows as a quick look at the columns
df.head()

In [None]:
# Scaled-down brightness, used for both marker size and marker colour
brightness = df['brightness']/25

fire_markers = {
    'size': brightness,
    'color': brightness,
    'opacity': 0.5,
    'colorscale': 'Rainbow',
    'colorbar': {'title': 'Brightness'},
    'line': {'color': 'rgba(135, 206, 250, 0.5)'},
}
data = [{
    'type': 'scattergeo',
    'lon': df['longitude'],
    'lat': df['latitude'],
    'marker': fire_markers,
}]

my_layout = go.Layout({'title': 'World Fires'})
fig = go.Figure({'data': data, 'layout': my_layout})

# Write the chart to an HTML file
offline.plot(fig, filename='World Fires.html')

## Working with APIs

### Using a Web API

We’ll use GitHub’s API to request information about Python projects on the site, and then generate an interactive visualization of the relative popularity of these projects using Plotly.

In [45]:
# Processing an API Response

import requests

# Make an API call and store the response
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
headers = {'Accept': 'application/vnd.github.v3+json'}
# requests waits forever by default; bound the wait so a stalled connection
# cannot hang the cell.
r = requests.get(url, headers=headers, timeout=10)
print(f"Status code: {r.status_code}")

# Store API response
response_dict = r.json()

# Print results
print(response_dict.keys())

Status code: 200
dict_keys(['total_count', 'incomplete_results', 'items'])


In [46]:
# Working with the Response Dictionary

import requests

# Make an API call and store the response
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
headers = {'Accept': 'application/vnd.github.v3+json'}
# requests waits forever by default; bound the wait so a stalled connection
# cannot hang the cell.
r = requests.get(url, headers=headers, timeout=10)
print(f"Status code: {r.status_code}")

# Store API response
response_dict = r.json()

# Print results (after parsing, so the keys belong to this response and not
# to whatever an earlier cell left in response_dict)
print(response_dict.keys())
print(f"Total repositories: {response_dict['total_count']}")

# Explore information about the repositories.
repo_dicts = response_dict['items']
print(f"Repositories returned: {len(repo_dicts)}")

# Examine the first repository
repo_dict = repo_dicts[0]
print(f"\nKeys: {len(repo_dict)}")
for key in sorted(repo_dict.keys()):
    print(key)

Statue code: 200
dict_keys(['total_count', 'incomplete_results', 'items'])
Total repositories: 4959004
Number of items: 30

Keys: 74
archive_url
archived
assignees_url
blobs_url
branches_url
clone_url
collaborators_url
comments_url
commits_url
compare_url
contents_url
contributors_url
created_at
default_branch
deployments_url
description
disabled
downloads_url
events_url
fork
forks
forks_count
forks_url
full_name
git_commits_url
git_refs_url
git_tags_url
git_url
has_downloads
has_issues
has_pages
has_projects
has_wiki
homepage
hooks_url
html_url
id
issue_comment_url
issue_events_url
issues_url
keys_url
labels_url
language
languages_url
license
merges_url
milestones_url
mirror_url
name
node_id
notifications_url
open_issues
open_issues_count
owner
private
pulls_url
pushed_at
releases_url
score
size
ssh_url
stargazers_count
stargazers_url
statuses_url
subscribers_url
subscription_url
svn_url
tags_url
teams_url
trees_url
updated_at
url
watchers
watchers_count


In [None]:
# Pull out the values for some of the keys in repo_dict

print("\nSelected information about first repository:")
# One output line per selected field of the first repository
first_repo_lines = [
    f"Name: {repo_dict['name']}",
    f"Owner: {repo_dict['owner']['login']}",
    f"Stars: {repo_dict['stargazers_count']}",
    f"Repository: {repo_dict['html_url']}",
    f"Created: {repo_dict['created_at']}",
    f"Updated: {repo_dict['updated_at']}",
    f"Description: {repo_dict['description']}",
]
print("\n".join(first_repo_lines))

In [None]:
# Summarizing the Top Repositories

print("\nSelected information about each repository:")
for repo_dict in repo_dicts:
    # The leading "\n" puts a blank line in front of every repository
    repo_lines = [
        f"\nName: {repo_dict['name']}",
        f"Owner: {repo_dict['owner']['login']}",
        f"Stars: {repo_dict['stargazers_count']}",
        f"Repository: {repo_dict['html_url']}",
        f"Description: {repo_dict['description']}",
    ]
    print("\n".join(repo_lines))

[Monitoring API Rate Limits](https://api.github.com/rate_limit)

### Visualizing Repositories Using Plotly

In [43]:
import requests
from plotly.graph_objs import bar
from plotly import offline

# Make an API call and store the response
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
headers = {'Accept': 'application/vnd.github.v3+json'}
# requests waits forever by default; bound the wait so a stalled connection
# cannot hang the cell.
r = requests.get(url, headers=headers, timeout=10)
print(f"Status code: {r.status_code}")

# Store API response
response_dict = r.json()
repo_dicts = response_dict['items']

# Process results: one bar (name, star count) and one tooltip per repository
repo_names, stars, labels = [], [], []
for repo_dict in repo_dicts:
    repo_names.append(repo_dict['name'])
    stars.append(repo_dict['stargazers_count'])

    # Adding Custom Tooltips ('<br>' is the line break in Plotly text)
    owner = repo_dict['owner']['login']
    desc = repo_dict['description']
    label = f"<br>Owner: {owner}<br />Desc: {desc}"
    labels.append(label)

# Make visualization
data = [{
    'type': 'bar',
    'x': repo_names,
    'y': stars,
    'hovertext': labels,
    'marker': {
        'color': 'rgb(60, 100, 150)',
        'line': {'width': 1.5, 'color': 'rgb(25, 25, 25)'}
    },
    'opacity': 0.6,
}]

# Title fonts are given as title.font; the flat 'titlefont' key is deprecated.
my_layout = {
    'title': {
        'text': 'Most starred Python projects on Github',
        'font': {'size': 28},
    },
    'xaxis': {
        'title': {'text': 'Repository', 'font': {'size': 24}},
        'tickfont': {'size': 14},
    },
    'yaxis': {
        'title': {'text': 'Stars', 'font': {'size': 24}},
        'tickfont': {'size': 14},
    },
}

fig = {'data': data, 'layout': my_layout}
offline.iplot(fig, filename='python-repos.html')

Statue code: 200


In [35]:
# Adding Clickable Links to Our Graph

import requests
from plotly.graph_objs import bar
from plotly import offline

# Make an API call and store the response
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
headers = {'Accept': 'application/vnd.github.v3+json'}
# requests waits forever by default; bound the wait so a stalled connection
# cannot hang the cell.
r = requests.get(url, headers=headers, timeout=10)
print(f"Status code: {r.status_code}")

# Store API response
response_dict = r.json()
repo_dicts = response_dict['items']

# Process results: one bar (link, star count) and one tooltip per repository
repo_links, stars, labels = [], [], []
for repo_dict in repo_dicts:
    # Adding clickable links: the x-axis label is an HTML anchor that points
    # at the repository page
    repo_name = repo_dict['name']
    repo_url = repo_dict['html_url']
    repo_link = f"<a href='{repo_url}'>{repo_name}</a>"
    repo_links.append(repo_link)

    stars.append(repo_dict['stargazers_count'])

    # Adding Custom Tooltips ('<br>' is the line break in Plotly text)
    owner = repo_dict['owner']['login']
    desc = repo_dict['description']
    label = f"<br>Owner: {owner}<br />Desc: {desc}"
    labels.append(label)

# Make visualization
data = [{
    'type': 'bar',
    'x': repo_links,
    'y': stars,
    'hovertext': labels,
    'marker': {
        'color': 'rgb(60, 100, 150)',
        'line': {'width': 1.5, 'color': 'rgb(25, 25, 25)'}
    },
    'opacity': 0.6,
}]

# Title fonts are given as title.font; the flat 'titlefont' key is deprecated.
my_layout = {
    'title': {
        'text': 'Most starred Python projects on Github',
        'font': {'size': 28},
    },
    'xaxis': {
        'title': {'text': 'Repository', 'font': {'size': 24}},
        'tickfont': {'size': 14},
    },
    'yaxis': {
        'title': {'text': 'Stars', 'font': {'size': 24}},
        'tickfont': {'size': 14},
    },
}

fig = {'data': data, 'layout': my_layout}
offline.iplot(fig, filename='python-repos.html')

Statue code: 200


#### Exercise: Continent

In [None]:
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import math

data = px.data.gapminder()
# .copy() gives an independent frame, so adding the 'size' column below does
# not write into a slice of `data`.
df_2007 = data[data['year'] == 2007].copy()
df_2007 = df_2007.sort_values(['continent', 'country'])

# Bubble size is the square root of the population
bubble_size = [math.sqrt(population) for population in df_2007['pop']]
df_2007['size'] = bubble_size

# One sub-frame per continent, so each continent becomes its own trace
continent_names = ['Africa', 'Americas', 'Asia', 'Europe', 'Oceania']
continent_data = {continent: df_2007[df_2007['continent'] == continent]
                  for continent in continent_names}

fig = go.Figure()

for continent_name, continent in continent_data.items():
    fig.add_trace(go.Scatter(
        x=continent['gdpPercap'],
        y=continent['lifeExp'],
        name=continent_name,
        # Per-point values must come from this continent's rows so they line
        # up with x and y.
        text=continent['country'],
        customdata=continent['pop'],
        # x is GDP per capita and y is life expectancy; the real population
        # is read from customdata because marker.size holds its square root.
        hovertemplate=(
            "<b>%{text}</b><br><br>"
            "GDP per Capita: %{x:$,.0f}<br>"
            "Life Expectancy: %{y:.1f} years<br>"
            "Population: %{customdata:,}"
            "<extra></extra>"
        ),
        marker_size=continent['size'],
    ))

fig.update_traces(
    mode='markers',
    marker={'sizemode': 'area',
            'sizeref': 10})

fig.update_layout(
    xaxis={
        'title': 'GDP per capita',
        'type': 'log'},
    yaxis={'title': 'Life Expectancy (years)'})

fig.show()

### The Hacker News API

https://news.ycombinator.com/

In [None]:
import requests
import json

# Request a single Hacker News item and report the HTTP status.
url = 'https://hacker-news.firebaseio.com/v0/item/19155826.json'
r = requests.get(url)
print(f"Status code: {r.status_code}")

# Save the decoded payload, pretty-printed, so its structure can be inspected
# in an editor.
response_dict = r.json()
with open('data/readable_hn_data.json', 'w') as f:
    f.write(json.dumps(response_dict, indent=4))

In [1]:
from operator import itemgetter

import requests

# Make an API call and store the response.
url = 'https://hacker-news.firebaseio.com/v0/topstories.json'
r = requests.get(url)
print(f"Status code: {r.status_code}")

# Process information about each submission.
submission_ids = r.json()
submission_dicts = []
for submission_id in submission_ids[:10]:
    # Make a separate API call for each submission.
    url = f"https://hacker-news.firebaseio.com/v0/item/{submission_id}.json"
    r = requests.get(url)
    print(f"id: {submission_id}\tstatus: {r.status_code}")
    response_dict = r.json()

    # Build a dictionary for each article.
    submission_dict = {
        'title': response_dict['title'],
        'hn_link': f"http://news.ycombinator.com/item?id={submission_id}",
        # Not every item carries 'descendants' (job postings have no comment
        # thread), so treat a missing count as zero instead of raising
        # KeyError halfway through the loop.
        'comments': response_dict.get('descendants', 0),
    }
    submission_dicts.append(submission_dict)

# Most-discussed submissions first.
submission_dicts = sorted(submission_dicts, key=itemgetter('comments'),
                          reverse=True)

for submission_dict in submission_dicts:
    print(f"\nTitle: {submission_dict['title']}")
    print(f"Discussion link: {submission_dict['hn_link']}")
    print(f"Comments: {submission_dict['comments']}")


Status code: 200
id: 22911533	status: 200
id: 22911379	status: 200
id: 22909984	status: 200
id: 22908224	status: 200
id: 22908044	status: 200
id: 22910702	status: 200
id: 22910269	status: 200
id: 22905211	status: 200
id: 22909998	status: 200
id: 22909419	status: 200

Title: TablePlus – Modern, Native Tool for Database Management
Discussion link: http://news.ycombinator.com/item?id=22908224
Comments: 136

Title: Show HN: Gentle is a social app where you give and get kindness
Discussion link: http://news.ycombinator.com/item?id=22909419
Comments: 80

Title: I've been writing TILs for 5 years
Discussion link: http://news.ycombinator.com/item?id=22908044
Comments: 74

Title: Scuttlebot: Peer-to-peer database, identity provider, and messaging system
Discussion link: http://news.ycombinator.com/item?id=22909984
Comments: 73

Title: It’s Time to Build
Discussion link: http://news.ycombinator.com/item?id=22911533
Comments: 39

Title: The Pervert's Guide to Computer Programming Languages (2017)

In [24]:
import requests
import json
from operator import itemgetter
import textwrap
from plotly.graph_objects import bar
from plotly import offline

# Make an API call
url = 'https://hacker-news.firebaseio.com/v0/topstories.json'
r = requests.get(url)
print(f"Status code: {r.status_code}")

# Process information about each submission.
submission_ids = r.json()
submission_dicts = []
# Collect the 10 top articles
for submission_id in submission_ids[:10]:
    # Make a separate API call for each submission.
    url = f"https://hacker-news.firebaseio.com/v0/item/{submission_id}.json"
    r = requests.get(url)
    print(f"id: {submission_id}\tstatus: {r.status_code}")
    response_dict = r.json()

    # Build a dictionary for each article.
    submission_dict = {
        'title': response_dict['title'],
        'hn_link': f"http://news.ycombinator.com/item?id={submission_id}",
        # Not every item carries 'descendants' (job postings have no comment
        # thread), so treat a missing count as zero instead of raising
        # KeyError halfway through the loop.
        'comments': response_dict.get('descendants', 0),
    }
    submission_dicts.append(submission_dict)

Status code: 200
id: 22911533	status: 200
id: 22912378	status: 200
id: 22911379	status: 200
id: 22909366	status: 200
id: 22909984	status: 200
id: 22912226	status: 200
id: 22908224	status: 200
id: 22907200	status: 200
id: 22908044	status: 200
id: 22905211	status: 200


In [25]:
# Order the articles from most to least commented.
submission_dicts.sort(key=itemgetter('comments'), reverse=True)

In [26]:
# Show the title, discussion link and comment count of every article,
# with a blank line before each entry.
for entry in submission_dicts:
    print(
        f"\nTitle: {entry['title']}\n"
        f"Discussion link: {entry['hn_link']}\n"
        f"Comments: {entry['comments']}"
    )


Title: It’s Time to Build
Discussion link: http://news.ycombinator.com/item?id=22911533
Comments: 365

Title: TablePlus – Modern, Native Tool for Database Management
Discussion link: http://news.ycombinator.com/item?id=22908224
Comments: 151

Title: I've been writing TILs for 5 years
Discussion link: http://news.ycombinator.com/item?id=22908044
Comments: 97

Title: Scuttlebot: Peer-to-peer database, identity provider, and messaging system
Discussion link: http://news.ycombinator.com/item?id=22909984
Comments: 82

Title: iPhone 7 boots postmarketOS
Discussion link: http://news.ycombinator.com/item?id=22911379
Comments: 42

Title: Covid-19’s impact on Tor
Discussion link: http://news.ycombinator.com/item?id=22912378
Comments: 29

Title: Show HN: Explore Wikipedia edits made by institutions, companies and governments
Discussion link: http://news.ycombinator.com/item?id=22907200
Comments: 23

Title: Mystery of two-million-year-old stone balls solved
Discussion link: http://news.ycombinato

In [44]:
# Process results
article_links, article_titles, comments = [], [], []
for submission_dict in submission_dicts:
    title = submission_dict['title']
    article_titles.append(title)

    # Tick label: a clickable link whose visible text is the title cut down
    # to at most 50 characters, so long titles do not crowd the x-axis.
    # The full title is still available on hover.
    url = submission_dict['hn_link']
    short_title = textwrap.shorten(title, width=50, placeholder='...')
    article_links.append(f"<a href='{url}'>{short_title}</a>")

    comments.append(submission_dict['comments'])

# Make a visualization
data = [{
    'type': 'bar',
    'x': article_links,
    'y': comments,
    'hovertext': article_titles,
}]

# Derive the count from the data so the title stays correct if the number of
# fetched stories changes.
chart_title = f"Discussions about {len(submission_dicts)} Top Stories on Hacker News"

# Font sizes are given through the nested title.font form; the flat
# 'titlefont' attribute is deprecated in Plotly.
my_layout = {
    'title': {'text': chart_title, 'font': {'size': 24}},
    'xaxis': {
        'title': {'text': 'Articles', 'font': {'size': 18}},
        'tickfont': {'size': 14},
    },
    'yaxis': {
        'title': {'text': 'Comments', 'font': {'size': 18}},
        'tickfont': {'size': 14},
    },
}

fig = {'data': data, 'layout': my_layout}
offline.iplot(fig)

#### Exercise: Most Starred JavaScript Projects on GitHub

Modify the API call in python_repos.py so it generates a chart showing the most popular projects in other languages. Try languages such as JavaScript, Ruby, C, Java, Perl, Haskell, and Go.

In [41]:
import requests
from plotly.graph_objs import bar
from plotly import offline

def get_most_starred(your_language):
    """Chart the most-starred GitHub repositories for one language.

    Calls the GitHub repository search API for repositories written in
    ``your_language`` sorted by stars, prints the HTTP status code, and
    renders an interactive bar chart of stars per repository in the notebook.

    Args:
        your_language: Language name in any letter case, e.g. ``'JAVASCRIPT'``
            or ``'c++'``. It is lower-cased for the query and title-cased for
            the chart title.

    Returns:
        None. The chart is displayed as a side effect.
    """

    # Make an API call and store the response. The query goes through
    # `params` so that requests percent-encodes it; interpolating the name
    # into the URL breaks for languages such as C++ or C#, where '+' and '#'
    # have their own meaning inside a URL.
    url = 'https://api.github.com/search/repositories'
    params = {'q': f"language:{your_language.lower()}", 'sort': 'stars'}
    headers = {'Accept': 'application/vnd.github.v3+json'}
    r = requests.get(url, headers=headers, params=params)
    print(f"Status code: {r.status_code}")

    # Store API response
    response_dict = r.json()
    repo_dicts = response_dict['items']

    # Process results
    repo_links, stars, labels = [], [], []
    for repo_dict in repo_dicts:
        # Adding clickable links
        repo_name = repo_dict['name']
        repo_url = repo_dict['html_url']
        repo_links.append(f"<a href='{repo_url}'>{repo_name}</a>")

        stars.append(repo_dict['stargazers_count'])

        # Adding custom tooltips. A repository without a description comes
        # back as None; show a readable placeholder instead of "None".
        owner = repo_dict['owner']['login']
        desc = repo_dict['description'] or 'No description'
        labels.append(f"<br>Owner: {owner}<br />Desc: {desc}")

    # Make visualization
    data = [{
        'type': 'bar',
        'x': repo_links,
        'y': stars,
        'hovertext': labels,
        'marker': {
            'color': 'rgb(60, 100, 150)',
            'line': {'width': 1.5, 'color': 'rgb(25, 25, 25)'}
        },
        'opacity': 0.6,
    }]

    # Font sizes are given through the nested title.font form; the flat
    # 'titlefont' attribute is deprecated in Plotly.
    my_layout = {
        'title': {
            'text': f"Most starred {your_language.title()} projects on Github",
            'font': {'size': 24},
        },
        'xaxis': {
            'title': {'text': 'Repository', 'font': {'size': 18}},
            'tickfont': {'size': 14},
        },
        'yaxis': {
            'title': {'text': 'Stars', 'font': {'size': 18}},
            'tickfont': {'size': 14},
        },
    }

    fig = {'data': data, 'layout': my_layout}
    offline.iplot(fig)

get_most_starred('JAVASCRIPT')

Statue code: 200


#### Exercise: Most Popular Users Based on Languages

In [50]:
# Find an elite group of GitHub users with the most followers

import requests
from plotly.graph_objs import bar
from plotly import offline

# Make an API call and store the response.
# The query is already percent-encoded: language:javascript followers:>=100
url = 'https://api.github.com/search/users?q=language%3Ajavascript+followers%3A%3E%3D100&type=Users'
headers = {'Accept': 'application/vnd.github.v3+json'}
r = requests.get(url, headers=headers)
print(f"Status code: {r.status_code}")

# Store API response
response_dict = r.json()
print(response_dict.keys())
print(f"Total users: {response_dict['total_count']}")

# Explore information about the users
github_users = response_dict['items']
print(f"Users returned: {len(github_users)}")

# Examine the first user: number of fields, then every field in key order
first_user = github_users[0]
print(f"\nKeys: {len(first_user)}")
for key in sorted(first_user):
    print(f"{key}: {first_user[key]}")

Statue code: 200
dict_keys(['total_count', 'incomplete_results', 'items'])
Total users: 10496
Users returned: 30

Keys: 19
avatar_url: https://avatars0.githubusercontent.com/u/905434?v=4
events_url: https://api.github.com/users/ruanyf/events{/privacy}
followers_url: https://api.github.com/users/ruanyf/followers
following_url: https://api.github.com/users/ruanyf/following{/other_user}
gists_url: https://api.github.com/users/ruanyf/gists{/gist_id}
gravatar_id: 
html_url: https://github.com/ruanyf
id: 905434
login: ruanyf
node_id: MDQ6VXNlcjkwNTQzNA==
organizations_url: https://api.github.com/users/ruanyf/orgs
received_events_url: https://api.github.com/users/ruanyf/received_events
repos_url: https://api.github.com/users/ruanyf/repos
score: 1.0
site_admin: False
starred_url: https://api.github.com/users/ruanyf/starred{/owner}{/repo}
subscriptions_url: https://api.github.com/users/ruanyf/subscriptions
type: User
url: https://api.github.com/users/ruanyf


In [63]:
# Fetch the full profile of each of the first 5 users from the search result.
# Maps the key stored in each user record to the field it is read from in
# the profile response.
profile_fields = {
    'id': 'id',
    'name': 'name',
    'location': 'location',
    'avatar_url': 'avatar_url',
    'github': 'html_url',
    'blog': 'blog',
    'followers': 'followers',
    'created_at': 'created_at',
    'updated_at': 'updated_at',
}

user_dicts = []
for github_user in github_users[:5]:
    r = requests.get(github_user['url'], headers=headers)
    response_dict = r.json()
    print(f"ID: {response_dict['id']}\tStatus: {r.status_code}")

    # One flat record per user, holding only the fields listed above
    user_dicts.append(
        {key: response_dict[field] for key, field in profile_fields.items()}
    )

ID: 905434	Status: 200
ID: 499550	Status: 200
ID: 810438	Status: 200
ID: 25254	Status: 200
ID: 170270	Status: 200


In [64]:
# Display the record built for the first user.
user_dicts[0]

{'id': 905434,
 'name': 'Ruan YiFeng',
 'location': 'Shanghai, China',
 'avatar_url': 'https://avatars0.githubusercontent.com/u/905434?v=4',
 'github': 'https://github.com/ruanyf',
 'blog': 'https://twitter.com/ruanyf',
 'followers': 60123,
 'created_at': '2011-07-10T01:07:17Z',
 'updated_at': '2020-04-07T19:23:13Z'}

In [67]:
import pandas as pd
# Tabulate the user records: one row per user, one column per field.
pd.DataFrame(user_dicts)

Unnamed: 0,id,name,location,avatar_url,github,blog,followers,created_at,updated_at
0,905434,Ruan YiFeng,"Shanghai, China",https://avatars0.githubusercontent.com/u/90543...,https://github.com/ruanyf,https://twitter.com/ruanyf,60123,2011-07-10T01:07:17Z,2020-04-07T19:23:13Z
1,499550,Evan You,New Jersey / China,https://avatars1.githubusercontent.com/u/49955...,https://github.com/yyx990803,http://evanyou.me,57583,2010-11-28T01:05:40Z,2020-04-13T12:55:11Z
2,810438,Dan Abramov,,https://avatars0.githubusercontent.com/u/81043...,https://github.com/gaearon,http://twitter.com/dan_abramov,55292,2011-05-25T18:18:31Z,2020-04-08T17:22:59Z
3,25254,TJ Holowaychuk,"London, UK",https://avatars2.githubusercontent.com/u/25254...,https://github.com/tj,https://apex.sh,42672,2008-09-18T22:37:28Z,2020-04-03T16:08:35Z
4,170270,Sindre Sorhus,,https://avatars1.githubusercontent.com/u/17027...,https://github.com/sindresorhus,https://sindresorhus.com,37299,2009-12-20T22:57:02Z,2020-04-16T07:07:07Z


In [35]:
# Process results: chart the users collected in `user_dicts` by follower
# count. Everything is built from the user records so the lists initialised
# here are the ones that get filled and plotted.
user_links, followers, labels = [], [], []
for user_dict in user_dicts:
    # Adding clickable links. Profile fields can be empty (location is
    # missing for some users above), so fall back to the login taken from
    # the profile URL when no display name is set.
    profile_url = user_dict['github']
    display_name = user_dict['name'] or profile_url.rstrip('/').rsplit('/', 1)[-1]
    user_links.append(f"<a href='{profile_url}'>{display_name}</a>")

    followers.append(user_dict['followers'])

    # Adding custom tooltips, with a placeholder for empty fields
    location = user_dict['location'] or 'n/a'
    blog = user_dict['blog'] or 'n/a'
    labels.append(f"<br>Location: {location}<br />Blog: {blog}")

# Make visualization
data = [{
    'type': 'bar',
    'x': user_links,
    'y': followers,
    'hovertext': labels,
    'marker': {
        'color': 'rgb(60, 100, 150)',
        'line': {'width': 1.5, 'color': 'rgb(25, 25, 25)'}
    },
    'opacity': 0.6,
}]

# Font sizes are given through the nested title.font form; the flat
# 'titlefont' attribute is deprecated in Plotly.
my_layout = {
    'title': {
        'text': 'Followers of popular JavaScript users on GitHub',
        'font': {'size': 28},
    },
    'xaxis': {
        'title': {'text': 'User', 'font': {'size': 24}},
        'tickfont': {'size': 14},
    },
    'yaxis': {
        'title': {'text': 'Followers', 'font': {'size': 24}},
        'tickfont': {'size': 14},
    },
}

fig = {'data': data, 'layout': my_layout}
offline.iplot(fig, filename='github-users.html')

Statue code: 200
