In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import requests
from bs4 import BeautifulSoup
import re

In [2]:
def augment(xold, yold, numsteps, include_last=False):
    """Linearly interpolate extra points along each segment of a polyline.

    Every consecutive pair of input points is split into ``numsteps`` equal
    steps, and the points at steps ``0 .. numsteps - 1`` are emitted. The end
    of one segment is therefore emitted as step 0 of the next one, and the
    very last input point is only emitted when ``include_last`` is true.

    Parameters
    ----------
    xold, yold : sequence of numbers
        Coordinates of the original points. Both must have the same length.
    numsteps : int
        Number of points generated per segment; must be at least 1.
    include_last : bool, optional
        Append the final original point to the output. Defaults to ``False``,
        which keeps the historical output length of
        ``(len(xold) - 1) * numsteps``.

    Returns
    -------
    (xnew, ynew) : tuple of 1-D float ``numpy.ndarray``
        The densified coordinates. Both are empty when fewer than two points
        are given and ``include_last`` is false.

    Raises
    ------
    ValueError
        If ``numsteps`` is smaller than 1 or the inputs differ in length.
    """
    if numsteps < 1:
        raise ValueError("numsteps must be a positive integer")

    xs = np.asarray(xold, dtype=float)
    ys = np.asarray(yold, dtype=float)
    if xs.shape != ys.shape:
        raise ValueError("xold and yold must have the same length")

    # Step indices 0 .. numsteps-1, shared by every segment.
    offsets = np.arange(numsteps)

    def _densify(values):
        # Per-segment increment, then start + k * increment for every k at
        # once. Building the whole grid in one shot avoids re-allocating the
        # result for each point, which is what np.append in a loop does.
        step = np.diff(values) / numsteps
        dense = (values[:-1, None] + offsets * step[:, None]).ravel()
        if include_last:
            dense = np.append(dense, values[-1:])
        return dense

    return _densify(xs), _densify(ys)

In [3]:
# Ask for three city names. The prompt requests lowercase, but the answer is
# later concatenated straight into a URL, so normalise it here instead of
# trusting the user: surrounding whitespace is dropped and the text lowercased.
cities = [
    input("Please enter a city (all lowercase): ").strip().lower()
    for _ in range(3)
]
# Keep the individual names bound as well, as before.
city1, city2, city3 = cities

Please enter a city (all lowercase): portland
Please enter a city (all lowercase): seattle
Please enter a city (all lowercase): boston


In [4]:
def make_df(i):
    """Scrape the population table for ``cities[i]`` and return it densified.

    The page ``http://worldpopulationreview.com/us-cities/<city>-population/``
    is downloaded, every table row inside the
    ``div.section-container.clearfix`` element is read as ``(year, population)``
    from its first two ``<td>`` cells, and the resulting series is passed
    through ``augment`` with 10 steps per segment.

    Side effect: ``cities[i]`` is rewritten with its first letter upper-cased
    (other code in this notebook uses ``cities`` as display labels).

    Parameters
    ----------
    i : int
        Index into the module-level ``cities`` list.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Year'`` and ``'Population'``, with rows in the reverse of
        the order produced by ``augment``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the expected container or any usable table row is missing.
    """
    # data extraction and data cleaning
    # Build the URL from a lower-cased copy: cities[i] is capitalised further
    # down, so without this a second call for the same index would request a
    # different URL than the first one.
    slug = cities[i].lower()
    r = requests.get(
        'http://worldpopulationreview.com/us-cities/' + slug + '-population/',
        timeout=30,  # do not hang the notebook forever on a stalled connection
    )
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "lxml")
    main_content = soup.find('div', attrs = {'class': 'section-container clearfix'})
    if main_content is None:
        raise ValueError("no population section found on the page for " + repr(slug))

    year = []
    pop = []
    # The first <tr> is skipped (presumably the header row - confirm against
    # the page layout).
    for row in main_content.find_all('tr')[1:]:
        cells = [td.get_text(strip=True) for td in row.find_all('td')]
        if len(cells) < 2:
            # Rows without a year and a population cell carry no data.
            continue
        year.append(int(cells[0]))
        pop.append(int(cells[1].replace(',', '')))
    if not year:
        raise ValueError("no population rows could be parsed for " + repr(slug))

    #make dataframe
    cities[i] = cities[i][0].upper() + cities[i][1:]
    XN, YN = augment(np.array(year), np.array(pop), 10)
    augmented = pd.DataFrame({'Population': YN, 'Year': XN}, columns = ['Year', 'Population'])
    # Reverse the row order; the original index labels are kept as they are.
    return augmented[::-1]

# Build one dataframe per city, in the same order as the `cities` list.
dfs = [make_df(city_index) for city_index in range(3)]

In [6]:
from functools import partial, reduce

# Fold the per-city frames into a single wide frame by merging them pairwise
# on 'Year', left to right. Each frame brings its own 'Population' column, so
# pandas' default suffixes apply when the names collide.
df_final = reduce(partial(pd.merge, on='Year'), dfs)
df_final

Unnamed: 0,Year,Population_x,Population_y,Population
0,1871.0,9221.4,1343.3,261757.3
1,1872.0,10149.8,1586.6,272988.6
2,1873.0,11078.2,1829.9,284219.9
3,1874.0,12006.6,2073.2,295451.2
4,1875.0,12935.0,2316.5,306682.5
5,1876.0,13863.4,2559.8,317913.8
6,1877.0,14791.8,2803.1,329145.1
7,1878.0,15720.2,3046.4,340376.4
8,1879.0,16648.6,3289.7,351607.7
9,1880.0,17577.0,3533.0,362839.0


In [7]:
# Look up the writer class registered under 'ffmpeg' in matplotlib's registry.
Writer = animation.writers['ffmpeg']

# Frame rate is half the number of merged rows.
writer = Writer(
    fps=len(df_final)/2,
    metadata={'artist': 'Me'},
    bitrate=1800,
)

In [None]:
# Merged population columns, in the same order as the `cities` list, and the
# colour used for each of them.
population_columns = ['Population_x', 'Population_y', 'Population']
line_colors = ['b', 'r', 'g']

fig, ax = plt.subplots(figsize=(10, 6))
# Fix the axes to the full data range up front so the view does not rescale
# while the lines grow. iloc is positional, so this does not depend on the
# index labels of df_final.
ax.set_xlim(df_final['Year'].iloc[0], df_final['Year'].iloc[-1])
ax.set_ylim(df_final[population_columns].min().min(),
            df_final[population_columns].max().max())
ax.set_title('Population Growth', fontsize=20)
ax.set_xlabel('Year', fontsize=16)
ax.set_ylabel('Population', fontsize=16)
ax.tick_params(labelsize=14)

# One line artist per city, created once and only updated afterwards.
# Re-plotting on every frame would stack three new artists onto the axes per
# frame and make each frame slower to draw than the previous one.
city_lines = [
    ax.plot([], [], color=color, linewidth=2, label=name)[0]
    for color, name in zip(line_colors, cities)
]
# The legend is built from the line artists themselves (via their labels) and
# kept hidden until the final frame.
city_legend = ax.legend()
city_legend.set_visible(False)

# Plain arrays so each frame is just a cheap slice.
year_values = df_final['Year'].values
population_values = [df_final[column].values for column in population_columns]


def animate(j):
    """Draw frame ``j``: extend every city's line to rows ``0 .. j``.

    The legend is shown on the last frame only.
    """
    stop = int(j) + 1  # select data range
    for line, values in zip(city_lines, population_values):
        line.set_data(year_values[:stop], values[:stop])
    city_legend.set_visible(j == len(df_final) - 1)
    return city_lines


ani = animation.FuncAnimation(fig, animate, frames=len(df_final), repeat=True)
ani.save('test.gif', writer='imagemagick', fps=50)