In [None]:
import numpy as np
import pandas as pd
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import os
# os.chdir(r"Your directory here") 

# Series metadata. The indicator codes to keep were picked by hand and are
# flagged with a 1 in the 'Unnamed: 3' column.
wdiSeries = pd.read_csv('wdiSeries.csv', encoding = "ISO-8859-1")
# This is the base dataset (one row per country code, with name and region).
uc = ['Country Code', 'Table Name', 'Region']
wdi_country = pd.read_csv("WDI_Country.csv", usecols = uc, encoding = "ISO-8859-1")
# This is the large dataset with the yearly indicators.
uc = ['Country Name', 'Country Code', 'Indicator Code', '2007', '2008',
      '2009','2010', '2011', '2012', '2013']
wdi_data = pd.read_csv("WDI_Data.csv", usecols = uc, encoding = "ISO-8859-1")
# A list of the codes that we want to pull out of the wdi_data.
uS = wdiSeries.loc[wdiSeries['Unnamed: 3'] == 1, 'Series Code'].tolist()
ind_code = wdi_data['Indicator Code']
# Take an explicit copy of the filtered rows: the derived columns below are
# then added to an independent frame rather than to a slice of the raw data
# (which raises SettingWithCopyWarning).
wdi_data = wdi_data[ind_code.isin(uS)].copy()
# Row-wise mean of the 2007-2009 values.
wdi_data['avg0709'] = wdi_data[['2007', '2008', '2009']].mean(axis = 1)
# Absolute change between 2009 and 2012.
wdi_data['delta0912'] = wdi_data['2012'] - wdi_data['2009']

def build_DF(ctry):
    """Assemble one wide record of indicator values for a single country.

    Reads the module-level frames ``wdi_data`` and ``wdi_country``.

    Parameters
    ----------
    ctry : str
        Value to match against the 'Country Code' column of both frames.

    Returns
    -------
    pandas.Series
        Indexed by indicator code (values taken from 'avg0709'), followed by
        'country_code', 'GDPdelta', 'GDPpercentgrowth', 'region' and
        'country_name'.

    Raises
    ------
    KeyError
        If ``ctry`` has no rows in ``wdi_data``.
    IndexError
        If the country has no 'NY.GDP.MKTP.KD' row, or no row in
        ``wdi_country``.
    """
    # Filter once and reuse the result instead of re-grouping / re-masking the
    # whole frame for every lookup below.
    country_rows = wdi_data[wdi_data['Country Code'] == ctry]
    if country_rows.empty:
        # Same exception type the previous groupby(...).get_group(ctry) raised.
        raise KeyError(ctry)

    # Pivot the (indicator, average) pairs so each indicator code becomes a label.
    by_indicator = country_rows[['Indicator Code', 'avg0709']].transpose()
    by_indicator = by_indicator.rename(columns = by_indicator.loc['Indicator Code'])
    by_indicator['country_code'] = ctry
    # Copy so the enlargements below act on an independent Series rather than
    # on a row taken out of the temporary frame.
    res = by_indicator.loc['avg0709'].copy()

    gdp_rows = country_rows[country_rows['Indicator Code'] == 'NY.GDP.MKTP.KD']
    res['GDPdelta'] = gdp_rows['delta0912'].values[0]
    # NOTE(review): the delta spans 2009 -> 2012 but is divided by the 2007
    # value; confirm that 2007 (and not 2009) is the intended base year.
    res['GDPpercentgrowth'] = (res['GDPdelta'] / gdp_rows['2007'].values[0]) * 100

    country_info = wdi_country[wdi_country['Country Code'] == ctry]
    res['region'] = country_info['Region'].values[0]
    res['country_name'] = country_info['Table Name'].values[0]
    return res

# build_DF returns one Series per country code, so Series.apply already yields
# a DataFrame with one row per country. DataFrame.append (removed in pandas
# 2.0) and the empty starter frame are not needed.
builtDF = wdi_country['Country Code'].apply(build_DF)
# The same result as an explicit loop, for reference:
#   rows = [build_DF(code) for code in wdi_country['Country Code']]
#   builtDF = pd.DataFrame(rows)

# Drop rows without a GDP growth figure *before* the blanket fillna below.
# Once the NaNs are replaced by 0 a notnull() check can no longer tell a
# missing figure from a genuine 0% growth.
builtDF = builtDF[builtDF.GDPpercentgrowth.notnull()]

# One-hot encode the region, then put the original label column back because
# get_dummies replaces it.
region = builtDF.region
builtDF = pd.get_dummies(builtDF, columns = ['region'])
builtDF['region'] = region
# Remaining missing values (including a missing region label) become 0.
builtDF = builtDF.fillna(value = 0)

def indicate_growth(gr, low = 5, high = 20):
    """Bucket a percent-growth figure into a three-level indicator.

    Parameters
    ----------
    gr : number
        Growth figure to classify.
    low : number, optional
        Values strictly below this map to -1 (default 5).
    high : number, optional
        Values strictly above this map to 1 (default 20).

    Returns
    -------
    int
        -1 if ``gr < low``, 1 if ``gr > high``, otherwise 0. A NaN compares
        False on both sides and therefore yields 0.
    """
    if gr < low:
        return -1
    if gr > high:
        return 1
    return 0

# Keep only rows that have a GDP growth figure and a region. The region is
# compared against 0 because the preceding fillna(value = 0) replaced missing
# labels with 0.
has_growth = builtDF.GDPpercentgrowth.notnull()
has_region = builtDF.region != 0
# .copy() so the new column is added to an independent frame, not to a
# filtered slice (avoids SettingWithCopyWarning).
builtDF = builtDF[has_growth & has_region].copy()
builtDF['growthInd'] = builtDF.GDPpercentgrowth.apply(indicate_growth)

print(builtDF.dtypes)
# Write to the current working directory. The previous hard-coded
# os.chdir(r'C://Users//rlantz') only worked on one machine; the later
# read_csv('built_DF.csv') uses the same relative path, so it still finds
# the file.
builtDF.to_csv(r'built_DF.csv', encoding = "ISO-8859-1")

In [None]:
import pandas as pd
import numpy as np
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Scatter, Figure, Layout, Bar

# Switch plotly to offline rendering inside the notebook.
init_notebook_mode(connected = True)

# The curated per-country dataset.
bDF = pd.read_csv(r'built_DF.csv', encoding = "ISO-8859-1")

# One marker per row of the dataset.
trace1 = Scatter(
    mode = 'markers',
    x = bDF['EN.POP.DNST'],     # Population density
    y = bDF['EG.ELC.HYRO.ZS'],  # Hydroelectric energy production as a % of total
)

# The x axis is drawn on a log scale; the y axis stays linear.
layout = {
    'title': 'Population Density vs Hydroelectric Production by Country',
    'yaxis': {
        'zeroline': False,
        'title': 'Hydroelectric Production',
    },
    'xaxis': {
        'zeroline': True,
        'title': 'Log of Population Density',
        'type': 'log',
    },
}
data = [trace1]
fig = {'data': data, 'layout': layout}
iplot(fig)

In [None]:
# Split the countries by their growthInd bucket. The bucket is assigned by
# indicate_growth when built_DF.csv is created: 1 = GDP growth above 20%,
# -1 = below 5%, 0 = everything in between.
highPerformers = bDF[bDF.growthInd == 1]
medPerformers = bDF[bDF.growthInd == 0]
lowPerformers = bDF[bDF.growthInd == -1]

# NE.TRD.GNFS.ZS is trade as a % of GDP (it's possible to have more trade than GDP)
# NY.GDP.MKTP.CD is GDP in constant 2010 US$
# NOTE(review): in the WDI catalogue the .CD suffix normally denotes *current*
# US$ (constant-price GDP is NY.GDP.MKTP.KD) - confirm, and adjust the y-axis
# title below if so.
# MS.MIL.XPND.GD.ZS is military expenditure as a % of GDP scaled to appear larger (*10)
def _performer_trace(performers, name, color):
    """Build the bubble-scatter trace for one growth bucket.

    Parameters
    ----------
    performers : pandas.DataFrame
        Rows of ``bDF`` belonging to the bucket.
    name : str
        Legend label of the trace.
    color : str
        Marker colour.

    Returns
    -------
    Scatter
        Markers at (trade % of GDP, GDP), sized by military expenditure * 10.
    """
    return Scatter(x = performers['NE.TRD.GNFS.ZS'],
                   y = performers['NY.GDP.MKTP.CD'],
                   name = name,
                   mode = 'markers',
                   marker = dict(
                                 color = color,
                                 size = performers['MS.MIL.XPND.GD.ZS'] * 10,
                                 sizemode = 'diameter',
                                 sizemin = 1
                                 ),
                   line = dict(width = 2,
                               color = 'rgb(0,0,0)')
                  )

traceHigh = _performer_trace(highPerformers, 'HighPerformers', '#cc7c2c')
traceMed = _performer_trace(medPerformers, 'MedianPerformers', '#636b77')
traceLow = _performer_trace(lowPerformers, 'LowPerformers', '#3270d3')

# The list previously referenced an undefined name (traceMedian), which raised
# a NameError before anything was drawn.
data = [traceHigh, traceMed, traceLow]

layout = dict(title = 'Military Expenditure (size) and GDP growth between 2009 and 2012 (color)',
             yaxis = dict(zeroline = False,
                         title = 'GDP in constant 2010 US$',
                         type = 'log'),
             xaxis = dict(zeroline = False,
                         title = 'Trade as a % of GDP',
                         range = [-10, 175],
                         ))

fig = dict(data = data, layout = layout)
iplot(fig)

In [None]:
import re
def yourFunc(myStr):
    """Find the first 10-digit phone number in a string and normalise it.

    A number is three digits, three digits and four digits, with up to three
    non-digit characters allowed between the groups.

    Parameters
    ----------
    myStr : str
        Text to search.

    Returns
    -------
    str
        The number formatted as 'ddd-ddd-dddd', or 'no phone number' when the
        text contains no match or ``myStr`` is not a string.
    """
    # Explicit checks replace the former bare ``except:``, which also swallowed
    # unrelated errors such as KeyboardInterrupt.
    if not isinstance(myStr, str):
        return 'no phone number'
    phnNum = re.compile(r'(\d{3})\D{0,3}(\d{3})\D{0,3}(\d{4})')
    found = phnNum.search(myStr)
    if found is None:
        return 'no phone number'
    return '-'.join(found.groups())
print(yourFunc('This has a phone number in it (800)-123-4567 and it should be found.'))