# 2013 US Payment Data Analysis

In [None]:
import pandas as pd
from plotly import plotly

In [None]:
# Load only the two columns this analysis uses: the recipient's state and the
# payment amount in US dollars.
df = pd.read_csv(
    '../input/general_payments.csv',
    usecols=['Recipient_State', 'Total_Amount_of_Payment_USDollars'],
    low_memory=False,
)

In [None]:
# Rename by column name rather than by position. read_csv returns the
# `usecols` columns in the order they appear in the file, not the order they
# were requested, so a positional `df.columns = [...]` would silently swap the
# two labels if the file stored the amount column first.
df = df.rename(columns={'Recipient_State': 'code',
                        'Total_Amount_of_Payment_USDollars': 'amount'})

In [None]:
# Lookup table: two-letter US state abbreviation -> full state name.
# Covers the 50 states only (no 'DC' key and no territories).
code = {
    'AL': 'Alabama',
    'AK': 'Alaska',
    'AZ': 'Arizona',
    'AR': 'Arkansas',
    'CA': 'California',
    'CO': 'Colorado',
    'CT': 'Connecticut',
    'DE': 'Delaware',
    'FL': 'Florida',
    'GA': 'Georgia',
    'HI': 'Hawaii',
    'ID': 'Idaho',
    'IL': 'Illinois',
    'IN': 'Indiana',
    'IA': 'Iowa',
    'KS': 'Kansas',
    'KY': 'Kentucky',
    'LA': 'Louisiana',
    'ME': 'Maine',
    'MD': 'Maryland',
    'MA': 'Massachusetts',
    'MI': 'Michigan',
    'MN': 'Minnesota',
    'MS': 'Mississippi',
    'MO': 'Missouri',
    'MT': 'Montana',
    'NE': 'Nebraska',
    'NV': 'Nevada',
    'NH': 'New Hampshire',
    'NJ': 'New Jersey',
    'NM': 'New Mexico',
    'NY': 'New York',
    'NC': 'North Carolina',
    'ND': 'North Dakota',
    'OH': 'Ohio',
    'OK': 'Oklahoma',
    'OR': 'Oregon',
    'PA': 'Pennsylvania',
    'RI': 'Rhode Island',
    'SC': 'South Carolina',
    'SD': 'South Dakota',
    'TN': 'Tennessee',
    'TX': 'Texas',
    'UT': 'Utah',
    'VT': 'Vermont',
    'VA': 'Virginia',
    'WA': 'Washington',
    'WV': 'West Virginia',
    'WI': 'Wisconsin',
    'WY': 'Wyoming',
}

In [None]:
# Two-column frame (abbreviation, full name) built from the lookup dict, one
# row per state, so it can be joined onto the per-state statistics.
code_df = pd.DataFrame(list(code.items()), columns=['code', 'state'])

In [None]:
# Preview the first five abbreviation/name rows.
code_df.head(5)

In [None]:
# Per-state summary of payment amounts: mean, sum, count and max.
# Uses the built-in groupby aggregations instead of applying a dict-returning
# lambda and unstacking the result; that route carried all four statistics in
# a single stacked Series, which forced `count` to a float.
# NOTE: this rebinds `df` from the raw payment rows to the summary table, so
# the cell cannot be re-run without reloading the data first.
df = (df.groupby('code')['amount']
        .agg(['mean', 'sum', 'count', 'max'])
        .reset_index())

In [None]:
# Preview the first five rows of the per-state summary.
df.head(5)

In [None]:
# Attach the full state name to each summary row. This is an inner join on
# the shared `code` column, so any recipient code that is not a key of the
# state lookup table is dropped here.
df = df.merge(code_df, on='code', how='inner')

In [None]:
# Preview the first five rows after the state names were joined on.
df.head(5)

In [None]:
# State abbreviations used as the choropleth location keys.
locations = df['code']

# Six-stop colour ramp shared by every map below; each entry is
# [normalised position in 0..1, colour].
colorscale = [
    [0.0, 'rgb(242,240,247)'],
    [0.2, 'rgb(218,218,235)'],
    [0.4, 'rgb(188,189,220)'],
    [0.6, 'rgb(158,154,200)'],
    [0.8, 'rgb(117,107,177)'],
    [1.0, 'rgb(84,39,143)'],
]

## 1. Average Amount of Payment

In [None]:
# Ten states with the largest mean payment amount.
df[['code', 'mean', 'state']].sort_values('mean', ascending=False).head(10)

In [None]:
# Values to colour by (mean payment per state) and the hover text.
p = df['mean']
labels = df['state']

# Single choropleth trace keyed by state abbreviation.
data = [dict(
    type='choropleth',
    locationmode='USA-states',
    locations=locations,
    z=p,
    text=labels,
    colorscale=colorscale,
    autocolorscale=False,
    # White, 2-wide borders between states.
    marker=dict(line=dict(color='rgb(255,255,255)', width=2)),
    colorbar=dict(title='USD'),
)]

In [None]:
# Figure layout for the average-payment map: title plus a US-only
# Albers projection with lakes drawn in white.
layout = dict(
    title='2013 US Average Amount of Payment Across States',
    geo=dict(
        scope='usa',
        projection=dict(type='albers usa'),
        showlakes=True,
        lakecolor='rgb(255, 255, 255)',
    ),
)

In [None]:
# Bundle the trace list and the layout into one figure spec.
fig = {'data': data, 'layout': layout}

# The upload call below cannot run on Kaggle; run it on your own machine.
# url = plotly.plot(fig, filename='2013 US Average Amount of Payment Across States')

<div>
    <a href="https://plot.ly/~fjwCode/8/?share_key=hNlSn4MrmADW9MFIRwswsz" target="_blank" title="2013 US Average Amount of Payment Across States" style="display: block; text-align: center;"><img src="https://plot.ly/~fjwCode/8.png?share_key=hNlSn4MrmADW9MFIRwswsz" alt="2013 US Average Amount of Payment Across States" style="max-width: 100%;width: 600px;"  width="600" onerror="this.onerror=null;this.src='https://plot.ly/404.png';" /></a>
    <script data-plotly="fjwCode:8" sharekey-plotly="hNlSn4MrmADW9MFIRwswsz" src="https://plot.ly/embed.js" async></script>
</div>

**Vermont** has the highest average payment amount.

## 2. Total Amount of Payment

In [None]:
# Ten states with the largest total payment amount.
df[['code', 'sum', 'state']].sort_values('sum', ascending=False).head(10)

In [None]:
# Values to colour by (total payments per state) and the hover text.
p = df['sum']
labels = df['state']

# Single choropleth trace keyed by state abbreviation.
data = [dict(
    type='choropleth',
    locationmode='USA-states',
    locations=locations,
    z=p,
    text=labels,
    colorscale=colorscale,
    autocolorscale=False,
    # White, 2-wide borders between states.
    marker=dict(line=dict(color='rgb(255,255,255)', width=2)),
    colorbar=dict(title='USD'),
)]

In [None]:
# Figure layout for the total-payment map: title plus a US-only
# Albers projection with lakes drawn in white.
layout = dict(
    title='2013 US Total Amount of Payment Across States',
    geo=dict(
        scope='usa',
        projection=dict(type='albers usa'),
        showlakes=True,
        lakecolor='rgb(255, 255, 255)',
    ),
)

In [None]:
# Bundle the trace list and the layout into one figure spec.
fig = {'data': data, 'layout': layout}

# The upload call below cannot run on Kaggle; run it on your own machine.
# url = plotly.plot(fig, filename='2013 US Total Amount of Payment Across States')

<div>
    <a href="https://plot.ly/~fjwCode/4/?share_key=I4VunPy9N0M2qwMCyCgD4X" target="_blank" title="2013 US Total Amount of Payment Across States" style="display: block; text-align: center;"><img src="https://plot.ly/~fjwCode/4.png?share_key=I4VunPy9N0M2qwMCyCgD4X" alt="2013 US Total Amount of Payment Across States" style="max-width: 100%;width: 600px;"  width="600" onerror="this.onerror=null;this.src='https://plot.ly/404.png';" /></a>
    <script data-plotly="fjwCode:4" sharekey-plotly="I4VunPy9N0M2qwMCyCgD4X" src="https://plot.ly/embed.js" async></script>
</div>

**California** has the highest total payment amount.

## 3. Number of Payments

In [None]:
# Ten states with the largest number of payments.
df[['code', 'count', 'state']].sort_values('count', ascending=False).head(10)

In [None]:
# Values to colour by (number of payments per state) and the hover text.
p = df['count']
labels = df['state']

# Single choropleth trace keyed by state abbreviation.
data = [{'type': 'choropleth',
         'colorscale': colorscale,
         'autocolorscale': False,
         'locations': locations,
         'z': p,
         'locationmode': 'USA-states',
         'text': labels,
         # White, 2-wide borders between states.
         'marker': {'line': {'color': 'rgb(255,255,255)', 'width': 2}},
         # `z` is a count of payments, not a dollar amount, so the colour bar
         # must not be labelled 'USD' like the two maps above.
         'colorbar': {'title': 'Payments'}}]

In [None]:
# Figure layout for the payment-count map: title plus a US-only
# Albers projection with lakes drawn in white.
layout = dict(
    title='2013 US Number of Payments Across States',
    geo=dict(
        scope='usa',
        projection=dict(type='albers usa'),
        showlakes=True,
        lakecolor='rgb(255, 255, 255)',
    ),
)

In [None]:
# Bundle the trace list and the layout into one figure spec.
fig = {'data': data, 'layout': layout}

# The upload call below cannot run on Kaggle; run it on your own machine.
# url = plotly.plot(fig, filename='2013 US Number of Payments Across States')

<div>
    <a href="https://plot.ly/~fjwCode/6/?share_key=uBcs8chxKhbwwaDvTPeonG" target="_blank" title="2013 US Number of Payments Across States" style="display: block; text-align: center;"><img src="https://plot.ly/~fjwCode/6.png?share_key=uBcs8chxKhbwwaDvTPeonG" alt="2013 US Number of Payments Across States" style="max-width: 100%;width: 600px;"  width="600" onerror="this.onerror=null;this.src='https://plot.ly/404.png';" /></a>
    <script data-plotly="fjwCode:6" sharekey-plotly="uBcs8chxKhbwwaDvTPeonG" src="https://plot.ly/embed.js" async></script>
</div>

**California** has the largest number of payments.