## amt.pledged, currency & goal columns

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
import seaborn as sns

In [2]:
# Read in and examine data
df = pd.read_csv('most_backed.csv')
del df['Unnamed: 0']
df.head(1)

Unnamed: 0,amt.pledged,blurb,category,currency,goal,location,num.backers,num.backers.tier,pledge.tier,title,url
0,8782571.0,\nThis is a card game for people who are into ...,Tabletop Games,usd,10000.0,"Los Angeles, CA",219382,"[15505, 202934, 200, 5]","[20.0, 35.0, 100.0, 500.0]",Exploding Kittens,/projects/elanlee/exploding-kittens


In [3]:
# Create sub dataframe for only money-related columns
sub = df[['amt.pledged', 'goal', 'currency']]

# Rename amt.pledged column
sub.rename(columns={'amt.pledged': 'amt_pledged'}, inplace=True)

# Examine new sub dataframe
sub.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  **kwargs)


Unnamed: 0,amt_pledged,goal,currency
0,8782571.0,10000.0,usd
1,6465690.0,15000.0,usd
2,5408916.0,1000000.0,usd
3,5702153.0,2000000.0,usd
4,3336371.0,400000.0,usd


In [4]:
# Check unique currencies
df['currency'].value_counts()

usd    3437
gbp     252
cad     128
eur      96
aud      53
sek      14
nzd      10
dkk       7
chf       3
Name: currency, dtype: int64

In [5]:
# Create exchange rate dictionary (note: exchange rates based on 11/9/16)
exchange_dict = {
    'usd': 1.00,
    'gbp': 1.25,
    'cad': 0.75,
    'eur': 1.09,
    'aud': 0.77,
    'sek': 0.11,
    'nzd': 0.73,
    'dkk': 0.15,
    'chf': 1.02,}

# Create a new column that maps out exchange rate based on currency
sub['exchange_rate'] = sub['currency'].map(exchange_dict)

# Create new column that converts all data to USD 
sub['pledged_USD'] = sub['amt_pledged'] * sub['exchange_rate']
sub['goal_USD'] = sub['goal'] * sub['exchange_rate']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [6]:
# Just checking to make sure conversions worked
sub[sub['currency'] == 'chf'].loc[:, ['currency', 'amt_pledged', 'pledged_USD', 'goal', 'goal_USD']]

Unnamed: 0,currency,amt_pledged,pledged_USD,goal,goal_USD
1857,chf,530792.0,541407.84,80000.0,81600.0
2023,chf,53862.0,54939.24,39270.0,40055.4
2933,chf,247688.0,252641.76,125000.0,127500.0


In [7]:
# Create new sub dataframe with only USD info
sub = sub[['pledged_USD', 'goal_USD', 'currency']]
sub.head(2)

Unnamed: 0,pledged_USD,goal_USD,currency
0,8782571.0,10000.0,usd
1,6465690.0,15000.0,usd


In [10]:
# Create a new column that shows pledged/goal
sub['percent_total'] = sub['pledged_USD'] / sub['goal_USD']

# print np.min(sub['percent_total'])
# print np.max(sub['percent_total'])
# print np.mean(sub['percent_total'])
# print np.median(sub['percent_total'])
# print ''
# print np.min(sub['pledged_USD'])
# print np.max(sub['pledged_USD'])
# print np.mean(sub['pledged_USD'])
# print np.median(sub['pledged_USD'])

In [11]:
# Reordering columns
sub = sub[['pledged_USD', 'goal_USD', 'percent_total', 'currency']]

In [13]:
sub.head()

Unnamed: 0,pledged_USD,goal_USD,percent_total,currency
0,8782571.0,10000.0,878.2571,usd
1,6465690.0,15000.0,431.046,usd
2,5408916.0,1000000.0,5.408916,usd
3,5702153.0,2000000.0,2.851077,usd
4,3336371.0,400000.0,8.340927,usd


In [14]:
# Make goal bins for histogram
dreaming = sub['goal_USD'].quantile(0.10)
high = sub['goal_USD'].quantile(0.33)   
med = sub['goal_USD'].quantile(.67)   
low = sub['goal_USD'].quantile(1.0)   

def make_bins(x):
    if x <= dreaming:
        return 'dreaming'
    elif x <= high:
        return 'high'
    elif x <= med:
        return 'med'
    else:
        return 'low'
    
sub['goal_bins'] = sub['goal_USD'].apply(make_bins)
sub['goal_bins'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


med         1552
low         1102
high         946
dreaming     400
Name: goal_bins, dtype: int64

In [15]:
# Make pledged bins for histogram
dreaming = sub['pledged_USD'].quantile(0.10)
high = sub['pledged_USD'].quantile(0.33)   
med = sub['pledged_USD'].quantile(.67)   
low = sub['pledged_USD'].quantile(1.0)   

def make_bins(x):
    if x <= dreaming:
        return 'dreaming'
    elif x <= high:
        return 'high'
    elif x <= med:
        return 'med'
    else:
        return 'low'
    
sub['pledged_bins'] = sub['pledged_USD'].apply(make_bins)
sub['pledged_bins'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


med         1360
low         1320
high         920
dreaming     400
Name: pledged_bins, dtype: int64

In [16]:
sub.head()

Unnamed: 0,pledged_USD,goal_USD,percent_total,currency,goal_bins,pledged_bins
0,8782571.0,10000.0,878.2571,usd,high,low
1,6465690.0,15000.0,431.046,usd,high,low
2,5408916.0,1000000.0,5.408916,usd,low,low
3,5702153.0,2000000.0,2.851077,usd,low,low
4,3336371.0,400000.0,8.340927,usd,low,low
