In [46]:
import pandas as pd

In [47]:
# The path to our CSV file
path = "Resources/CrowdfundingData.csv"
# Read our Crowdfunding data into pandas
df = pd.read_csv(path)

In [48]:
# Get a list of all of our columns for easy reference
df.columns

Index(['id', 'name', 'blurb', 'goal', 'pledged', 'outcome', 'backers_count',
       'country', 'currency', 'launched_at', 'deadline', 'staff_pick',
       'spotlight', 'category'],
      dtype='object')

In [49]:
# Extract "name", "goal", "pledged", "outcome", "country", "staff_pick",
# "backers_count", and "spotlight"
col_df = df[["name", "goal", "pledged", "outcome", "country", "staff_pick","backers_count", "spotlight"]]
col_df.head()

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
0,"Baldwin, Riley and Jackson",100,0,failed,CA,False,0,False
1,Odom Inc,1400,14560,successful,US,False,158,True
2,"Melton, Robinson and Fritz",108400,142523,successful,AU,False,1425,False
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False
4,Larson-Little,7600,5265,failed,US,False,53,False


In [50]:
# Remove projects that made no money at all
all_money = col_df.drop(col_df[col_df.pledged == 0].index)
all_money

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
1,Odom Inc,1400,14560,successful,US,False,158,True
2,"Melton, Robinson and Fritz",108400,142523,successful,AU,False,1425,False
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False
4,Larson-Little,7600,5265,failed,US,False,53,False
5,Harris Group,7600,13195,successful,DK,False,174,False
...,...,...,...,...,...,...,...,...
995,Manning-Hamilton,97300,153216,successful,US,False,2043,True
996,Butler LLC,6600,4814,failed,US,False,112,False
997,Ball LLC,7600,4603,canceled,IT,False,139,False
998,"Taylor, Santiago and Flores",66600,37823,failed,US,False,374,True


In [51]:
# Collect only those projects that were hosted in the US
# Create a list of the columns
# Create a new df for "US" with the columns above. 
all_money.columns
us_df = all_money.drop(all_money[all_money.country != 'US'].index)
us_df

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
1,Odom Inc,1400,14560,successful,US,False,158,True
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False
4,Larson-Little,7600,5265,failed,US,False,53,False
9,"Rangel, Holt and Jones",6200,3208,failed,US,False,44,False
10,Green Ltd,5200,13838,successful,US,False,220,False
...,...,...,...,...,...,...,...,...
994,"Leach, Rich and Price",141100,74073,failed,US,False,842,True
995,Manning-Hamilton,97300,153216,successful,US,False,2043,True
996,Butler LLC,6600,4814,failed,US,False,112,False
998,"Taylor, Santiago and Flores",66600,37823,failed,US,False,374,True


In [52]:
# Create a new column that finds the average amount pledged to a project
us_df['Average Pledge'] = us_df['pledged'] / us_df['backers_count']
us_df

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight,Average Pledge
1,Odom Inc,1400,14560,successful,US,False,158,True,$92.15
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False,$103.21
4,Larson-Little,7600,5265,failed,US,False,53,False,$99.34
9,"Rangel, Holt and Jones",6200,3208,failed,US,False,44,False,$72.91
10,Green Ltd,5200,13838,successful,US,False,220,False,$62.90
...,...,...,...,...,...,...,...,...,...
994,"Leach, Rich and Price",141100,74073,failed,US,False,842,True,$87.97
995,Manning-Hamilton,97300,153216,successful,US,False,2043,True,$75.00
996,Butler LLC,6600,4814,failed,US,False,112,False,$42.98
998,"Taylor, Santiago and Flores",66600,37823,failed,US,False,374,True,$101.13


In [53]:
# First convert "average_donation", "goal", and "pledged" columns to float
us_df['goal'] = us_df['goal'].astype(float)
us_df['pledged'] = us_df['pledged'].astype(float)
us_df['Average Pledge'] = us_df['Average Pledge'].astype(float)

pd.options.display.float_format = '${:,.2f}'.format
us_df
# Then Format to go to two decimal places, include a dollar sign, and use comma notation

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight,Average Pledge
1,Odom Inc,"$1,400.00","$14,560.00",successful,US,False,158,True,$92.15
3,"Mcdonald, Gonzalez and Ross","$4,200.00","$2,477.00",failed,US,False,24,False,$103.21
4,Larson-Little,"$7,600.00","$5,265.00",failed,US,False,53,False,$99.34
9,"Rangel, Holt and Jones","$6,200.00","$3,208.00",failed,US,False,44,False,$72.91
10,Green Ltd,"$5,200.00","$13,838.00",successful,US,False,220,False,$62.90
...,...,...,...,...,...,...,...,...,...
994,"Leach, Rich and Price","$141,100.00","$74,073.00",failed,US,False,842,True,$87.97
995,Manning-Hamilton,"$97,300.00","$153,216.00",successful,US,False,2043,True,$75.00
996,Butler LLC,"$6,600.00","$4,814.00",failed,US,False,112,False,$42.98
998,"Taylor, Santiago and Flores","$66,600.00","$37,823.00",failed,US,False,374,True,$101.13


In [54]:
# Calculate the total number of backers for all US projects
us_df['backers_count'].sum()

545510

In [55]:
# Calculate the average number of backers for all US projects
us_df['backers_count'].mean()

715.8923884514436

In [61]:
# Collect only those US campaigns that have been picked as a "Staff Pick"
staffPick = us_df.drop(us_df[us_df.staff_pick == False].index)
staffPick

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight,Average Pledge
76,"Martin, Conway and Larsen","$122,900.00","$95,993.00",failed,US,True,1684,True,$57.00
86,Davis-Smith,"$7,400.00","$12,405.00",successful,US,True,203,False,$61.11
193,"Calhoun, Rogers and Long","$6,600.00","$3,012.00",failed,US,True,65,False,$46.34
205,Weaver-Marquez,"$1,300.00","$5,614.00",successful,US,True,80,False,$70.17
220,Owens-Le,"$7,900.00",$667.00,failed,US,True,17,False,$39.24
221,Huff LLC,"$121,500.00","$119,830.00",failed,US,True,2179,False,$54.99
225,Fox-Quinn,"$67,800.00","$176,398.00",successful,US,True,5880,False,$30.00
259,Watkins Ltd,"$1,800.00","$10,755.00",successful,US,True,138,False,$77.93
291,"Bell, Grimes and Kerr","$1,800.00","$8,219.00",successful,US,True,107,False,$76.81
384,"Baker, Collins and Smith","$114,400.00","$196,779.00",successful,US,True,4799,True,$41.00


In [77]:
# Group by the outcome of the campaigns and see if staff picks matter (Seems to matter quite a bit)
us_df.groupby('outcome')['staff_pick'].count()

outcome
canceled       44
failed        273
live            9
successful    436
Name: staff_pick, dtype: int64