In [1]:
import pandas as pd

In [3]:
# The path to our CSV file
file = "Resources/CrowdfundingData.csv"

# Read our Crowdfunding data into pandas
file_df = pd.read_csv(file)
file_df.head()

Unnamed: 0,id,name,blurb,goal,pledged,outcome,backers_count,country,currency,launched_at,deadline,staff_pick,spotlight,category
0,0,"Baldwin, Riley and Jackson",Pre-emptive tertiary standardization,100,0,failed,0,CA,CAD,1448690400,1450159200,False,False,food/food trucks
1,1,Odom Inc,Managed bottom-line architecture,1400,14560,successful,158,US,USD,1408424400,1408597200,False,True,music/rock
2,2,"Melton, Robinson and Fritz",Function-based leadingedge pricing structure,108400,142523,successful,1425,AU,AUD,1384668000,1384840800,False,False,technology/web
3,3,"Mcdonald, Gonzalez and Ross",Vision-oriented fresh-thinking conglomeration,4200,2477,failed,24,US,USD,1565499600,1568955600,False,False,music/rock
4,4,Larson-Little,Proactive foreground core,7600,5265,failed,53,US,USD,1547964000,1548309600,False,False,theater/plays


In [5]:
# Get a list of all of our columns for easy reference
file_df.columns

Index(['id', 'name', 'blurb', 'goal', 'pledged', 'outcome', 'backers_count',
       'country', 'currency', 'launched_at', 'deadline', 'staff_pick',
       'spotlight', 'category'],
      dtype='object')

In [6]:
# Extract "name", "goal", "pledged", "outcome", "country", "staff_pick",
# "backers_count", and "spotlight"
extract_df = file_df[["name", "goal", "pledged", "outcome", "country", "staff_pick", "backers_count","spotlight" ]]
extract_df.head()

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
0,"Baldwin, Riley and Jackson",100,0,failed,CA,False,0,False
1,Odom Inc,1400,14560,successful,US,False,158,True
2,"Melton, Robinson and Fritz",108400,142523,successful,AU,False,1425,False
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False
4,Larson-Little,7600,5265,failed,US,False,53,False


In [7]:
# Remove projects that made no money at all
filtered_df = extract_df[extract_df['pledged'] != 0]
filtered_df.head()

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
1,Odom Inc,1400,14560,successful,US,False,158,True
2,"Melton, Robinson and Fritz",108400,142523,successful,AU,False,1425,False
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False
4,Larson-Little,7600,5265,failed,US,False,53,False
5,Harris Group,7600,13195,successful,DK,False,174,False


In [14]:
# Collect only those projects that were hosted in the US
us_df = filtered_df[filtered_df['country'] == "US"]
# Create a list of the columns
us_df.columns
# Create a new df for "US" with the columns above. 

Index(['name', 'goal', 'pledged', 'outcome', 'country', 'staff_pick',
       'backers_count', 'spotlight'],
      dtype='object')

In [26]:
# Create a new column that finds the average amount pledged to a project
us_df["average_donation"] = us_df["pledged"] / us_df["backers_count"]
us_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  us_df["average_donation"] = us_df["pledged"] / us_df["backers_count"]


Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight,average_donation
1,Odom Inc,1400.0,14560.0,successful,US,False,158,True,92.151899
3,"Mcdonald, Gonzalez and Ross",4200.0,2477.0,failed,US,False,24,False,103.208333
4,Larson-Little,7600.0,5265.0,failed,US,False,53,False,99.339623
9,"Rangel, Holt and Jones",6200.0,3208.0,failed,US,False,44,False,72.909091
10,Green Ltd,5200.0,13838.0,successful,US,False,220,False,62.900000
...,...,...,...,...,...,...,...,...,...
994,"Leach, Rich and Price",141100.0,74073.0,failed,US,False,842,True,87.972684
995,Manning-Hamilton,97300.0,153216.0,successful,US,False,2043,True,74.995595
996,Butler LLC,6600.0,4814.0,failed,US,False,112,False,42.982143
998,"Taylor, Santiago and Flores",66600.0,37823.0,failed,US,False,374,True,101.131016


In [27]:
# First convert "average_donation", "goal", and "pledged" columns to float
us_df["average_donation"] = us_df["average_donation"].astype(float)
us_df["goal"] = us_df["goal"].astype(float)
us_df["pledged"] = us_df["pledged"].astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  us_df["average_donation"] = us_df["average_donation"].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  us_df["goal"] = us_df["goal"].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  us_df["pledged"] = us_df["pledged"].astype(float)


In [28]:

# Then Format to go to two decimal places, include a dollar sign, and use comma notation
us_df["average_donation"] = us_df["average_donation"].map("${:,.2f}".format)
us_df["goal"] = us_df["goal"].map("${:,.2f}".format)
us_df["pledged"] = us_df["pledged"].map("${:,.2f}".format)
us_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  us_df["average_donation"] = us_df["average_donation"].map("${:,.2f}".format)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  us_df["goal"] = us_df["goal"].map("${:,.2f}".format)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  us_df["pledged"] = us_df["pledged"].map("${:,.2f}".format)


Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight,average_donation
1,Odom Inc,"$1,400.00","$14,560.00",successful,US,False,158,True,$92.15
3,"Mcdonald, Gonzalez and Ross","$4,200.00","$2,477.00",failed,US,False,24,False,$103.21
4,Larson-Little,"$7,600.00","$5,265.00",failed,US,False,53,False,$99.34
9,"Rangel, Holt and Jones","$6,200.00","$3,208.00",failed,US,False,44,False,$72.91
10,Green Ltd,"$5,200.00","$13,838.00",successful,US,False,220,False,$62.90
...,...,...,...,...,...,...,...,...,...
994,"Leach, Rich and Price","$141,100.00","$74,073.00",failed,US,False,842,True,$87.97
995,Manning-Hamilton,"$97,300.00","$153,216.00",successful,US,False,2043,True,$75.00
996,Butler LLC,"$6,600.00","$4,814.00",failed,US,False,112,False,$42.98
998,"Taylor, Santiago and Flores","$66,600.00","$37,823.00",failed,US,False,374,True,$101.13


In [29]:
# Calculate the total number of backers for all US projects
us_df["backers_count"].sum()

545510

In [30]:
# Calculate the average number of backers for all US projects
us_df["backers_count"].mean()

715.8923884514436

In [33]:
# Collect only those US campaigns that have been picked as a "Staff Pick"
us_staff_df = filtered_df[filtered_df['staff_pick'] == True]
us_staff_df

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
76,"Martin, Conway and Larsen",122900,95993,failed,US,True,1684,True
86,Davis-Smith,7400,12405,successful,US,True,203,False
123,Edwards-Lewis,177700,33092,failed,CA,True,662,False
193,"Calhoun, Rogers and Long",6600,3012,failed,US,True,65,False
205,Weaver-Marquez,1300,5614,successful,US,True,80,False
220,Owens-Le,7900,667,failed,US,True,17,False
221,Huff LLC,121500,119830,failed,US,True,2179,False
225,Fox-Quinn,67800,176398,successful,US,True,5880,False
259,Watkins Ltd,1800,10755,successful,US,True,138,False
291,"Bell, Grimes and Kerr",1800,8219,successful,US,True,107,False


In [36]:
# Group by the outcome of the campaigns and see if staff picks matter (Seems to matter quite a bit)
grouped = us_staff_df.groupby(["outcome"])
grouped["name"].count()


outcome
canceled       4
failed        17
successful    28
Name: name, dtype: int64