In [1]:
import pandas as pd

In [2]:
# Relative path of the crowdfunding CSV export
file = "Resources/CrowdfundingData.csv"

# Load the CSV into a DataFrame and preview its first five rows
df = pd.read_csv(filepath_or_buffer=file)
df.head(n=5)

Unnamed: 0,id,name,blurb,goal,pledged,outcome,backers_count,country,currency,launched_at,deadline,staff_pick,spotlight,category
0,0,"Baldwin, Riley and Jackson",Pre-emptive tertiary standardization,100,0,failed,0,CA,CAD,1448690400,1450159200,False,False,food/food trucks
1,1,Odom Inc,Managed bottom-line architecture,1400,14560,successful,158,US,USD,1408424400,1408597200,False,True,music/rock
2,2,"Melton, Robinson and Fritz",Function-based leadingedge pricing structure,108400,142523,successful,1425,AU,AUD,1384668000,1384840800,False,False,technology/web
3,3,"Mcdonald, Gonzalez and Ross",Vision-oriented fresh-thinking conglomeration,4200,2477,failed,24,US,USD,1565499600,1568955600,False,False,music/rock
4,4,Larson-Little,Proactive foreground core,7600,5265,failed,53,US,USD,1547964000,1548309600,False,False,theater/plays


In [3]:
# Show the column labels of the raw DataFrame, as a reference for the
# column names used when selecting and filtering
df.columns

Index(['id', 'name', 'blurb', 'goal', 'pledged', 'outcome', 'backers_count',
       'country', 'currency', 'launched_at', 'deadline', 'staff_pick',
       'spotlight', 'category'],
      dtype='object')

In [4]:
# Narrow the raw data to the eight columns used in this analysis
# (all rows are kept; only the column set shrinks)
reduced_crowdfunding_df = df.loc[
    :,
    [
        "name",
        "goal",
        "pledged",
        "outcome",
        "country",
        "staff_pick",
        "backers_count",
        "spotlight",
    ],
]
reduced_crowdfunding_df

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
0,"Baldwin, Riley and Jackson",100,0,failed,CA,False,0,False
1,Odom Inc,1400,14560,successful,US,False,158,True
2,"Melton, Robinson and Fritz",108400,142523,successful,AU,False,1425,False
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False
4,Larson-Little,7600,5265,failed,US,False,53,False
...,...,...,...,...,...,...,...,...
995,Manning-Hamilton,97300,153216,successful,US,False,2043,True
996,Butler LLC,6600,4814,failed,US,False,112,False
997,Ball LLC,7600,4603,canceled,IT,False,139,False
998,"Taylor, Santiago and Flores",66600,37823,failed,US,False,374,True


In [5]:
# Keep only the campaigns whose pledged amount is greater than zero
reduced_crowdfunding_df = reduced_crowdfunding_df.loc[
    reduced_crowdfunding_df["pledged"].gt(0)]
reduced_crowdfunding_df.head(n=5)

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
1,Odom Inc,1400,14560,successful,US,False,158,True
2,"Melton, Robinson and Fritz",108400,142523,successful,AU,False,1425,False
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False
4,Larson-Little,7600,5265,failed,US,False,53,False
5,Harris Group,7600,13195,successful,DK,False,174,False


In [6]:
# Collect only those projects that were hosted in Australia.

# Create a list of the columns
columns = [
    "name", "goal", "pledged", "outcome",
    "country", "staff_pick", "backers_count", "spotlight"]

# Create a new df for "AU" (Australia) with the columns.
# .copy() makes this an independent frame, so the columns that are added to it
# and reformatted on it later never write into (or warn about writing into)
# a selection of reduced_crowdfunding_df.
hosted_in_aus_df = reduced_crowdfunding_df.loc[
    reduced_crowdfunding_df["country"] == "AU", columns].copy()
hosted_in_aus_df.head()

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
2,"Melton, Robinson and Fritz",108400,142523,successful,AU,False,1425,False
85,"Hill, Lawson and Wilkinson",4900,6430,successful,AU,False,71,False
87,Farrell and Sons,198500,123040,failed,AU,False,1482,True
98,"Arias, Allen and Miller",97800,32951,failed,AU,False,1220,False
112,Jones-Meyer,4700,12635,successful,AU,False,361,False


In [7]:
# Preview the average pledge per backer for each Australian project.
# This only computes a standalone Series; hosted_in_aus_df is not modified here.
average_donation = hosted_in_aus_df["pledged"].div(
    hosted_in_aus_df["backers_count"])
average_donation

2      100.016140
85      90.563380
87      83.022942
98      27.009016
112     35.000000
129     86.472727
156     71.013193
157     73.733333
167     74.000000
180     81.981965
203     42.999778
209     51.004950
236     75.842105
241    101.976841
248     60.105505
267     51.990607
296     88.210526
297     65.240385
365    104.776786
396     48.012469
502     37.037634
510     70.908397
551     37.998561
563     60.082353
590     67.720930
620     90.039062
656     98.966270
672     24.997516
706    103.037918
723     92.013889
762     62.040000
766     55.052419
785    101.881890
805     73.611940
827     74.804878
855     53.000000
872     98.666667
896    105.026027
913     67.946463
919     92.955556
942     92.611940
944     30.958175
954    101.023256
dtype: float64

In [8]:
# Store the average pledge per backer (pledged / backers_count) as a new
# "average_donation" column on the Australian projects frame
hosted_in_aus_df["average_donation"] = hosted_in_aus_df["pledged"].div(
    hosted_in_aus_df["backers_count"])

In [9]:
# Format "average_donation", "goal", and "pledged" as currency text:
# first convert to float, then render with a dollar sign, comma notation and
# two decimal places.
# NOTE: after this cell these three columns hold strings, not numbers, so any
# arithmetic on them has to happen before this point.
for currency_column in ["average_donation", "goal", "pledged"]:
    # Only format columns that are still numeric. Without this guard, running
    # the cell a second time fails, because strings such as "$100.02" cannot be
    # converted back to float.
    if pd.api.types.is_numeric_dtype(hosted_in_aus_df[currency_column]):
        hosted_in_aus_df[currency_column] = hosted_in_aus_df[
            currency_column].astype(float).map("${:,.2f}".format)

In [10]:
# Preview the first five Australian projects with the currency formatting applied
hosted_in_aus_df.head(n=5)

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight,average_donation
2,"Melton, Robinson and Fritz","$108,400.00","$142,523.00",successful,AU,False,1425,False,$100.02
85,"Hill, Lawson and Wilkinson","$4,900.00","$6,430.00",successful,AU,False,71,False,$90.56
87,Farrell and Sons,"$198,500.00","$123,040.00",failed,AU,False,1482,True,$83.02
98,"Arias, Allen and Miller","$97,800.00","$32,951.00",failed,AU,False,1220,False,$27.01
112,Jones-Meyer,"$4,700.00","$12,635.00",successful,AU,False,361,False,$35.00


In [11]:
# Total backers across every Australian project (sum of "backers_count")
hosted_in_aus_df.loc[:, "backers_count"].sum()

34226

In [12]:
# Mean number of backers per Australian project (mean of "backers_count")
hosted_in_aus_df.loc[:, "backers_count"].mean()

795.953488372093

In [14]:
# Keep the Australian campaigns whose "spotlight" flag is True
spotlight_df = hosted_in_aus_df.loc[hosted_in_aus_df["spotlight"].eq(True)]
spotlight_df

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight,average_donation
87,Farrell and Sons,"$198,500.00","$123,040.00",failed,AU,False,1482,True,$83.02
236,Gallegos-Cobb,"$39,500.00","$4,323.00",failed,AU,False,57,True,$75.84
241,Gonzalez-Martinez,"$168,500.00","$171,729.00",successful,AU,False,1684,True,$101.98
297,"Brown, Herring and Bass","$7,200.00","$6,785.00",failed,AU,False,104,True,$65.24
502,Johnson Inc,"$1,300.00","$6,889.00",successful,AU,False,186,True,$37.04
551,Martin-James,"$180,100.00","$105,598.00",failed,AU,False,2779,True,$38.00
706,Moreno Ltd,"$108,400.00","$138,586.00",successful,AU,False,1345,True,$103.04
785,"Peterson, Fletcher and Sanchez","$6,700.00","$12,939.00",successful,AU,False,127,True,$101.88
827,"Miranda, Martinez and Lowery","$2,300.00","$6,134.00",successful,AU,False,82,True,$74.80
896,Wright-Bryant,"$19,800.00","$153,338.00",successful,AU,False,1460,True,$105.03


In [15]:
# Bucket the spotlighted campaigns by outcome, then count the campaigns in
# each bucket (non-null "name" values per outcome)
outcome_groups = spotlight_df.groupby(by="outcome")
outcome_groups["name"].count()

outcome
failed        5
successful    6
Name: name, dtype: int64