In [70]:
import pandas as pd

In [71]:
# The path to our CSV file
file = "KickstarterData.csv"
# Read our Kickstarter data into pandas

kicks_df = pd.read_csv("KickstarterData.csv")

In [72]:
# Get a list of all of our columns for easy reference
kicks_df.columns

Index(['id', 'photo', 'name', 'blurb', 'goal', 'pledged', 'state', 'slug',
       'disable_communication', 'country', 'currency', 'currency_symbol',
       'currency_trailing_code', 'deadline', 'state_changed_at', 'created_at',
       'launched_at', 'staff_pick', 'is_starrable', 'backers_count',
       'static_usd_rate', 'usd_pledged', 'creator', 'location', 'category',
       'profile', 'spotlight', 'urls', 'source_url', 'friends', 'is_starred',
       'is_backing', 'permissions'],
      dtype='object')

In [73]:
# Extract "name", "goal", "pledged", "state", "country", "staff_pick",
# "backers_count", and "spotlight"
kicks_extract = kicks_df.loc[:, ["name", "goal","pledged","state","country","staff_pick","backers_count","spotlight"]]


In [74]:
# Remove projects that made no money at all
pledged_no_cero = kicks_extract.loc[kicks_extract["pledged"] > 0]
pledged_no_cero["pledged"].min()

1.0

In [75]:
# Collect only those projects that were hosted in the US
us_projects = pledged_no_cero.loc[pledged_no_cero["country"] == "US"]

# Create a list of the columns
columns = us_projects.count()
columns
# Create a new df for "US" with the columns above. 
us_projects.head()

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True
8,Forefront Festival 2015,7200.0,7230.0,successful,US,False,68,True
11,Hamlet the Hip-Hopera,9747.0,10103.0,successful,US,True,132,True
14,Pride Con,15000.0,15110.0,successful,US,False,60,True
15,En Garde Arts Emerging Artists Festival BOSSS,10000.0,10306.0,successful,US,True,80,True


In [76]:
# Create a new column that finds the average amount pledged to a project
us_projects["average_donation"] = us_projects["pledged"] / us_projects["backers_count"]
us_projects.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight,average_donation
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True,172.058824
8,Forefront Festival 2015,7200.0,7230.0,successful,US,False,68,True,106.323529
11,Hamlet the Hip-Hopera,9747.0,10103.0,successful,US,True,132,True,76.537879
14,Pride Con,15000.0,15110.0,successful,US,False,60,True,251.833333
15,En Garde Arts Emerging Artists Festival BOSSS,10000.0,10306.0,successful,US,True,80,True,128.825


In [77]:
# First convert "average_donation", "goal", and "pledged" columns to float
# Then Format to go to two decimal places, include a dollar sign, and use comma notation

us_projects["average_donation"] = us_projects["average_donation"].astype(float).map(
    "${:,.2f}".format)
us_projects["goal"] = us_projects["goal"].astype(float).map("${:,.2f}".format)
us_projects["pledged"] = us_projects["pledged"].astype(float).map("${:,.2f}".format)
us_projects.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight,average_donation
0,The Class Act Players Theatre Company Presents...,"$1,500.00","$2,925.00",successful,US,False,17,True,$172.06
8,Forefront Festival 2015,"$7,200.00","$7,230.00",successful,US,False,68,True,$106.32
11,Hamlet the Hip-Hopera,"$9,747.00","$10,103.00",successful,US,True,132,True,$76.54
14,Pride Con,"$15,000.00","$15,110.00",successful,US,False,60,True,$251.83
15,En Garde Arts Emerging Artists Festival BOSSS,"$10,000.00","$10,306.00",successful,US,True,80,True,$128.82


In [78]:
# Calculate the total number of backers for all US projects
total_backers = us_projects["backers_count"].sum()
total_backers

89273

In [79]:
# Calculate the average number of backers for all US projects
average_backers = total_backers / us_projects["backers_count"].count()
average_backers

41.931892907468296

In [85]:
# Collect only those US campaigns that have been picked as a "Staff Pick"
#us_projects = pledged_no_cero.loc[pledged_no_cero["country"] == "US"]

only_staff_picked_df = us_projects.loc[us_projects["staff_pick"] == True]
only_staff_picked_df.head()

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight,average_donation
11,Hamlet the Hip-Hopera,"$9,747.00","$10,103.00",successful,US,True,132,True,$76.54
15,En Garde Arts Emerging Artists Festival BOSSS,"$10,000.00","$10,306.00",successful,US,True,80,True,$128.82
39,"""Poor People"" at FringeNYC 2015","$5,500.00","$5,682.00",successful,US,True,34,True,$167.12
44,Queen Mab's Steampunk and Fairie Street Festival,"$1,300.00","$3,363.00",successful,US,True,62,True,$54.24
45,RAFT: a new play by Emily Kitchens,"$7,500.00","$7,826.00",successful,US,True,120,True,$65.22


In [92]:
# Group by the state of the campaigns and see if staff picks matter (Seems to matter quite a bit)
grouped_df = only_staff_picked_df.groupby("state")
grouped_df["name"].count()

state
canceled        6
failed         21
live            2
successful    145
Name: name, dtype: int64