In [1]:
import pandas as pd

In [2]:
# The path to our CSV file
file = r"Resources/KickstarterData.csv"

# Read our Kickstarter data into pandas
df = pd.read_csv(file)
df.head()
df.count()

id                        4120
photo                     4120
name                      4120
blurb                     4119
goal                      4120
pledged                   4120
state                     4120
slug                      4120
disable_communication     4120
country                   4120
currency                  4120
currency_symbol           4120
currency_trailing_code    4120
deadline                  4120
state_changed_at          4120
created_at                4120
launched_at               4120
staff_pick                4120
is_starrable              4120
backers_count             4120
static_usd_rate           4120
usd_pledged               4120
creator                   4120
location                  4115
category                  4120
profile                   4120
spotlight                 4120
urls                      4120
source_url                4120
friends                      0
is_starred                   0
is_backing                   0
permissi

In [3]:
# Get a list of all of our columns for easy reference
col = list(df)
col

['id',
 'photo',
 'name',
 'blurb',
 'goal',
 'pledged',
 'state',
 'slug',
 'disable_communication',
 'country',
 'currency',
 'currency_symbol',
 'currency_trailing_code',
 'deadline',
 'state_changed_at',
 'created_at',
 'launched_at',
 'staff_pick',
 'is_starrable',
 'backers_count',
 'static_usd_rate',
 'usd_pledged',
 'creator',
 'location',
 'category',
 'profile',
 'spotlight',
 'urls',
 'source_url',
 'friends',
 'is_starred',
 'is_backing',
 'permissions']

In [4]:
# Extract "name", "goal", "pledged", "state", "country", "staff_pick",
# "backers_count", and "spotlight"
df_extract = df[["name", "goal", "pledged", "state", "country", "staff_pick", "backers_count","spotlight"]]
df_extract.head()

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True
1,MR INCREDIBLE by Camilla Whitehill - VAULT Fes...,2500.0,2936.0,successful,GB,True,15,True
2,RUN,1000.0,1200.0,successful,GB,False,30,True
3,9th International Meeting of Youth Theatre sap...,2000.0,2135.0,successful,IT,False,24,True
4,Get Conti to the Ed Fringe!,1000.0,1250.0,successful,GB,False,28,True


In [5]:
# Remove projects that made no money at all
df_extract = df_extract[df_extract.pledged != 0]
df_extract.count()

name             3307
goal             3307
pledged          3307
state            3307
country          3307
staff_pick       3307
backers_count    3307
spotlight        3307
dtype: int64

In [6]:
# Collect only those projects that were hosted in the US
df_extract = df_extract.loc[df_extract["country"] == "US", :]
df_extract.count()

name             2129
goal             2129
pledged          2129
state            2129
country          2129
staff_pick       2129
backers_count    2129
spotlight        2129
dtype: int64

In [7]:
# Create a new column that finds the average amount pledged to a project
df_extract['average_donation'] = df_extract["pledged"]/df_extract["backers_count"]
df_extract.head()


Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight,average_donation
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True,172.058824
8,Forefront Festival 2015,7200.0,7230.0,successful,US,False,68,True,106.323529
11,Hamlet the Hip-Hopera,9747.0,10103.0,successful,US,True,132,True,76.537879
14,Pride Con,15000.0,15110.0,successful,US,False,60,True,251.833333
15,En Garde Arts Emerging Artists Festival BOSSS,10000.0,10306.0,successful,US,True,80,True,128.825


In [8]:
# First convert "average_donation", "goal", and "pledged" columns to float
# df_extract['average_donation'] = pd.to_numeric(df_extract['average_donation'])
# df_extract['goal'] = pd.to_numeric(df_extract['goal'])
# df_extract['pledged'] = pd.to_numeric(df_extract['pledged'])
# df_extract.dtypes
# df_extract[['average_donation','goal','pledged']] = pd.to_numeric(df_extract[['average_donation','goal','pledged']])


In [11]:
# Then Format to go to two decimal places, include a dollar sign, and use comma notation
df_extract['average_donation'] = df_extract['average_donation'].astype(float).map("${:.2f}".format)
df_extract['goal'] = df_extract['goal'].astype(float).map("${:.2f}".format)
df_extract['pledged'] = df_extract['pledged'].astype(float).map("${:.2f}".format)

df_extract.head()

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight,average_donation
0,The Class Act Players Theatre Company Presents...,$1500.00,$2925.00,successful,US,False,17,True,$172.06
8,Forefront Festival 2015,$7200.00,$7230.00,successful,US,False,68,True,$106.32
11,Hamlet the Hip-Hopera,$9747.00,$10103.00,successful,US,True,132,True,$76.54
14,Pride Con,$15000.00,$15110.00,successful,US,False,60,True,$251.83
15,En Garde Arts Emerging Artists Festival BOSSS,$10000.00,$10306.00,successful,US,True,80,True,$128.82


In [12]:
# Calculate the total number of backers for all US projects
us_backers = df_extract['backers_count'].sum()
us_backers

89273

In [13]:
# Calculate the average number of backers for all US projects
avg_backers_nb = us_backers/df_extract['name'].count()
avg_backers_nb

41.931892907468296

In [14]:
# Collect only those US campaigns that have been picked as a "Staff Pick"
df_staff_pick = df_extract.loc[df_extract["staff_pick"] == True, :]
df_staff_pick.count()

name                174
goal                174
pledged             174
state               174
country             174
staff_pick          174
backers_count       174
spotlight           174
average_donation    174
dtype: int64

In [15]:
# Group by the state of the campaigns and see if staff picks matter (Seems to matter quite a bit)
US_staff_pick = df_staff_pick.groupby(['state']).count()
total = US_staff_pick.sum()

US_staff_pick['ratio'] = US_staff_pick['name']/US_staff_pick['name'].sum()*100
US_staff_pick['ratio'] = US_staff_pick['ratio'].map("{:.1f}%".format)
print(total)

US_staff_pick.head()

name                174
goal                174
pledged             174
country             174
staff_pick          174
backers_count       174
spotlight           174
average_donation    174
dtype: int64


Unnamed: 0_level_0,name,goal,pledged,country,staff_pick,backers_count,spotlight,average_donation,ratio
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
canceled,6,6,6,6,6,6,6,6,3.4%
failed,21,21,21,21,21,21,21,21,12.1%
live,2,2,2,2,2,2,2,2,1.1%
successful,145,145,145,145,145,145,145,145,83.3%


NameError: name 'ratio_func' is not defined

In [19]:
def ratio_func(df):
    
    total = df.sum()
    df['ratio'] = df['name']/df['name'].sum()*100
    df['ratio'] = df['ratio'].map("{:.1f}%".format)
    
    return df
    
    
US_status = df_extract.groupby(['state']).count()
US_status

US_status = ratio_func(US_status)

# total = US_status.sum()

# US_status['ratio'] = US_status['name']/US_status['name'].sum()*100
# US_status['ratio'] = US_status['ratio'].map("{:.1f}%".format)
# print(total)

US_status.head()    

Unnamed: 0_level_0,name,goal,pledged,country,staff_pick,backers_count,spotlight,average_donation,ratio
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
canceled,216,216,216,216,216,216,216,216,10.1%
failed,1123,1123,1123,1123,1123,1123,1123,1123,52.7%
live,13,13,13,13,13,13,13,13,0.6%
successful,775,775,775,775,775,775,775,775,36.4%
suspended,2,2,2,2,2,2,2,2,0.1%
