In [1]:
# Dependencies
import pandas as pd

In [2]:
# load CSV
csv_path = "../Resources/2016-FCC-New-Coders-Survey-Data.csv"

In [3]:
# Read with pandas
survey_df = pd.read_csv(csv_path, encoding = "iso-8859-1", low_memory=False)

# Getting a high level view of the dataframe
survey_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15620 entries, 0 to 15619
Columns: 113 entries, Age to StudentDebtOwe
dtypes: float64(85), object(28)
memory usage: 13.5+ MB


In [4]:
# Inspect all columns
survey_df.columns

Index(['Age', 'AttendedBootcamp', 'BootcampFinish', 'BootcampFullJobAfter',
       'BootcampLoanYesNo', 'BootcampMonthsAgo', 'BootcampName',
       'BootcampPostSalary', 'BootcampRecommend', 'ChildrenNumber',
       ...
       'ResourceSoloLearn', 'ResourceStackOverflow', 'ResourceTreehouse',
       'ResourceUdacity', 'ResourceUdemy', 'ResourceW3Schools',
       'ResourceYouTube', 'SchoolDegree', 'SchoolMajor', 'StudentDebtOwe'],
      dtype='object', length=113)

In [5]:
#size of the dataframe
survey_df.shape

(15620, 113)

In [6]:
# Extract only columns 0, 1, 2, 3, 4, 7, 8, 9, 10,11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111 
survey_data = survey_df.iloc[:, [0, 1, 2, 3, 4, 7, 8, 9, 10,11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111]].copy()
survey_data.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,CodeEventBootcamp,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
0,28.0,0.0,,,,,,,"between 100,000 and 1 million",,United States of America,office and administrative support,Employed for wages,male,0.0,30.0,32000.0,,"some college credit, no degree",
1,22.0,0.0,,,,,,,"between 100,000 and 1 million",,United States of America,food and beverage,Employed for wages,male,,30.0,15000.0,Front-End Web Developer,"some college credit, no degree",
2,19.0,0.0,,,,,,,more than 1 million,,United States of America,finance,Employed for wages,male,,20.0,48000.0,,high school diploma or equivalent (GED),
3,26.0,0.0,,,,,,,more than 1 million,,United States of America,"arts, entertainment, sports, or media",Employed for wages,female,,20.0,43000.0,Front-End Web Developer,bachelor's degree,Cinematography And Film
4,20.0,0.0,,,,,,,"between 100,000 and 1 million",,United States of America,education,Employed for wages,female,,25.0,6000.0,Full-Stack Web Developer,"some college credit, no degree",


In [7]:
survey_data["AttendedBootcamp"].value_counts()

0.0    14427
1.0      953
Name: AttendedBootcamp, dtype: int64

In [8]:
# Change "0" to "No" and "1" to "Yes" in response columns
survey_data["AttendedBootcamp"].replace({0: "No", 1: "Yes"}, inplace = True)
survey_data["AttendedBootcamp"].value_counts()
#survey_data.head()

No     14427
Yes      953
Name: AttendedBootcamp, dtype: int64

In [9]:
# Calculate total number of respondents in survey
respondent_count = len(survey_data["Age"])
respondent_count

15620

In [10]:
# Extract rows corresponding only to people who attended a bootcamp
attendees_df = survey_data.loc[survey_data["AttendedBootcamp"] == "Yes", :]
attendees_df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,CodeEventBootcamp,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
93,32.0,Yes,1.0,0.0,0.0,,0.0,,"between 100,000 and 1 million",,United States of America,"arts, entertainment, sports, or media",Self-employed business owner,male,,20.0,67000.0,,bachelor's degree,Biology
97,26.0,Yes,1.0,1.0,0.0,45000.0,0.0,,more than 1 million,,United States of America,software development,Employed for wages,male,0.0,10.0,40000.0,,master's degree (non-professional),Music
130,41.0,Yes,1.0,1.0,1.0,75000.0,1.0,3.0,"less than 100,000",,United States of America,software development,Employed for wages,male,1.0,30.0,75000.0,,"some college credit, no degree",
159,26.0,Yes,1.0,0.0,0.0,,0.0,,"between 100,000 and 1 million",,United States of America,,Not working and not looking for work,female,,30.0,,Full-Stack Web Developer,"some college credit, no degree",
188,24.0,Yes,0.0,,1.0,,0.0,,"between 100,000 and 1 million",,Canada,,Not working but looking for work,female,,60.0,,,"some college credit, no degree",


In [11]:
# Calculate average age of attendees 
avg_age = attendees_df["Age"].mean()
print(f" average age: {avg_age}")

 average age: 31.066014669926652


In [12]:
# Calculate how many people attended a bootcamp
number_attendees = attendees_df["AttendedBootcamp"].count()
print(f" number of people attending bootcamp: {number_attendees}")

 number of people attending bootcamp: 953


In [13]:
# Calculate how many attendees hold degrees
#attendees_df["SchoolDegree"].unique()
degrees = ["bachelor's degree", "master's degree (non-professional)",\
       "professional degree (MBA, MD, JD, etc.)", "associate's degree", "Ph.D."]

number_degreeholder = attendees_df.loc[attendees_df["SchoolDegree"].isin(degrees), "AttendedBootcamp"].count()
print(f" number of people who holds a degree: {number_degreeholder}")

 number of people who holds a degree: 637


In [14]:
# Count number of attendees who self-identify as male; female; or are of non-binary gender identification  
number_by_gender = attendees_df.groupby("Gender")["AttendedBootcamp"].count() 
number_by_gender

Gender
agender          2
female         326
genderqueer      6
male           496
trans            3
Name: AttendedBootcamp, dtype: int64

In [15]:
# Calculate percentage of respondents who attended a bootcamp
percent_bootcamp_attendees = 100*(attendees_df["AttendedBootcamp"].count()/survey_data["AttendedBootcamp"].count())
print(f" percent of people who attended bootcamp: {percent_bootcamp_attendees}")

 percent of people who attended bootcamp: 6.196358907672302


In [16]:
# Calculate percentage of respondents belonging to each gender
male_percent = 100*(number_by_gender["male"]/number_attendees)

female_percent = 100*(number_by_gender["female"]/number_attendees)

other_gender_percent = 100*(number_attendees - number_by_gender["male"] - number_by_gender["female"])/number_attendees
            
print(f"% of folks who attended bootcamp by gender\n\
Male: {male_percent}\nFemale: {female_percent}\nOther: {other_gender_percent}")

% of folks who attended bootcamp by gender
Male: 52.04616998950682
Female: 34.20776495278069
Other: 13.746065057712487


In [17]:
# Calculate percentage of attendees with a school degree
percent_degree_holders = 100*(number_degreeholder/number_attendees)
percent_degree_holders

66.84155299055614

In [18]:
# Calculate average post-bootcamp salary
avg_postbootcamp_salary = attendees_df["BootcampPostSalary"].mean()
avg_postbootcamp_salary

63740.50606060606

In [19]:
# Create a new table consolodating above calculations
bootcamp_breakdown = pd.DataFrame({"Total survey responses": [respondent_count],
                                   "Total Bootcamp attendees": [number_attendees],
                                   "% attended bootcamp": [percent_bootcamp_attendees],
                                   "Avg. Age": [avg_age],
                                   "% Male": [male_percent],
                                   "% Female":[female_percent],
                                   "% Non Gender Specific":[other_gender_percent],
                                   "% Has a degree": [percent_degree_holders],
                                   "Average Post Bootcamp Salary": [avg_postbootcamp_salary]
})

bootcamp_breakdown = bootcamp_breakdown.round(2)

bootcamp_breakdown = bootcamp_breakdown[["Total survey responses", 
                                         "Total Bootcamp attendees", 
                                         "% attended bootcamp", 
                                         "Avg. Age", 
                                         "% Has a degree", 
                                         "% Male", 
                                         "% Female",
                                         "% Non Gender Specific",
                                         "Average Post Bootcamp Salary"]]


bootcamp_breakdown

Unnamed: 0,Total survey responses,Total Bootcamp attendees,% attended bootcamp,Avg. Age,% Has a degree,% Male,% Female,% Non Gender Specific,Average Post Bootcamp Salary
0,15620,953,6.2,31.07,66.84,52.05,34.21,13.75,63740.51


In [20]:
# Improve formatting before outputting spreadsheet
bootcamp_breakdown["% Male"] = bootcamp_breakdown["% Male"].map("{0:,.2f}%".format)
bootcamp_breakdown["% Female"] = bootcamp_breakdown["% Female"].map("{0:,.2f}%".format)
bootcamp_breakdown["% attended bootcamp"] = bootcamp_breakdown["% attended bootcamp"].map("{0:,.2f}%".format)
bootcamp_breakdown["% Non Gender Specific"] = bootcamp_breakdown["% Non Gender Specific"].map("{0:,.2f}%".format)
bootcamp_breakdown["% Has a degree"] = bootcamp_breakdown["% Has a degree"].map("{0:,.2f}%".format)
bootcamp_breakdown["Average Post Bootcamp Salary"] = bootcamp_breakdown["Average Post Bootcamp Salary"].map("${0:,.0f}".format)
bootcamp_breakdown

Unnamed: 0,Total survey responses,Total Bootcamp attendees,% attended bootcamp,Avg. Age,% Has a degree,% Male,% Female,% Non Gender Specific,Average Post Bootcamp Salary
0,15620,953,6.20%,31.07,66.84%,52.05%,34.21%,13.75%,"$63,741"


In [21]:
# Export to Excel
bootcamp_breakdown.to_excel("../Output/BootcampOutputPart1.xlsx", index=False)