Data Source: https://designcensus.org/

In [30]:
import numpy as np
import pandas as pd
import math
from IPython.display import display

In [31]:
df = pd.read_csv('DesignCensus2017_Data.csv')

In [32]:
print('There a a total of ' + str(len(df.columns)) + ' columns.')

There a a total of 43 columns.


### Column dictionary

In [33]:
col_dict = {}
for col in df.columns:
    index, desc = col.split(' --')
    col_dict['V_' + str(index)] = desc

In [34]:
col_dict

{'V_1': " I've worked in design for:",
 'V_10': 'My immediate team is:',
 'V_11': 'My work touches these industries or sectors:',
 'V_12': 'I’ve been at my current place of work for:',
 'V_13': 'My role is:',
 'V_14': 'My role is:',
 'V_15': 'My role is:',
 'V_16': "I've been in my current role for:",
 'V_17': 'I typically work:',
 'V_18': 'My annual salary is:',
 'V_19': 'My job provides:',
 'V_2': 'I primarily work in:',
 'V_20': 'In my current role, there is:',
 'V_21': 'In my current role, I am:',
 'V_22': 'My job is:',
 'V_23': 'My job stability is:',
 'V_24': 'In addition to my job, I also:',
 'V_25': 'I fuel my daily creativity with coffee:',
 'V_26': "Highest level of education I've completed:",
 'V_27': 'I learned design:',
 'V_28': 'My major focus of study was:',
 'V_29': "Over my career, I've had:",
 'V_3': "I'm also good at:",
 'V_30': "I've shifted my specialty or focus area:",
 'V_31': 'I stay current with design by:',
 'V_32': "I'm a member of:",
 'V_33': "I'd like to ge

### Classify Columns

In [35]:
df.columns = map(lambda x : 'V_' + str(x+1), list(range(len(df.columns)))) 

In [36]:
continous_cols = ['V_18', 'V_37']

In [37]:
pipe_col = ['V_2', 'V_3','V_5','V_7','V_11','V_19',\
            'V_24', 'V_27', 'V_28', 'V_31', 'V_32',\
            'V_33', 'V_40', 'V_41', 'V_42', 'V_43']

In [38]:
cate_cols = list(df.columns)
for col in continous_cols:
    cate_cols.remove(col)

### Summary of Null Value

In [39]:
dff = pd.DataFrame(columns=['Column','Desc', 'Null'])
for col in df.columns:
    num_null = df[col].isnull().sum()
    dff = dff.append({'Column': col, 'Desc' : col_dict[col], 'Null' : num_null }, ignore_index=True)
dff

Unnamed: 0,Column,Desc,Null
0,V_1,I've worked in design for:,0
1,V_2,I primarily work in:,0
2,V_3,I'm also good at:,0
3,V_4,I am currently:,0
4,V_5,My company specializes in:,7198
5,V_6,My company is:,7317
6,V_7,I teach:,12800
7,V_8,I'm not working because:,12831
8,V_9,My organization has:,409
9,V_10,My immediate team is:,402


### Deal with columns with pipe

In [41]:
df['V_36'] = df['V_36'].astype(np.object)
df = df.replace(np.nan,'', regex=True)
pipe_col_dict = {}

for col in pipe_col:
    options = set()
    for row in df[col]:
        arrs = row.split('|') 
        for arr in arrs:
            if arr != '':
                options.add(arr)
    
    pipe_col_dict[col] = {}
    
    index = 1
    for option in options:
        sub_col = col + '_' + str(index)
        df.loc[:,sub_col] = 0
        pipe_col_dict[col][option] = sub_col
        index += 1

In [None]:
for col in pipe_col:
    row_num = 0;
    for row in df[col]:
        arrs = row.split('|') 
        for arr in arrs:
            if arr != '':
                sub_col = pipe_col_dict[col][arr]
                df.loc[row_num, sub_col] += 1
        row_num += 1

In [None]:
df.to_csv('output.csv')

### Exclude null, outliers, etc

TBD

### End of preprocessing

In [42]:
df = pd.read_csv('output.csv')
num_records = len(df)

In [43]:
def summary_cate_col(col):
    if col in continous_cols:
        return
    
    elif col in pipe_col:
        dff = pd.DataFrame(columns=['Options','Count','Percent'])
        for key, value in pipe_col_dict[col].items():
            count = df[value].sum()
            dff = dff.append({'Options': key, 'Count' : count}, ignore_index=True)
    
    else:
        dff = pd.DataFrame(df[col].value_counts())
        dff.reset_index(inplace=True)
        dff.columns = ['Options','Count']
    
    dff['Percent'] = dff.Count / num_records
    dff = dff.style.format({'Percent': '{:,.2%}'.format})
    return dff

In [44]:
for col in cate_cols:
    display(str(col) + ' : ' + col_dict[col])
    display(summary_cate_col(col))

"V_1 :  I've worked in design for:"

Unnamed: 0,Options,Count,Percent
0,1-4 years,4168,31.68%
1,5-9 years,3682,27.98%
2,10-14 years,2059,15.65%
3,15-20 years,1298,9.86%
4,20+ years,1243,9.45%
5,Less than 1 year,708,5.38%


'V_2 : I primarily work in:'

Unnamed: 0,Options,Count,Percent
0,Production,814,6.19%
1,Fashion Design,1481,11.26%
2,Game Design,2610,19.84%
3,Urban Design,4321,32.84%
4,Product Design,267,2.03%
5,Education,1174,8.92%
6,Experience Design,1061,8.06%
7,Interaction Design,2487,18.90%
8,Data Science,1257,9.55%
9,Instructional Design,710,5.40%


"V_3 : I'm also good at:"

Unnamed: 0,Options,Count,Percent
0,Project Management,5695,43.28%
1,Coding,2627,19.97%
2,Drawing or Painting,5245,39.86%
3,Brainstorming and Ideation,10209,77.59%
4,Diversity and Inclusion,3187,24.22%
5,Business,4838,36.77%
6,Relationship Management,735,5.59%
7,Data Analysis,1403,10.66%
8,None of these,222,1.69%
9,Engineering,2864,21.77%


'V_4 : I am currently:'

Unnamed: 0,Options,Count,Percent
0,A full-time employee in-house,5539,42.10%
1,A full-time employee at an agency or consultancy,3757,28.55%
2,Self-employed or freelancing,2006,15.25%
3,A contract employee in-house,402,3.06%
4,Not working,396,3.01%
5,An educator,389,2.96%
6,Other,364,2.77%
7,A contract employee at an agency or consultancy,305,2.32%


'V_5 : My company specializes in:'

Unnamed: 0,Options,Count,Percent
0,Innovation,302,2.30%
1,Advertising,2363,17.96%
2,Branding,432,3.28%
3,Product Design,2986,22.69%
4,Broadcast,2079,15.80%
5,Digital,736,5.59%
6,None of these,3408,25.90%
7,Service Design,888,6.75%
8,Print,1103,8.38%
9,Social Impact,1354,10.29%


'V_6 : My company is:'

Unnamed: 0,Options,Count,Percent
0,For-Profit/Corporate,3738,28.41%
1,Startup,681,5.18%
2,Non-Profit,605,4.60%
3,Education,449,3.41%
4,Other,217,1.65%
5,Government,151,1.15%


'V_7 : I teach:'

Unnamed: 0,Options,Count,Percent
0,Graduate school,67,0.51%
1,Elementary school,30,0.23%
2,Undergraduate school,265,2.01%
3,Programs or workshops,86,0.65%
4,Junior High or High school,36,0.27%
5,Art school,7,0.05%
6,Technical or Trade school,119,0.90%


"V_8 : I'm not working because:"

Unnamed: 0,Options,Count,Percent
0,Prefer not to say,155,1.18%
1,I was laid off,94,0.71%
2,I quit,72,0.55%
3,I'm retired or semi-retired,6,0.05%


'V_9 : My organization has:'

Unnamed: 0,Options,Count,Percent
0,1-10 employees,3235,24.59%
1,1000+ employees,2910,22.12%
2,11-50 employees,2423,18.41%
3,101-250 employees,1149,8.73%
4,51-100 employees,1140,8.66%
5,251-500 employees,872,6.63%
6,501-1000 employees,733,5.57%
7,Not sure,287,2.18%


'V_10 : My immediate team is:'

Unnamed: 0,Options,Count,Percent
0,2-4 people,4919,37.38%
1,5-10 people,3805,28.92%
2,Just me,2302,17.50%
3,11-20 people,1202,9.14%
4,20+ people,528,4.01%


'V_11 : My work touches these industries or sectors:'

Unnamed: 0,Options,Count,Percent
0,Consumer Products,1324,10.06%
1,Automotive,1342,10.20%
2,Advertising,598,4.54%
3,Arts,1732,13.16%
4,Government,523,3.97%
5,Pharmaceutical,3422,26.01%
6,Education,1781,13.54%
7,Media and Broadcasting,1512,11.49%
8,Manufacturing,1340,10.18%
9,Industrial Goods,994,7.55%


'V_12 : I’ve been at my current place of work for:'

Unnamed: 0,Options,Count,Percent
0,1-2 years,3789,28.80%
1,Less than 1 year,3626,27.56%
2,3-4 years,2511,19.08%
3,5-7 years,1224,9.30%
4,10+ years,1112,8.45%
5,8-10 years,495,3.76%


'V_13 : My role is:'

Unnamed: 0,Options,Count,Percent
0,Owner/Partner,1458,11.08%
1,Mid-level specialist,1424,10.82%
2,Art Director,761,5.78%
3,Junior contributor,622,4.73%
4,Creative Director,570,4.33%
5,Other,535,4.07%
6,Senior manager,327,2.49%
7,Director,155,1.18%
8,Intern,104,0.79%
9,Executive,103,0.78%


'V_14 : My role is:'

Unnamed: 0,Options,Count,Percent
0,Mid-level specialist,2770,21.05%
1,Junior contributor,712,5.41%
2,Director,574,4.36%
3,Senior manager,569,4.32%
4,Team Leader,535,4.07%
5,Other,436,3.31%
6,Group Leader,131,1.00%
7,Executive,88,0.67%
8,Senior Executive,73,0.55%
9,Intern,47,0.36%


'V_15 : My role is:'

Unnamed: 0,Options,Count,Percent
0,Associate Professor,122,0.93%
1,Professor,59,0.45%
2,Instructor,58,0.44%
3,Program Chair,48,0.36%
4,Other,42,0.32%
5,Lecturer,29,0.22%
6,Adjunct Professor,24,0.18%
7,Dean,4,0.03%
8,Associate Dean,3,0.02%


"V_16 : I've been in my current role for:"

Unnamed: 0,Options,Count,Percent
0,1-2 years,4424,33.62%
1,Less than 1 year,3955,30.06%
2,3-4 years,2239,17.02%
3,5-7 years,954,7.25%
4,10+ years,808,6.14%
5,8-10 years,377,2.87%


'V_17 : I typically work:'

Unnamed: 0,Options,Count,Percent
0,40 hours per week,5626,42.76%
1,41-50 hours per week,4150,31.54%
2,Less than 40 hours per week,1991,15.13%
3,51-60 hours per week,727,5.53%
4,60+ hours per week,263,2.00%


'V_19 : My job provides:'

Unnamed: 0,Options,Count,Percent
0,Commuting Benefits,3011,22.88%
1,Education Funding,4875,37.05%
2,Wellness Programs,2525,19.19%
3,None of these,8876,67.46%
4,Life Insurance,2404,18.27%
5,Fitness Benefits,3649,27.73%
6,Paid Time Off,784,5.96%
7,Disability Insurance,5701,43.33%
8,Dental Insurance,8766,66.62%
9,Medical Insurance,5872,44.63%


'V_20 : In my current role, there is:'

Unnamed: 0,Options,Count,Percent
0,Some opportunity for advancement,3938,29.93%
1,Promising opportunity for advancement,3369,25.60%
2,Limited opportunity for advancement,3139,23.86%
3,No opportunity for advancement,1507,11.45%
4,Not sure,806,6.13%


'V_21 : In my current role, I am:'

Unnamed: 0,Options,Count,Percent
0,Learning some new skills,5539,42.10%
1,Learning and increasing my skill set every day,5175,39.33%
2,Learning very little,1720,13.07%
3,Learning nothing,252,1.92%
4,Not sure,73,0.55%


'V_22 : My job is:'

Unnamed: 0,Options,Count,Percent
0,Pretty good,8175,62.13%
1,The best,2239,17.02%
2,Not great,1971,14.98%
3,Not sure,211,1.60%
4,The worst,163,1.24%


'V_23 : My job stability is:'

Unnamed: 0,Options,Count,Percent
0,Fairly stable,7297,55.46%
1,Rock solid,3220,24.47%
2,Somewhat shaky,1610,12.24%
3,Non-existent,434,3.30%
4,Not sure,198,1.50%


'V_24 : In addition to my job, I also:'

Unnamed: 0,Options,Count,Percent
0,Have regular freelance clients,2184,16.60%
1,Have a part-time job,1007,7.65%
2,Maintain personal projects,6959,52.89%
3,Design for friends and family,2074,15.76%
4,Own my own business,719,5.46%
5,Teach,1405,10.68%
6,Volunteer,7098,53.94%
7,Take occasional freelance projects,6266,47.62%
8,None of these,4081,31.02%


'V_25 : I fuel my daily creativity with coffee:'

Unnamed: 0,Options,Count,Percent
0,1-2 cups,6279,47.72%
1,3-4 cups,2449,18.61%
2,"No way, tea",2252,17.12%
3,Other,1542,11.72%
4,5-6 cups,483,3.67%
5,7-8 cups,103,0.78%
6,10+ cups,30,0.23%
7,9-10 cups,20,0.15%


"V_26 : Highest level of education I've completed:"

Unnamed: 0,Options,Count,Percent
0,Bachelor's degree,9015,68.51%
1,Master's degree,2217,16.85%
2,Some college,710,5.40%
3,Associate's degree,579,4.40%
4,Technical degree or certificate,304,2.31%
5,High School or less,241,1.83%
6,Doctorate,92,0.70%


'V_27 : I learned design:'

Unnamed: 0,Options,Count,Percent
0,On my own,9215,70.03%
1,In school,10560,80.26%
2,From a mentor or apprenticeship,4157,31.59%
3,On the job,4769,36.24%
4,From programs or workshops,8419,63.98%
5,Through online training,4001,30.41%


'V_28 : My major focus of study was:'

Unnamed: 0,Options,Count,Percent
0,Printing or Print Making,1479,11.24%
1,Advertising,2957,22.47%
2,Urban Design,587,4.46%
3,Product Design,1812,13.77%
4,Fashion Design,460,3.50%
5,Visual Design,478,3.63%
6,Digital Media,621,4.72%
7,Motion Graphics,2552,19.40%
8,Film and Production,284,2.16%
9,Computer Science,172,1.31%


"V_29 : Over my career, I've had:"

Unnamed: 0,Options,Count,Percent
0,2-3 design jobs,5951,45.23%
1,4-5 design jobs,3187,24.22%
2,1 design job,1939,14.74%
3,6-7 design jobs,1076,8.18%
4,10+ design jobs,584,4.44%
5,8-10 design jobs,421,3.20%


"V_30 : I've shifted my specialty or focus area:"

Unnamed: 0,Options,Count,Percent
0,1-2 times,6502,49.41%
1,0 times,3129,23.78%
2,3-4 times,1242,9.44%
3,Slowly over time,1170,8.89%
4,Always ready for what's next,1115,8.47%


'V_31 : I stay current with design by:'

Unnamed: 0,Options,Count,Percent
0,Discussing with peers,167,1.27%
1,Attending conferences,7456,56.67%
2,Following social media,5974,45.40%
3,Seeking online training,10656,80.98%
4,Reading books/magazines,9173,69.71%
5,Reading online publications,6217,47.25%
6,Having a mentor,9636,73.23%
7,Following design feeds,10048,76.36%
8,None of these,2022,15.37%


"V_32 : I'm a member of:"

Unnamed: 0,Options,Count,Percent
0,AIGA,44,0.33%
1,GDC,761,5.78%
2,SPD,38,0.29%
3,SEGD,60,0.46%
4,UCDA,38,0.29%
5,RGD,146,1.11%
6,ico-D (Icograda),1,0.01%
7,Graphic Artists Guild,64,0.49%
8,ADCE,410,3.12%
9,SPARK,5,0.04%


"V_33 : I'd like to get better at:"

Unnamed: 0,Options,Count,Percent
0,Product Design,1614,12.27%
1,Data Analysis,3862,29.35%
2,Business,4333,32.93%
3,Interaction Design,5673,43.11%
4,Social Media,642,4.88%
5,SEO/SEM,4695,35.68%
6,Leadership,1985,15.09%
7,UX Design,2967,22.55%
8,Motion Graphics,2399,18.23%
9,Coding,3593,27.31%


'V_34 : I am:'

Unnamed: 0,Options,Count,Percent
0,"Open to opportunities, but not actively looking",5845,44.42%
1,Not interested in a new job right now,2961,22.50%
2,Casually looking for the right opportunity,2721,20.68%
3,Working hard to find my next job,1497,11.38%
4,Not sure,134,1.02%


"V_35 : Right now, I can't stop listening to:"

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


'V_36 : I live in:'

Unnamed: 0,Options,Count,Percent
0,11238,70,0.53%
1,94117,63,0.48%
2,94110,58,0.44%
3,60647,50,0.38%
4,11201,48,0.36%
5,11215,47,0.36%
6,11211,44,0.33%
7,11222,42,0.32%
8,11216,41,0.31%
9,11217,41,0.31%


'V_38 : I am:'

Unnamed: 0,Options,Count,Percent
0,Female,7039,53.50%
1,Male,5895,44.80%
2,Prefer not to say,112,0.85%
3,Non-binary or third gender,102,0.78%
4,Other,10,0.08%


'V_39 : I consider myself:'

Unnamed: 0,Options,Count,Percent
0,Not LGBTQ,10516,79.92%
1,LGBTQ,1515,11.51%
2,Prefer not to say,1127,8.57%


'V_40 : My ethnicity is:'

Unnamed: 0,Options,Count,Percent
0,White or Caucasian,389,2.96%
1,"Latina, Latino, Latinx, or Hispanic",9624,73.14%
2,Asian,1296,9.85%
3,"Native American, First Nations, or First People",153,1.16%
4,Other,1653,12.56%
5,Black or African American,352,2.68%
6,Native Hawaiian or Pacific Islander,125,0.95%
7,Prefer not to say,544,4.13%


'V_41 : The most critical issues and challenges currently facing design are (answer optional):'

Unnamed: 0,Options,Count,Percent
0,Dark UX patterns,5560,42.26%
1,Trademark and patent issues,2045,15.54%
2,Diversity in design and tech,1234,9.38%
3,Generational differences in the workforce,673,5.11%
4,Algorithm bias,6081,46.22%
5,Ethics in design,3535,26.87%
6,Environmental impacts of design,3764,28.61%
7,Advertising supported content model,1921,14.60%
8,Education cost and equity of access,1816,13.80%
9,"Design not having a ""seat at the table""",1559,11.85%


'V_42 : The emerging trends and technologies that will have the biggest impact on design are (answer optional):'

Unnamed: 0,Options,Count,Percent
0,Behavior tracking and modeling,1235,9.39%
1,Augmented Reality,3529,26.82%
2,Battery and energy technology,3431,26.08%
3,Biological or genetic design,4543,34.53%
4,Image recognition,1208,9.18%
5,Internet of Things,7000,53.20%
6,Virtual Reality,961,7.30%
7,Crowdsourcing and open source,278,2.11%
8,Conversational interfaces,2252,17.12%
9,3D printing,5078,38.59%


'V_43 : The most valuable design skills for the future will be (answer optional):'

Unnamed: 0,Options,Count,Percent
0,Voice interface design,5467,41.55%
1,Data analysis,4886,37.13%
2,DIY or maker mentality,7671,58.30%
3,Leadership,1396,10.61%
4,Play,2291,17.41%
5,Cross-functional skills,5085,38.65%
6,Facilitation,3749,28.49%
7,Observation and listening,1208,9.18%
8,None of these,9170,69.69%
9,Systems thinking,2895,22.00%
