Data Source: https://designcensus.org/

In [1]:
import numpy as np
import pandas as pd
import math
from IPython.display import display

In [2]:
df = pd.read_csv('DesignCensus2017_Data.csv')

In [3]:
print('There a a total of ' + str(len(df.columns)) + ' columns.')

There a a total of 43 columns.


### Column dictionary

In [4]:
col_dict = {}
for col in df.columns:
    index, desc = col.split(' --')
    col_dict['V_' + str(index)] = desc

In [5]:
col_dict

{'V_1': " I've worked in design for:",
 'V_10': 'My immediate team is:',
 'V_11': 'My work touches these industries or sectors:',
 'V_12': 'I’ve been at my current place of work for:',
 'V_13': 'My role is:',
 'V_14': 'My role is:',
 'V_15': 'My role is:',
 'V_16': "I've been in my current role for:",
 'V_17': 'I typically work:',
 'V_18': 'My annual salary is:',
 'V_19': 'My job provides:',
 'V_2': 'I primarily work in:',
 'V_20': 'In my current role, there is:',
 'V_21': 'In my current role, I am:',
 'V_22': 'My job is:',
 'V_23': 'My job stability is:',
 'V_24': 'In addition to my job, I also:',
 'V_25': 'I fuel my daily creativity with coffee:',
 'V_26': "Highest level of education I've completed:",
 'V_27': 'I learned design:',
 'V_28': 'My major focus of study was:',
 'V_29': "Over my career, I've had:",
 'V_3': "I'm also good at:",
 'V_30': "I've shifted my specialty or focus area:",
 'V_31': 'I stay current with design by:',
 'V_32': "I'm a member of:",
 'V_33': "I'd like to ge

### Classify Columns

In [6]:
df.columns = map(lambda x : 'V_' + str(x+1), list(range(len(df.columns)))) 

In [7]:
continous_cols = ['V_18', 'V_37']

In [8]:
pipe_col = ['V_2', 'V_3','V_5','V_7','V_11','V_19',\
            'V_24', 'V_27', 'V_28', 'V_31', 'V_32',\
            'V_33', 'V_40', 'V_41', 'V_42', 'V_43']

In [9]:
cate_cols = list(df.columns)
for col in continous_cols:
    cate_cols.remove(col)

### Summary of Null Value

In [10]:
dff = pd.DataFrame(columns=['Column','Desc', 'Null'])
for col in df.columns:
    num_null = df[col].isnull().sum()
    dff = dff.append({'Column': col, 'Desc' : col_dict[col], 'Null' : num_null }, ignore_index=True)
dff

Unnamed: 0,Column,Desc,Null
0,V_1,I've worked in design for:,0
1,V_2,I primarily work in:,0
2,V_3,I'm also good at:,0
3,V_4,I am currently:,0
4,V_5,My company specializes in:,7198
5,V_6,My company is:,7317
6,V_7,I teach:,12800
7,V_8,I'm not working because:,12831
8,V_9,My organization has:,409
9,V_10,My immediate team is:,402


### Deal with columns with pipe

In [19]:
df['V_36'] = df['V_36'].astype(np.object)
df = df.replace(np.nan,'', regex=True)
pipe_col_dict = {}

for col in pipe_col:
    options = set()
    for row in df[col]:
        arrs = row.split('|') 
        for arr in arrs:
            if arr != '':
                options.add(arr)
    
    pipe_col_dict[col] = {}
    
    index = 1
    for option in options:
        sub_col = col + '_' + str(index)
        df.loc[:,sub_col] = 0
        pipe_col_dict[col][option] = sub_col
        col_dict[sub_col] = col_dict[col] + '-' + option
        index += 1

In [14]:
for col in pipe_col:
    row_num = 0;
    for row in df[col]:
        arrs = row.split('|') 
        for arr in arrs:
            if arr != '':
                sub_col = pipe_col_dict[col][arr]
                df.loc[row_num, sub_col] += 1
        row_num += 1

In [15]:
df.to_csv('output.csv',index=False)

### Output csv with column names

In [36]:
#df_v2 = df.drop('Unnamed: 0', axis = 1).copy()
df_v2.columns = [col_dict[col] for col in df_v2.columns]
df_v2.to_csv('output_with_column_names.csv',index=False)
del df_v2

### Exclude null, outliers, etc

TBD

### End of preprocessing

In [37]:
df = pd.read_csv('output.csv')
num_records = len(df)

In [38]:
def summary_cate_col(col):
    if col in continous_cols:
        return
    
    elif col in pipe_col:
        dff = pd.DataFrame(columns=['Options','Count','Percent'])
        for key, value in pipe_col_dict[col].items():
            count = df[value].sum()
            dff = dff.append({'Options': key, 'Count' : count}, ignore_index=True)
    
    else:
        dff = pd.DataFrame(df[col].value_counts())
        dff.reset_index(inplace=True)
        dff.columns = ['Options','Count']
    
    dff['Percent'] = dff.Count / num_records
    dff = dff.sort_values(by = 'Percent', ascending = False).head(5)
    dff = dff.style.format({'Percent': '{:,.2%}'.format})
    return dff

### Basic Summary of each column (only show top 5)

In [39]:
for col in cate_cols:
    display(str(col) + ' : ' + col_dict[col])
    display(summary_cate_col(col))

"V_1 :  I've worked in design for:"

Unnamed: 0,Options,Count,Percent
0,1-4 years,4168,31.68%
1,5-9 years,3682,27.98%
2,10-14 years,2059,15.65%
3,15-20 years,1298,9.86%
4,20+ years,1243,9.45%


'V_2 : I primarily work in:'

Unnamed: 0,Options,Count,Percent
16,Graphic Design,8976,68.22%
9,Web Design,4935,37.51%
10,UX Design,4321,32.84%
23,UI Design,3991,30.33%
13,Marketing,3952,30.03%


"V_3 : I'm also good at:"

Unnamed: 0,Options,Count,Percent
16,Brainstorming and Ideation,10209,77.59%
14,Project Management,5695,43.28%
13,Leadership,5271,40.06%
12,Drawing or Painting,5245,39.86%
9,Writing,4838,36.77%


'V_4 : I am currently:'

Unnamed: 0,Options,Count,Percent
0,A full-time employee in-house,5539,42.10%
1,A full-time employee at an agency or consultancy,3757,28.55%
2,Self-employed or freelancing,2006,15.25%
3,A contract employee in-house,402,3.06%
4,Not working,396,3.01%


'V_5 : My company specializes in:'

Unnamed: 0,Options,Count,Percent
8,Digital,3408,25.90%
9,Branding,2986,22.69%
0,Print,2363,17.96%
1,Strategy,2079,15.80%
5,Advertising,2077,15.79%


'V_6 : My company is:'

Unnamed: 0,Options,Count,Percent
0,For-Profit/Corporate,3738,28.41%
1,Startup,681,5.18%
2,Non-Profit,605,4.60%
3,Education,449,3.41%
4,Other,217,1.65%


'V_7 : I teach:'

Unnamed: 0,Options,Count,Percent
3,Undergraduate school,265,2.01%
1,Graduate school,119,0.90%
2,Programs or workshops,86,0.65%
0,Art school,67,0.51%
5,Technical or Trade school,36,0.27%


"V_8 : I'm not working because:"

Unnamed: 0,Options,Count,Percent
0,Prefer not to say,155,1.18%
1,I was laid off,94,0.71%
2,I quit,72,0.55%
3,I'm retired or semi-retired,6,0.05%


'V_9 : My organization has:'

Unnamed: 0,Options,Count,Percent
0,1-10 employees,3235,24.59%
1,1000+ employees,2910,22.12%
2,11-50 employees,2423,18.41%
3,101-250 employees,1149,8.73%
4,51-100 employees,1140,8.66%


'V_10 : My immediate team is:'

Unnamed: 0,Options,Count,Percent
0,2-4 people,4919,37.38%
1,5-10 people,3805,28.92%
2,Just me,2302,17.50%
3,11-20 people,1202,9.14%
4,20+ people,528,4.01%


'V_11 : My work touches these industries or sectors:'

Unnamed: 0,Options,Count,Percent
26,Marketing and Communications,5413,41.14%
6,Advertising,5251,39.91%
28,Technology,4225,32.11%
25,Education,4113,31.26%
43,Web Services,3619,27.50%


'V_12 : I’ve been at my current place of work for:'

Unnamed: 0,Options,Count,Percent
0,1-2 years,3789,28.80%
1,Less than 1 year,3626,27.56%
2,3-4 years,2511,19.08%
3,5-7 years,1224,9.30%
4,10+ years,1112,8.45%


'V_13 : My role is:'

Unnamed: 0,Options,Count,Percent
0,Owner/Partner,1458,11.08%
1,Mid-level specialist,1424,10.82%
2,Art Director,761,5.78%
3,Junior contributor,622,4.73%
4,Creative Director,570,4.33%


'V_14 : My role is:'

Unnamed: 0,Options,Count,Percent
0,Mid-level specialist,2770,21.05%
1,Junior contributor,712,5.41%
2,Director,574,4.36%
3,Senior manager,569,4.32%
4,Team Leader,535,4.07%


'V_15 : My role is:'

Unnamed: 0,Options,Count,Percent
0,Associate Professor,122,0.93%
1,Professor,59,0.45%
2,Instructor,58,0.44%
3,Program Chair,48,0.36%
4,Other,42,0.32%


"V_16 : I've been in my current role for:"

Unnamed: 0,Options,Count,Percent
0,1-2 years,4424,33.62%
1,Less than 1 year,3955,30.06%
2,3-4 years,2239,17.02%
3,5-7 years,954,7.25%
4,10+ years,808,6.14%


'V_17 : I typically work:'

Unnamed: 0,Options,Count,Percent
0,40 hours per week,5626,42.76%
1,41-50 hours per week,4150,31.54%
2,Less than 40 hours per week,1991,15.13%
3,51-60 hours per week,727,5.53%
4,60+ hours per week,263,2.00%


'V_19 : My job provides:'

Unnamed: 0,Options,Count,Percent
2,Paid Time Off,8876,67.46%
15,Medical Insurance,8766,66.62%
0,Dental Insurance,7542,57.32%
7,Employer-matched Retirement,5872,44.63%
3,Life Insurance,5842,44.40%


'V_20 : In my current role, there is:'

Unnamed: 0,Options,Count,Percent
0,Some opportunity for advancement,3938,29.93%
1,Promising opportunity for advancement,3369,25.60%
2,Limited opportunity for advancement,3139,23.86%
3,No opportunity for advancement,1507,11.45%
4,Not sure,806,6.13%


'V_21 : In my current role, I am:'

Unnamed: 0,Options,Count,Percent
0,Learning some new skills,5539,42.10%
1,Learning and increasing my skill set every day,5175,39.33%
2,Learning very little,1720,13.07%
3,Learning nothing,252,1.92%
4,Not sure,73,0.55%


'V_22 : My job is:'

Unnamed: 0,Options,Count,Percent
0,Pretty good,8175,62.13%
1,The best,2239,17.02%
2,Not great,1971,14.98%
3,Not sure,211,1.60%
4,The worst,163,1.24%


'V_23 : My job stability is:'

Unnamed: 0,Options,Count,Percent
0,Fairly stable,7297,55.46%
1,Rock solid,3220,24.47%
2,Somewhat shaky,1610,12.24%
3,Non-existent,434,3.30%
4,Not sure,198,1.50%


'V_24 : In addition to my job, I also:'

Unnamed: 0,Options,Count,Percent
3,Design for friends and family,7098,53.94%
0,Maintain personal projects,6959,52.89%
2,Take occasional freelance projects,6266,47.62%
5,Volunteer,4081,31.02%
7,Have regular freelance clients,2184,16.60%


'V_25 : I fuel my daily creativity with coffee:'

Unnamed: 0,Options,Count,Percent
0,1-2 cups,6279,47.72%
1,3-4 cups,2449,18.61%
2,"No way, tea",2252,17.12%
3,Other,1542,11.72%
4,5-6 cups,483,3.67%


"V_26 : Highest level of education I've completed:"

Unnamed: 0,Options,Count,Percent
0,Bachelor's degree,9015,68.51%
1,Master's degree,2217,16.85%
2,Some college,710,5.40%
3,Associate's degree,579,4.40%
4,Technical degree or certificate,304,2.31%


'V_27 : I learned design:'

Unnamed: 0,Options,Count,Percent
2,In school,10560,80.26%
3,On the job,9215,70.03%
5,On my own,8419,63.98%
4,From programs or workshops,4769,36.24%
1,Through online training,4157,31.59%


'V_28 : My major focus of study was:'

Unnamed: 0,Options,Count,Percent
17,Graphic Design,8911,67.72%
10,Art,2957,22.47%
6,Visual Design,2552,19.40%
18,Website Design,2211,16.80%
13,Design and Technology,2166,16.46%


"V_29 : Over my career, I've had:"

Unnamed: 0,Options,Count,Percent
0,2-3 design jobs,5951,45.23%
1,4-5 design jobs,3187,24.22%
2,1 design job,1939,14.74%
3,6-7 design jobs,1076,8.18%
4,10+ design jobs,584,4.44%


"V_30 : I've shifted my specialty or focus area:"

Unnamed: 0,Options,Count,Percent
0,1-2 times,6502,49.41%
1,0 times,3129,23.78%
2,3-4 times,1242,9.44%
3,Slowly over time,1170,8.89%
4,Always ready for what's next,1115,8.47%


'V_31 : I stay current with design by:'

Unnamed: 0,Options,Count,Percent
2,Following design feeds,10656,80.98%
4,Reading online publications,10048,76.36%
5,Following social media,9636,73.23%
3,Discussing with peers,9173,69.71%
0,Reading books/magazines,7456,56.67%


"V_32 : I'm a member of:"

Unnamed: 0,Options,Count,Percent
0,None of these,6752,51.31%
8,AIGA,4861,36.94%
7,Creative Mornings,1783,13.55%
10,IXDA,761,5.78%
20,Freelancers Union,410,3.12%


"V_33 : I'd like to get better at:"

Unnamed: 0,Options,Count,Percent
9,UX Design,5673,43.11%
8,Web Design,5107,38.81%
19,UI Design,4895,37.20%
17,Motion Graphics,4717,35.85%
11,Leadership,4695,35.68%


'V_34 : I am:'

Unnamed: 0,Options,Count,Percent
0,"Open to opportunities, but not actively looking",5845,44.42%
1,Not interested in a new job right now,2961,22.50%
2,Casually looking for the right opportunity,2721,20.68%
3,Working hard to find my next job,1497,11.38%
4,Not sure,134,1.02%


"V_35 : Right now, I can't stop listening to:"

Unnamed: 0,Options,Count,Percent
0,Podcasts,255,1.94%
1,NPR,127,0.97%
2,podcasts,104,0.79%
3,Music,100,0.76%
4,Spotify,99,0.75%


'V_36 : I live in:'

Unnamed: 0,Options,Count,Percent
0,11238,70,0.53%
1,94117,63,0.48%
2,94110,58,0.44%
3,60647,50,0.38%
4,11201,48,0.36%


'V_38 : I am:'

Unnamed: 0,Options,Count,Percent
0,Female,7039,53.50%
1,Male,5895,44.80%
2,Prefer not to say,112,0.85%
3,Non-binary or third gender,102,0.78%
4,Other,10,0.08%


'V_39 : I consider myself:'

Unnamed: 0,Options,Count,Percent
0,Not LGBTQ,10516,79.92%
1,LGBTQ,1515,11.51%
2,Prefer not to say,1127,8.57%


'V_40 : My ethnicity is:'

Unnamed: 0,Options,Count,Percent
4,White or Caucasian,9624,73.14%
5,Asian,1653,12.56%
1,"Latina, Latino, Latinx, or Hispanic",1296,9.85%
0,Black or African American,544,4.13%
6,Other,389,2.96%


'V_41 : The most critical issues and challenges currently facing design are (answer optional):'

Unnamed: 0,Options,Count,Percent
0,"Design not having a ""seat at the table""",6081,46.22%
1,Diversity in design and tech,5560,42.26%
9,Ethics in design,5091,38.69%
5,Education cost and equity of access,4569,34.72%
13,Consumer vs. social impact focus,3922,29.81%


'V_42 : The emerging trends and technologies that will have the biggest impact on design are (answer optional):'

Unnamed: 0,Options,Count,Percent
7,AI and machine learning,7000,53.20%
5,Augmented Reality,5361,40.74%
13,Virtual Reality,5078,38.59%
17,Behavior tracking and modeling,4543,34.53%
3,Internet of Things,3839,29.18%


'V_43 : The most valuable design skills for the future will be (answer optional):'

Unnamed: 0,Options,Count,Percent
16,Adaptability to tech and social change,9170,69.69%
5,Communication skills,7671,58.30%
10,Empathy,6244,47.45%
0,Asking good questions,5727,43.52%
6,Cross-functional skills,5493,41.75%
