# Step1: Connect to AWS DynamoDB database - "test_candidates" table

- AWS has limited size of return responses so we won't get all candidates in one time table scanning
- if 'LastEvaluatedKey' presents in response, keep scaning table to get all candidates 

In [1]:
import boto3
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('test_candidates')
response = table.scan()
data = response['Items']
while 'LastEvaluatedKey' in response:
    response = table.scan(ExclusiveStartKey=response['LastEvaluatedKey'])
    data.extend(response['Items'])

In [2]:
import pandas as pd
data = pd.DataFrame(data)
# data = data[:len(data)//2]

# Step2: Data exploration

In [4]:
# pandas dataframe
# data.to_csv("test_candidate.csv")  # in case you want to export to csv
data

Unnamed: 0,address,alive_dead,avatar,avatar_url,birthday,bonus,consultant,country,current_salary,cv_link,...,stargazers_count,status,summary,target_company,target_position,time_spent_each_job,title,twitter,uuid,websites
0,,,/assets/pages/img/avatars/default.png,https://avatars2.githubusercontent.com/u/29790...,,,,,,,...,,,,test,,,,,1bd1109e-5256-331b-8acb-83a65fc35bb6,[]
1,,,"data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP//...",/assets/pages/img/avatars/default.png,09/29,,,Thailand,,,...,,,29/09,,,,Internal Audit Manager,,4d2fb049-7cce-3e32-8d32-94062b251811,[]
2,,,,https://avatars3.githubusercontent.com/u/55049...,,,,,,,...,,,,,,,,,4dcd43d8-1195-38ef-84a8-b0eb8fcd8be8,"[{'Website 1': {'name': 'Website 1', 'website'..."
3,,,,https://avatars3.githubusercontent.com/u/21639...,,,,,,,...,,,,,,,,,8dc2afd3-dfaa-3a76-951c-757b1c295c6e,"[{'Website 1': {'name': 'Website 1', 'website'..."
4,,,,https://avatars3.githubusercontent.com/u/17294...,,,,,,,...,,,,,,,,,223a2fcb-fd55-3ced-976c-fe7c34bd5e33,"[{'Website 1': {'name': 'Website 1', 'website'..."
5,111/48 Moo7 Suanyai Muang NonThaburi,,"data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP//...",,,,,Nonthaburi,,,...,,,,,,,HR Overseas Support Manager at SCG Packaging ...,,af3c454a-d9a2-3b43-ae98-2050cf474185,"[{'Website 1': 'nan'}, {'Website 2': 'nan'}, {..."
6,,,,https://avatars0.githubusercontent.com/u/70048...,,,,,,,...,,,,,,,,,38a8ebc7-aaa2-3e25-a22a-09b1402151d3,"[{'Website 1': {'name': 'Website 1', 'website'..."
7,,,,https://avatars2.githubusercontent.com/u/22047...,,,,,,,...,,,,,,,,,139d3932-68e5-3ced-a589-c606884134ed,"[{'Website 1': {'name': 'Website 1', 'website'..."
8,,,https://media.licdn.com/mpr/mpr/shrinknp_400_4...,,,,,Thailand,,,...,,,,,,,Recruitment Specialist,I have a skill of people management and most o...,e1a2daaf-a094-3933-9aa0-f30162531057,"[{'Website 1': 'nan'}, {'Website 2': 'nan'}, {..."
9,,,"data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP//...",,,,,Bangkok,,,...,,,"TOEIC 655, Learning SPSS, Interest in Leadersh...",,,,Student at Ramkhamhaeng University,,a5f2449c-b8c2-3ac5-9c4e-a752d76e9aad,"[{'Website 1': 'nan'}, {'Website 2': 'nan'}, {..."


In [7]:
# all data columns
print(data.columns)
print("there are {} columns in this dataframe".format(len(data.columns)))

Index(['address', 'alive_dead', 'avatar', 'avatar_url', 'birthday', 'bonus',
       'consultant', 'country', 'current_salary', 'cv_link', 'devpost_url',
       'educations', 'email', 'followers', 'followings', 'full_name',
       'github_created_at', 'github_followers', 'github_followings',
       'github_public_gists', 'github_public_repos', 'github_updated_at',
       'github_url', 'hirable', 'jbscore', 'jobs', 'kaggle_url', 'last_update',
       'linkedin_url', 'location', 'messengers', 'neighborhood',
       'organizations', 'ownership', 'phones', 'profile_url', 'public_gists',
       'public_repos', 'reason_for_leaving', 'repos_fork', 'repos_forks_count',
       'repos_languages', 'salary', 'skills', 'stargazers_count', 'status',
       'summary', 'target_company', 'target_position', 'time_spent_each_job',
       'title', 'twitter', 'uuid', 'websites'],
      dtype='object')
there are 54 columns in this dataframe


# Step3: Feature selection, create a clone of original data 

In [11]:
test_data = data.copy()

In [27]:
test_data = data.copy()
# TODO later we should incorporate these features to get insightful/hidden information
# 3.1 remove unnessesary columns 
del test_data["websites"]
del test_data["twitter"]
del test_data["alive_dead"]
del test_data["address"]
del test_data["avatar"]
del test_data["avatar_url"]
del test_data["birthday"]
del test_data["cv_link"]
del test_data["devpost_url"]
del test_data["email"]
del test_data["full_name"]
del test_data["kaggle_url"]
del test_data["github_created_at"]
del test_data["last_update"]
del test_data["profile_url"]
del test_data["phones"]
del test_data["linkedin_url"]
del test_data["messengers"]
del test_data["neighborhood"]
del test_data["current_salary"]
del test_data["time_spent_each_job"]
del test_data["status"]
del test_data["stargazers_count"]
del test_data["repos_languages"]
del test_data["repos_fork"]
del test_data["consultant"]
del test_data["bonus"]
del test_data["followers"]
del test_data["followings"]
del test_data["github_url"]
del test_data["jbscore"]
del test_data["public_repos"]
del test_data["repos_forks_count"]
del test_data["uuid"]
del test_data["github_updated_at"]
del test_data["country"]
del test_data["hirable"]
del test_data["public_gists"]
del test_data["ownership"]
del test_data["target_company"]
del test_data["target_position"]
del test_data["reason_for_leaving"]

# 3.2 check if skills column has null value
item = test_data["skills"].isnull()
drop_list = []
for i in range(len(item)):
    # 3.2.1 candidates with null skills value
    if item[i] == True:
        drop_list.append(i)
        
test_data = test_data.drop(test_data.index[drop_list])
print("there are {} candidates with null \"skill\"".format(len(test_data)))
test_data.head()

there are 1892 candidates with null "skill"


Unnamed: 0,educations,github_followers,github_followings,github_public_gists,github_public_repos,jobs,location,organizations,salary,skills,summary,title
0,[],,,,,,Bangkok,[],,[],,
1,[],,,,,,"Bangkok Metropolitan Area, Thailand",[],,[],29/09,Internal Audit Manager
5,[{'Education 1': {'Education Start 1': '1999.0...,,,,,,"Mueang Nonthaburi, Nonthaburi, Thailand",[{'Organization 1': {'Organization End 1': 'PR...,,[],,HR Overseas Support Manager at SCG Packaging ...
8,"[{'Education 1': {'Education Start 1': 'nan', ...",,,,,,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'No...,,[],,Recruitment Specialist
9,[{'Education 1': {'Education Start 1': '2012.0...,,,,,,"Bangkok Noi, Bangkok, Thailand",[{'Organization 1': {'Organization End 1': 'PR...,,[],"TOEIC 655, Learning SPSS, Interest in Leadersh...",Student at Ramkhamhaeng University


In [28]:
# test_data.head()
test_data = test_data.reset_index(drop=True)
test_data

Unnamed: 0,educations,github_followers,github_followings,github_public_gists,github_public_repos,jobs,location,organizations,salary,skills,summary,title
0,[],,,,,,Bangkok,[],,[],,
1,[],,,,,,"Bangkok Metropolitan Area, Thailand",[],,[],29/09,Internal Audit Manager
2,[{'Education 1': {'Education Start 1': '1999.0...,,,,,,"Mueang Nonthaburi, Nonthaburi, Thailand",[{'Organization 1': {'Organization End 1': 'PR...,,[],,HR Overseas Support Manager at SCG Packaging ...
3,"[{'Education 1': {'Education Start 1': 'nan', ...",,,,,,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'No...,,[],,Recruitment Specialist
4,[{'Education 1': {'Education Start 1': '2012.0...,,,,,,"Bangkok Noi, Bangkok, Thailand",[{'Organization 1': {'Organization End 1': 'PR...,,[],"TOEIC 655, Learning SPSS, Interest in Leadersh...",Student at Ramkhamhaeng University
5,[],,,,,,,[],,[],,
6,"[{'Education 1': {'Education Start 1': 'nan', ...",,,,,,"Phaya Thai, Bangkok, Thailand",[{'Organization 1': {'Organization End 1': 'na...,,[],,Intermediate Software Engineer at Orion Health
7,"[{'Education 1': {'Education Start 1': 'nan', ...",,,,,,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ap...,,"[{'Strategy': 98}, {'Start-ups': 93}, {'Busine...",09/11,Co-packer Buyer at RB (3rd manufacturing)
8,"[{'Education 1': {'Education Start 1': 'nan', ...",,,,,,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ma...,,"[{'Management': 14}, {'Social Media': 14}, {'M...",,Head of Analytics Development
9,"[{'Education 1': {'Education Start 1': 'nan', ...",,,,,,"Lam Luk Ka, Pathum Thani, Thailand",[{'Organization 1': {'Organization End 1': 'PR...,,[],,Product Owner at Kaidee.com


In [34]:
# 3.3 drop columns where there is no skills
drop_list = []
for i in range(len(test_data["skills"])):
    if len(test_data.loc[i]["skills"]) == 0:
        drop_list.append(i)
test_data = test_data.drop(test_data.index[drop_list])
test_data = test_data.reset_index(drop=True)
print("there are {} candidates with not null skills".format(len(test_data)))
test_data

there are 1203 candidates with not null skills


Unnamed: 0,educations,github_followers,github_followings,github_public_gists,github_public_repos,jobs,location,organizations,salary,skills,summary,title
0,"[{'Education 1': {'Education Start 1': 'nan', ...",,,,,,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ap...,,"[{'Strategy': 98}, {'Start-ups': 93}, {'Busine...",09/11,Co-packer Buyer at RB (3rd manufacturing)
1,"[{'Education 1': {'Education Start 1': 'nan', ...",,,,,,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ma...,,"[{'Management': 14}, {'Social Media': 14}, {'M...",,Head of Analytics Development
2,"[{'Education 1': {'Education Start 1': 'nan', ...",,,,,,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Oc...,,"[{'Sales': 4}, {'Logistics': 4}, {'Management'...",,Independent Consultant and Executive Coach
3,"[{'Education 1': {'Education Start 1': 'nan', ...",,,,,,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ju...,,"[{'Food': 10}, {'Marketing Strategy': 9}, {'Ma...",,Infrastructure Delivery Manager at Accenture
4,"[{'Education 1': {'Education Start 1': 'nan', ...",,,,,,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ja...,,"[{'FinTech': 42}, {'Biometrics': 41}, {'Tokeni...",,IT Operations Executive
5,"[{'Education 1': {'Education Start 1': 'nan', ...",,,,,,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ja...,,"[{'JSON': 4}, {'HTML': 4}, {'PHP': 3}, {'OOP':...",,Talent Acquisition Lead - Thailand at Accentur...
6,"[{'Education 1': {'Education Start 1': 'nan', ...",,,,,,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ma...,,"[{'Project Management': 9}, {'SQL': 8}, {'Java...",,Regional Sales Director (Thailand) at Pegasystems
7,"[{'Education 1': {'Education Start 1': 'nan', ...",,,,,,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ma...,,"[{'Management': 25}, {'Sales Management': 22},...",,Ph.D Student (Business Analytics)
8,"[{'Education 1': {'Education Start 1': 'nan', ...",,,,,,"Phra Khanong, Bangkok, Thailand",[{'Organization 1': {'Organization End 1': 'Ap...,,"[{'Media Relations': 20}, {'Public Relations':...",,Educator
9,"[{'Education 1': {'Education Start 1': 'nan', ...",,,,,,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ju...,,"[{'Luxury Goods': 8}, {'Management': 3}, {'Sal...",,Vice President Finance at agoda.com


In [35]:
# TODO fill in data for candidates to include github, 
# 3.4 drop columns where values are null
drop_data = test_data.copy()
del drop_data["github_followers"]
del drop_data["github_followings"]
del drop_data["github_public_gists"]
del drop_data["github_public_repos"]
del drop_data["salary"]
del drop_data["summary"]

In [36]:
drop_data

Unnamed: 0,educations,jobs,location,organizations,skills,title
0,"[{'Education 1': {'Education Start 1': 'nan', ...",,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ap...,"[{'Strategy': 98}, {'Start-ups': 93}, {'Busine...",Co-packer Buyer at RB (3rd manufacturing)
1,"[{'Education 1': {'Education Start 1': 'nan', ...",,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ma...,"[{'Management': 14}, {'Social Media': 14}, {'M...",Head of Analytics Development
2,"[{'Education 1': {'Education Start 1': 'nan', ...",,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Oc...,"[{'Sales': 4}, {'Logistics': 4}, {'Management'...",Independent Consultant and Executive Coach
3,"[{'Education 1': {'Education Start 1': 'nan', ...",,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ju...,"[{'Food': 10}, {'Marketing Strategy': 9}, {'Ma...",Infrastructure Delivery Manager at Accenture
4,"[{'Education 1': {'Education Start 1': 'nan', ...",,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ja...,"[{'FinTech': 42}, {'Biometrics': 41}, {'Tokeni...",IT Operations Executive
5,"[{'Education 1': {'Education Start 1': 'nan', ...",,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ja...,"[{'JSON': 4}, {'HTML': 4}, {'PHP': 3}, {'OOP':...",Talent Acquisition Lead - Thailand at Accentur...
6,"[{'Education 1': {'Education Start 1': 'nan', ...",,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ma...,"[{'Project Management': 9}, {'SQL': 8}, {'Java...",Regional Sales Director (Thailand) at Pegasystems
7,"[{'Education 1': {'Education Start 1': 'nan', ...",,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ma...,"[{'Management': 25}, {'Sales Management': 22},...",Ph.D Student (Business Analytics)
8,"[{'Education 1': {'Education Start 1': 'nan', ...",,"Phra Khanong, Bangkok, Thailand",[{'Organization 1': {'Organization End 1': 'Ap...,"[{'Media Relations': 20}, {'Public Relations':...",Educator
9,"[{'Education 1': {'Education Start 1': 'nan', ...",,"Bangkok Metropolitan Area, Thailand",[{'Organization 1': {'Organization End 1': 'Ju...,"[{'Luxury Goods': 8}, {'Management': 3}, {'Sal...",Vice President Finance at agoda.com


In [37]:
for i in range(len(drop_data)):
    loca = drop_data.iloc[i]["location"]
    sloca = loca.split(" ")
    drop_data.iloc[i] = drop_data.iloc[i].set_value("location", sloca[-1])
drop_data
del drop_data["title"]

In [38]:
len(drop_data)

1203

In [39]:
count_skill = {}
new_data = drop_data.copy()
count = 0
for i in range(len(new_data)):
    skills = new_data.iloc[i]["skills"]
    for skill in skills:     
        k = str(list(skill.keys())[0])
        v = int(list(skill.values())[0])
        if k not in count_skill:
            count_skill[k] = 1
            count += 1
        else:
            count_skill[k] += 1
inData = {}
# print(count_skill)
for i in range(len(new_data)):
    skills = new_data.iloc[i]["skills"]
    print(i)
    for skill in skills:     
        k = str(list(skill.keys())[0])
        v = int(list(skill.values())[0])
        if count_skill[k] > 10:
            if k not in inData:
                new_data.insert(0, k, 0, allow_duplicates=False)
                inData[k] = 1
            new_data.iloc[i] = new_data.iloc[i].set_value(k, 1)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [40]:
skilled_data = new_data.copy()
skilled_data.head()
# drop_data.to_csv("full_candidate.csv")

Unnamed: 0,MVC,Contract Management,MS Project,Swift,Valuation,Technical Support,UML,Customer Relations,Insurance,3G,...,Entrepreneurship,Management,Business Strategy,Start-ups,Strategy,educations,jobs,location,organizations,skills
0,0,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,"[{'Education 1': {'Education Start 1': 'nan', ...",,Thailand,[{'Organization 1': {'Organization End 1': 'Ap...,"[{'Strategy': 98}, {'Start-ups': 93}, {'Busine..."
1,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,"[{'Education 1': {'Education Start 1': 'nan', ...",,Thailand,[{'Organization 1': {'Organization End 1': 'Ma...,"[{'Management': 14}, {'Social Media': 14}, {'M..."
2,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,"[{'Education 1': {'Education Start 1': 'nan', ...",,Thailand,[{'Organization 1': {'Organization End 1': 'Oc...,"[{'Sales': 4}, {'Logistics': 4}, {'Management'..."
3,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,"[{'Education 1': {'Education Start 1': 'nan', ...",,Thailand,[{'Organization 1': {'Organization End 1': 'Ju...,"[{'Food': 10}, {'Marketing Strategy': 9}, {'Ma..."
4,0,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,"[{'Education 1': {'Education Start 1': 'nan', ...",,Thailand,[{'Organization 1': {'Organization End 1': 'Ja...,"[{'FinTech': 42}, {'Biometrics': 41}, {'Tokeni..."


In [41]:
del skilled_data["skills"]
del skilled_data["organizations"]
del skilled_data["educations"]

In [42]:
# 3.5 get the resulting matrix are the one-hot encoded of candidates data
skilled_data

Unnamed: 0,MVC,Contract Management,MS Project,Swift,Valuation,Technical Support,UML,Customer Relations,Insurance,3G,...,Project Management,Strategic Planning,Business Development,Entrepreneurship,Management,Business Strategy,Start-ups,Strategy,jobs,location
0,0,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,,Thailand
1,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,1,0,0,0,,Thailand
2,0,0,0,0,0,0,0,0,0,0,...,1,0,1,0,1,0,0,0,,Thailand
3,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,,Thailand
4,0,0,0,0,0,0,0,0,0,0,...,0,1,1,1,1,1,1,1,,Thailand
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,,Thailand
6,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,1,0,0,0,,Thailand
7,0,0,0,0,0,0,0,0,0,0,...,0,1,1,0,1,1,0,1,,Thailand
8,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,0,0,1,,Thailand
9,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,1,,Thailand


In [43]:
loc_data = skilled_data.copy()
for i in range(len(loc_data)):
    locs = new_data.iloc[i]["location"]
    print(i)
    if locs not in loc_data:
        loc_data.insert(0, locs, 0, allow_duplicates=False)
    loc_data.iloc[i] = loc_data.iloc[i].set_value(locs, 1)
loc_data

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

Unnamed: 0,Other,California,Hungary,Poland,Singapore,India,Belarus,Kingdom,States,area,...,Project Management,Strategic Planning,Business Development,Entrepreneurship,Management,Business Strategy,Start-ups,Strategy,jobs,location
0,0,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,,Thailand
1,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,1,0,0,0,,Thailand
2,0,0,0,0,0,0,0,0,0,0,...,1,0,1,0,1,0,0,0,,Thailand
3,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,,Thailand
4,0,0,0,0,0,0,0,0,0,0,...,0,1,1,1,1,1,1,1,,Thailand
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,,Thailand
6,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,1,0,0,0,,Thailand
7,0,0,0,0,0,0,0,0,0,0,...,0,1,1,0,1,1,0,1,,Thailand
8,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,0,0,1,,Thailand
9,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,1,,Thailand


# 4. Final Data

In [44]:
final_data = loc_data.copy()

In [45]:
del final_data["location"]

# final_data.to_csv("full_candidate_final.csv")