# Importing json profiles into a database 

In [98]:
import pandas as pd
import json
import numpy as np

In [99]:
def create_profile(x):
    with open('jsons/'+str(x)) as f:
        dict1 = json.load(f)
    list_col= ['experience', 'education', 'languages']
    for n in list_col:
        if n in dict1:
            for i in range(len(dict1[n])): 
                dict1[str(n+str(i+1))]= dict1[n][i]
    data = pd.DataFrame.from_dict(dict1, orient='index').T
    return data

In [100]:
import os
file_list = [f for f in os.listdir('jsons') if f.endswith(".json")]
len(file_list)
# you can replace the list file_list by whatever list of jsons you want to import

49

In [101]:
initial_profile=create_profile('brachetantoine.json')

In [102]:
list_profiles=file_list
profiles = initial_profile
for i in list_profiles:
    a= create_profile(i)
    profiles= pd.concat((profiles,a),axis=0)

In [103]:
profiles.reset_index(inplace=True)

# Initial cleaning

### dropping useless columns and duplicates

In [104]:
pd.options.display.max_rows = 100
profiles.describe().T

# total 48 rows

# ['profile_id'] = str : linkedin id - num and letters, 47 unique 
# ['lastName'] - full 
# ['firstName'] - full 
# ['geoCountryName'] = country name, 3 values, 46 filled
# ['geoLocationName'] = region name, 11 unique - check and recode 
# ['birthDate'] = 35 missing  

# ['summary'] = text, includes \n, 32 unique
# ['industryName'] = short text, full, 16 unique
# ['headline'] = professional headline, 47 unique 

# ['experience'] = liste de dictionnaires. chaque élément est accessible par [0], [1] - full 
# ['education'][0] = liste de dictionnaires - full 
# ['languages'] = list de dictionnaires - name, proficiency - 32 
# ['publications'] =  16 
# ['certifications'] = 11
# ['volunteer'] = 17 
# ['honors'] =  12
# ['projects'] =  22

# to drop : 
# displayPictureUrl', 'img_100_100', 'img_200_200', 'img_400_400', 'img_800_800',
# ['student'] = boolean T/F - only false 
# ['geoCountryUrn'] === CODED - internal LIn code 
# ['geoLocationBackfilled'] === boolean T/F - only false 
# ['elt'] === boolean T/F - only false 
# ['industryUrn'] === CODED (format urn:li:fs_industry:96) // to industryName 
# ['displayPictureUrl'] = url to picture (not accessible without linkedin )
# ['img_100_100', 'img_200_200', 'img_400_400', 'img_800_800'] = ref of image not accessible 
# ['profile_urn'] = str : format urn:li:fs_miniProfile:ACoAAAA615EBaNquQR5gOz_oFr9emeCr0ZNw67M 
# ['member_urn'] = str : format urn:li:member:3856273
# ['public_id'] = str : lastnamefirstname lower ()
# ['entityUrn'] === CODED (format urn:li:fs_profile:ACoAAAA615EBaNquQR5gOz_oFr9emeCr0ZNw67M)
# ['geoLocation']) === CODED, 12 unique 
# ['location'] === dictionary with one nested dictionary, country code - 4 unique 
# ['locationName'] = country name, 4 values (incl. other), 46 filled
# ['address'] = only 1 value (email)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
index,48.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [105]:
profiles.columns

Index(['index', 'summary', 'industryName', 'lastName', 'student',
       'geoCountryName', 'geoCountryUrn', 'geoLocationBackfilled', 'elt',
       'industryUrn', 'firstName', 'entityUrn', 'geoLocation',
       'geoLocationName', 'location', 'headline', 'displayPictureUrl',
       'img_100_100', 'img_200_200', 'img_400_400', 'img_800_800',
       'profile_id', 'profile_urn', 'member_urn', 'public_id', 'experience',
       'education', 'languages', 'publications', 'certifications', 'volunteer',
       'honors', 'projects', 'experience1', 'experience2', 'experience3',
       'experience4', 'experience5', 'education1', 'education2', 'education3',
       'languages1', 'languages2', 'languages3', 'locationName', 'birthDate',
       'languages4', 'address', 'languages5', 'languages6', 'languages7'],
      dtype='object')

In [106]:
profiles = profiles[['index', 'profile_id', 'lastName', 'firstName', 'geoCountryName', 'geoLocationName', 
                                  'summary', 'industryName', 'headline', 'experience', 'education', 
                                  'languages', 'publications', 'certifications', 'volunteer', 'honors', 'projects', 
                                  'experience1','experience2', 'experience3', 'experience4', 'experience5',
                                  'education1', 'education2', 'education3', 'languages1', 'languages2',
                                  'languages3','languages4', 'languages5', 'languages6', 'languages7']]

In [107]:
mask = profiles.duplicated(subset=['profile_id'], keep='first')
dropped = profiles.loc[mask]
dropped

Unnamed: 0,index,profile_id,lastName,firstName,geoCountryName,geoLocationName,summary,industryName,headline,experience,...,education1,education2,education3,languages1,languages2,languages3,languages4,languages5,languages6,languages7
3,0,ACoAAAA615EBaNquQR5gOz_oFr9emeCr0ZNw67M,Brachet,Antoine,France,Greater Paris Metropolitan Region,Antoine croit à l’intelligence de tous et à la...,IT Services and IT Consulting,"Directeur associé de bluenove, initiateur du m...","[{'locationName': 'Paris Area, France', 'entit...",...,{'entityUrn': 'urn:li:fs_education:(ACoAAAA615...,{'entityUrn': 'urn:li:fs_education:(ACoAAAA615...,{'entityUrn': 'urn:li:fs_education:(ACoAAAA615...,"{'name': 'English', 'proficiency': 'FULL_PROFE...","{'name': 'French', 'proficiency': 'NATIVE_OR_B...","{'name': 'German', 'proficiency': 'ELEMENTARY'}",,,,


In [108]:
profiles.drop_duplicates(subset=['profile_id'], keep='first', inplace=True)

### Recoding the honors, publications, volunteer, projects, certifications and languages columns

In [109]:
profiles.isna().sum().sort_values(ascending=False)
# we have 47 rows 
# we will drop the columns with more than 37 missing values
# for columns projects, publications, languages, certifications, and honors 
# we will create one column that states whether this section was filled
# and keep the detail in just the original column, as text. 

languages7         46
languages6         44
languages5         44
languages4         36
languages3         23
languages2         18
summary            15
languages1         15
education3         12
education2          6
geoLocationName     4
experience5         3
geoCountryName      2
experience4         2
education1          1
experience2         0
experience3         0
index               0
experience1         0
profile_id          0
honors              0
volunteer           0
certifications      0
publications        0
languages           0
education           0
experience          0
headline            0
industryName        0
firstName           0
lastName            0
projects            0
dtype: int64

In [110]:
#HONORS
profiles['honors'] = (profiles['honors']).astype(str)
profiles['honors_stated']=np.where(profiles['honors']=='[]', '0', '1')
profiles['honors_stated'].value_counts()

0    36
1    11
Name: honors_stated, dtype: int64

In [111]:
#PUBLICATIONS
profiles['publications'] = (profiles['publications'] ).astype(str)
profiles['pubs_stated']=np.where(profiles['publications']=='[]', '0', '1')
profiles['pubs_stated'].value_counts()

0    32
1    15
Name: pubs_stated, dtype: int64

In [112]:
#VOLUNTEER
profiles['volunteer'] = (profiles['volunteer'] ).astype(str)
profiles['volunteer_stated']=np.where(profiles['volunteer']=='[]', '0', '1')
profiles['volunteer_stated'].value_counts()

0    31
1    16
Name: volunteer_stated, dtype: int64

In [113]:
#PROJECTS
profiles['projects'] = (profiles['projects'] ).astype(str)
profiles['projects_stated']=np.where(profiles['projects']=='[]', '0', '1')
profiles['projects_stated'].value_counts()

0    26
1    21
Name: projects_stated, dtype: int64

In [114]:
# CERTIFICATIONS
profiles['certifications'] = (profiles['certifications'] ).astype(str)
profiles['certifications_stated']=np.where(profiles['certifications']=='[]', '0', '1')
profiles['certifications_stated'].value_counts()

0    37
1    10
Name: certifications_stated, dtype: int64

In [115]:
# LANGUAGE_COUNT
profiles['languages'] = (profiles['languages']).astype(str)
profiles['languages3'] = (profiles['languages3']).astype(str)
profiles['languages_over2']=np.where(profiles['languages3']=='nan', '0', '1')
profiles['languages_over2'].value_counts()


1    24
0    23
Name: languages_over2, dtype: int64

In [116]:
# LANGUAGES
profiles['languages_stated']=np.where(profiles['languages']=='[]', '0', '1')
profiles['languages_stated'].value_counts()

1    32
0    15
Name: languages_stated, dtype: int64

In [117]:
# DROPPING THE COLUMNS WE HAVE RECODED 
profiles.drop(columns=['languages1', 'languages2',
       'languages3', 'languages4', 'languages5', 'languages6', 'languages7'], inplace=True)

Notes for later : 
* there is surely a better way to deal with the languages, publications, honors, volunteers, project and certifications information. However, we have so many missing values that a yes/no column for each of these categories (filled or not) already provides some information. 
* the languages category could be split in different ways. We chose to highlight the profiles who state 3 or more languages spoken. For this category, the name of languages could be treated as text.

**Important** 
We could change the import code to not split the languages, publications, honors, volunteers, projects and certification columns, but instead just add a column based on the existing ones. 


### Dealing with the experience and education columns

In [118]:
profiles.describe(include='O')

Unnamed: 0,profile_id,lastName,firstName,geoCountryName,geoLocationName,summary,industryName,headline,experience,education,...,education1,education2,education3,honors_stated,pubs_stated,volunteer_stated,projects_stated,certifications_stated,languages_over2,languages_stated
count,47,47,47,45,43,32,47,47,47,47,...,46,41,35,47,47,47,47,47,47,47
unique,47,47,41,3,11,32,16,47,47,47,...,46,41,35,2,2,2,2,2,2,2
top,ACoAAAA615EBaNquQR5gOz_oFr9emeCr0ZNw67M,Brachet,Nicolas,France,"Paris, Île-de-France",Antoine croit à l’intelligence de tous et à la...,IT Services and IT Consulting,"Directeur associé de bluenove, initiateur du m...","[{'locationName': 'Paris Area, France', 'entit...",[{'entityUrn': 'urn:li:fs_education:(ACoAAAA61...,...,{'entityUrn': 'urn:li:fs_education:(ACoAAAA615...,{'entityUrn': 'urn:li:fs_education:(ACoAAAA615...,{'entityUrn': 'urn:li:fs_education:(ACoAAAA615...,0,0,0,0,0,1,1
freq,1,1,3,38,15,1,10,1,1,1,...,1,1,1,36,32,31,26,37,24,32


In [119]:
profiles_experience=pd.concat([profiles, 
pd.DataFrame(profiles['experience1'].apply(pd.Series)).add_prefix('exp1_'), 
pd.DataFrame(profiles['experience2'].apply(pd.Series)).add_prefix('exp2_'), 
pd.DataFrame(profiles['experience3'].apply(pd.Series)).add_prefix('exp3_'), 
pd.DataFrame(profiles['experience4'].apply(pd.Series)).add_prefix('exp4_'), 
pd.DataFrame(profiles['experience5'].apply(pd.Series)).add_prefix('exp5_')], 
axis=1).reset_index(drop=True)

In [120]:
profiles_experience= pd.concat([profiles_experience, 
pd.DataFrame(profiles_experience['education1'].apply(pd.Series)).add_prefix('ed1_'), 
pd.DataFrame(profiles_experience['education2'].apply(pd.Series)).add_prefix('ed2_'), 
pd.DataFrame(profiles_experience['education3'].apply(pd.Series)).add_prefix('ed3_')], 
axis=1).reset_index(drop=True)

In [121]:
print(list(profiles_experience.columns))

['index', 'profile_id', 'lastName', 'firstName', 'geoCountryName', 'geoLocationName', 'summary', 'industryName', 'headline', 'experience', 'education', 'languages', 'publications', 'certifications', 'volunteer', 'honors', 'projects', 'experience1', 'experience2', 'experience3', 'experience4', 'experience5', 'education1', 'education2', 'education3', 'honors_stated', 'pubs_stated', 'volunteer_stated', 'projects_stated', 'certifications_stated', 'languages_over2', 'languages_stated', 'exp1_locationName', 'exp1_entityUrn', 'exp1_geoLocationName', 'exp1_companyName', 'exp1_timePeriod', 'exp1_description', 'exp1_company', 'exp1_title', 'exp1_companyUrn', 'exp1_companyLogoUrl', 'exp1_$anti_abuse_metadata', 'exp1_geoUrn', 'exp1_region', 'exp1_honors', 'exp1_organizations', 'exp2_entityUrn', 'exp2_companyName', 'exp2_timePeriod', 'exp2_company', 'exp2_title', 'exp2_companyUrn', 'exp2_companyLogoUrl', 'exp2_locationName', 'exp2_geoLocationName', 'exp2_geoUrn', 'exp2_description', 'exp2_region', 

In [122]:
for i in range (1,6):
    profiles_experience['exp'+str(i)+'_startDate_month']= (pd.DataFrame(profiles_experience['exp'+str(i)+'_timePeriod'].apply(pd.Series))['startDate']).apply(pd.Series)['month']
    profiles_experience['exp'+str(i)+'_startDate_year']= (pd.DataFrame(profiles_experience['exp'+str(i)+'_timePeriod'].apply(pd.Series))['startDate']).apply(pd.Series)['year']
    profiles_experience['exp'+str(i)+'_endDate_month']= (pd.DataFrame(profiles_experience['exp'+str(i)+'_timePeriod'].apply(pd.Series))['endDate']).apply(pd.Series)['month']
    profiles_experience['exp'+str(i)+'_endDate_year']= (pd.DataFrame(profiles_experience['exp'+str(i)+'_timePeriod'].apply(pd.Series))['endDate']).apply(pd.Series)['year']
    profiles_experience['exp'+str(i)+'_industry']= str(pd.DataFrame(profiles_experience['exp1_company'].apply(pd.Series))['industries'])
    profiles_experience['exp'+str(i)+'_company_empl_low']= (pd.DataFrame(profiles_experience['exp1_company'].apply(pd.Series))['employeeCountRange']).apply(pd.Series)['start']
    profiles_experience['exp'+str(i)+'_company_empl_high']= (pd.DataFrame(profiles_experience['exp1_company'].apply(pd.Series))['employeeCountRange']).apply(pd.Series)['end']

In [123]:
for i in range (1,4):
    profiles_experience['ed'+str(i)+'_startDate_month']= (pd.DataFrame(profiles_experience['ed'+str(i)+'_timePeriod'].apply(pd.Series))['startDate']).apply(pd.Series)['month']
    profiles_experience['ed'+str(i)+'_startDate_year']= (pd.DataFrame(profiles_experience['ed'+str(i)+'_timePeriod'].apply(pd.Series))['startDate']).apply(pd.Series)['year']

In [124]:
print(list(profiles_experience.columns))

['index', 'profile_id', 'lastName', 'firstName', 'geoCountryName', 'geoLocationName', 'summary', 'industryName', 'headline', 'experience', 'education', 'languages', 'publications', 'certifications', 'volunteer', 'honors', 'projects', 'experience1', 'experience2', 'experience3', 'experience4', 'experience5', 'education1', 'education2', 'education3', 'honors_stated', 'pubs_stated', 'volunteer_stated', 'projects_stated', 'certifications_stated', 'languages_over2', 'languages_stated', 'exp1_locationName', 'exp1_entityUrn', 'exp1_geoLocationName', 'exp1_companyName', 'exp1_timePeriod', 'exp1_description', 'exp1_company', 'exp1_title', 'exp1_companyUrn', 'exp1_companyLogoUrl', 'exp1_$anti_abuse_metadata', 'exp1_geoUrn', 'exp1_region', 'exp1_honors', 'exp1_organizations', 'exp2_entityUrn', 'exp2_companyName', 'exp2_timePeriod', 'exp2_company', 'exp2_title', 'exp2_companyUrn', 'exp2_companyLogoUrl', 'exp2_locationName', 'exp2_geoLocationName', 'exp2_geoUrn', 'exp2_description', 'exp2_region', 

In [125]:
list_columns=[i for i in profiles_experience.columns if ("Urn" in i or "Logo" in i or "_region" in i or '_abuse' in i or '_geo' in i or '_proj' in i or '_organi' in i or '_0' in i)]
#to check before dropping 
#profiles_experience[list_columns]
profiles_experience.drop(columns=list_columns, inplace=True)

In [126]:
profiles_experience.drop(columns='index', inplace=True)
profiles_experience.head()

Unnamed: 0,profile_id,lastName,firstName,geoCountryName,geoLocationName,summary,industryName,headline,experience,education,...,exp5_endDate_year,exp5_industry,exp5_company_empl_low,exp5_company_empl_high,ed1_startDate_month,ed1_startDate_year,ed2_startDate_month,ed2_startDate_year,ed3_startDate_month,ed3_startDate_year
0,ACoAAAA615EBaNquQR5gOz_oFr9emeCr0ZNw67M,Brachet,Antoine,France,Greater Paris Metropolitan Region,Antoine croit à l’intelligence de tous et à la...,IT Services and IT Consulting,"Directeur associé de bluenove, initiateur du m...","[{'locationName': 'Paris Area, France', 'entit...",[{'entityUrn': 'urn:li:fs_education:(ACoAAAA61...,...,2017.0,0 [Management Consulting]\n1...,11.0,50.0,,1998.0,,1996.0,,1993.0
1,ACoAAAfK9YwBjZr16cDaVuxZICOg0QUnbPiUoXE,Demri,Bobby,France,"Paris, Île-de-France",French Entrepreneur - Founder and Managing Par...,Venture Capital and Private Equity Principals,Founder & Managing Partner at ROCH Ventures,[{'entityUrn': 'urn:li:fs_position:(ACoAAAfK9Y...,[{'entityUrn': 'urn:li:fs_education:(ACoAAAfK9...,...,2017.0,0 [Management Consulting]\n1...,2.0,10.0,,2006.0,,2005.0,,1998.0
2,ACoAAAUn_5ABqO0mSShQxo4gFyTaCDoaYUk5Fm8,Durieux,Sarah,France,"Paris, Île-de-France",Inspired by the power we can build to change t...,Civic and Social Organizations,Co-director Multitudes Foundation - Activist a...,[{'entityUrn': 'urn:li:fs_position:(ACoAAAUn_5...,[{'entityUrn': 'urn:li:fs_education:(ACoAAAUn_...,...,2021.0,0 [Management Consulting]\n1...,2.0,10.0,3.0,2022.0,9.0,2020.0,9.0,2020.0
3,ACoAAARi9fcBfH1GjbFHnWc1QqHftfh3LQjRUEg,Jaillot,Bastien,France,Greater Paris Metropolitan Region,"IT Consultant, Technical Expert, Lead develope...",IT Services and IT Consulting,Web expert chez Jolicode,[{'entityUrn': 'urn:li:fs_position:(ACoAAARi9f...,[{'entityUrn': 'urn:li:fs_education:(ACoAAARi9...,...,,0 [Management Consulting]\n1...,,,,2004.0,,,,
4,ACoAAApOhv4B3_GF-OGg8-ipxxkMf6AOggWigl8,Vanneroy,Coline,France,Greater Paris Metropolitan Region,,IT Services and IT Consulting,Directrice des opérations chez Cap Collectif,[{'entityUrn': 'urn:li:fs_position:(ACoAAApOhv...,[{'entityUrn': 'urn:li:fs_education:(ACoAAApOh...,...,2009.0,0 [Management Consulting]\n1...,11.0,50.0,,2005.0,,2002.0,,


In [127]:
profiles_experience.reset_index(inplace=True)
profiles_experience.head()

Unnamed: 0,index,profile_id,lastName,firstName,geoCountryName,geoLocationName,summary,industryName,headline,experience,...,exp5_endDate_year,exp5_industry,exp5_company_empl_low,exp5_company_empl_high,ed1_startDate_month,ed1_startDate_year,ed2_startDate_month,ed2_startDate_year,ed3_startDate_month,ed3_startDate_year
0,0,ACoAAAA615EBaNquQR5gOz_oFr9emeCr0ZNw67M,Brachet,Antoine,France,Greater Paris Metropolitan Region,Antoine croit à l’intelligence de tous et à la...,IT Services and IT Consulting,"Directeur associé de bluenove, initiateur du m...","[{'locationName': 'Paris Area, France', 'entit...",...,2017.0,0 [Management Consulting]\n1...,11.0,50.0,,1998.0,,1996.0,,1993.0
1,1,ACoAAAfK9YwBjZr16cDaVuxZICOg0QUnbPiUoXE,Demri,Bobby,France,"Paris, Île-de-France",French Entrepreneur - Founder and Managing Par...,Venture Capital and Private Equity Principals,Founder & Managing Partner at ROCH Ventures,[{'entityUrn': 'urn:li:fs_position:(ACoAAAfK9Y...,...,2017.0,0 [Management Consulting]\n1...,2.0,10.0,,2006.0,,2005.0,,1998.0
2,2,ACoAAAUn_5ABqO0mSShQxo4gFyTaCDoaYUk5Fm8,Durieux,Sarah,France,"Paris, Île-de-France",Inspired by the power we can build to change t...,Civic and Social Organizations,Co-director Multitudes Foundation - Activist a...,[{'entityUrn': 'urn:li:fs_position:(ACoAAAUn_5...,...,2021.0,0 [Management Consulting]\n1...,2.0,10.0,3.0,2022.0,9.0,2020.0,9.0,2020.0
3,3,ACoAAARi9fcBfH1GjbFHnWc1QqHftfh3LQjRUEg,Jaillot,Bastien,France,Greater Paris Metropolitan Region,"IT Consultant, Technical Expert, Lead develope...",IT Services and IT Consulting,Web expert chez Jolicode,[{'entityUrn': 'urn:li:fs_position:(ACoAAARi9f...,...,,0 [Management Consulting]\n1...,,,,2004.0,,,,
4,4,ACoAAApOhv4B3_GF-OGg8-ipxxkMf6AOggWigl8,Vanneroy,Coline,France,Greater Paris Metropolitan Region,,IT Services and IT Consulting,Directrice des opérations chez Cap Collectif,[{'entityUrn': 'urn:li:fs_position:(ACoAAApOhv...,...,2009.0,0 [Management Consulting]\n1...,11.0,50.0,,2005.0,,2002.0,,


## Exporting

### Removing names

In [128]:
profiles_experience.rename(columns={"index": "ID"}, inplace=True)

In [129]:
names=profiles_experience[["ID", "lastName", 'firstName']]
names.columns=["id", "lastName", 'firstName']

In [130]:
names.to_csv("names.csv", sep=';', index=False)

### Removing useless columns

In [131]:
a= pd.DataFrame(profiles_experience.isna().sum().sort_values(ascending=False))
a.columns=["column"]
coltodrop= list(a.loc[a['column']>len(profiles_experience)/2].index)
coltodrop


['ed3_startDate_month',
 'ed3_honors',
 'ed3_courses',
 'ed1_courses',
 'ed1_honors',
 'exp2_honors',
 'ed2_honors',
 'exp1_endDate_month',
 'exp5_honors',
 'exp1_endDate_year',
 'ed2_startDate_month',
 'ed3_activities',
 'exp1_honors',
 'ed3_grade',
 'ed1_startDate_month',
 'ed2_grade',
 'ed1_grade',
 'ed2_activities',
 'ed1_activities',
 'ed2_description',
 'ed3_description',
 'ed1_description',
 'ed3_fieldOfStudy']

In [134]:
# Dropping columns with too many missing values 
profiles_experience.drop(columns=coltodrop, inplace=True)
# and profile ID 
profiles_experience.drop(columns=['profile_id', "lastName", 'firstName'], inplace=True)
profiles_experience.head()

KeyError: "['ed3_startDate_month', 'ed3_honors', 'ed3_courses', 'ed1_courses', 'ed1_honors', 'exp2_honors', 'ed2_honors', 'exp1_endDate_month', 'exp5_honors', 'exp1_endDate_year', 'ed2_startDate_month', 'ed3_activities', 'exp1_honors', 'ed3_grade', 'ed1_startDate_month', 'ed2_grade', 'ed1_grade', 'ed2_activities', 'ed1_activities', 'ed2_description', 'ed3_description', 'ed1_description', 'ed3_fieldOfStudy'] not found in axis"

In [136]:
# and profile ID 
profiles_experience.drop(columns=["lastName", 'firstName'], inplace=True)
profiles_experience.head()

Unnamed: 0,ID,geoCountryName,geoLocationName,summary,industryName,headline,experience,education,languages,publications,...,exp5_startDate_month,exp5_startDate_year,exp5_endDate_month,exp5_endDate_year,exp5_industry,exp5_company_empl_low,exp5_company_empl_high,ed1_startDate_year,ed2_startDate_year,ed3_startDate_year
0,0,France,Greater Paris Metropolitan Region,Antoine croit à l’intelligence de tous et à la...,IT Services and IT Consulting,"Directeur associé de bluenove, initiateur du m...","[{'locationName': 'Paris Area, France', 'entit...",[{'entityUrn': 'urn:li:fs_education:(ACoAAAA61...,"[{'name': 'English', 'proficiency': 'FULL_PROF...","[{'date': {'month': 9, 'year': 2017, 'day': 1}...",...,2.0,2013.0,10.0,2017.0,0 [Management Consulting]\n1...,11.0,50.0,1998.0,1996.0,1993.0
1,1,France,"Paris, Île-de-France",French Entrepreneur - Founder and Managing Par...,Venture Capital and Private Equity Principals,Founder & Managing Partner at ROCH Ventures,[{'entityUrn': 'urn:li:fs_position:(ACoAAAfK9Y...,[{'entityUrn': 'urn:li:fs_education:(ACoAAAfK9...,[],[],...,4.0,2014.0,11.0,2017.0,0 [Management Consulting]\n1...,2.0,10.0,2006.0,2005.0,1998.0
2,2,France,"Paris, Île-de-France",Inspired by the power we can build to change t...,Civic and Social Organizations,Co-director Multitudes Foundation - Activist a...,[{'entityUrn': 'urn:li:fs_position:(ACoAAAUn_5...,[{'entityUrn': 'urn:li:fs_education:(ACoAAAUn_...,"[{'name': 'Anglais', 'proficiency': 'NATIVE_OR...","[{'date': {'month': 1, 'year': 2021, 'day': 21...",...,2.0,2018.0,10.0,2021.0,0 [Management Consulting]\n1...,2.0,10.0,2022.0,2020.0,2020.0
3,3,France,Greater Paris Metropolitan Region,"IT Consultant, Technical Expert, Lead develope...",IT Services and IT Consulting,Web expert chez Jolicode,[{'entityUrn': 'urn:li:fs_position:(ACoAAARi9f...,[{'entityUrn': 'urn:li:fs_education:(ACoAAARi9...,[{'name': 'Anglais'}],"[{'date': {'month': 2, 'year': 2015, 'day': 3}...",...,,,,,0 [Management Consulting]\n1...,,,2004.0,,
4,4,France,Greater Paris Metropolitan Region,,IT Services and IT Consulting,Directrice des opérations chez Cap Collectif,[{'entityUrn': 'urn:li:fs_position:(ACoAAApOhv...,[{'entityUrn': 'urn:li:fs_education:(ACoAAApOh...,[],[],...,,2008.0,,2009.0,0 [Management Consulting]\n1...,11.0,50.0,2005.0,2002.0,


In [137]:
profiles_experience.to_csv("profile_data_reprocessed.csv", sep=';', index=False)