# Setup

In [67]:
import pandas as pd
import pymysql.cursors
from sqlalchemy import create_engine, MetaData
from sqlalchemy.schema import CreateTable
from sqlalchemy import text
import getpass
import os

sql_pass = getpass.getpass()
#sql_pass = os.environ['MySQLPass']
connection_string = 'mysql+pymysql://root:' + sql_pass + '@localhost:3306/'
engine = create_engine(connection_string)


# People

### Names

In [68]:
names= pd.read_csv('../data/names.csv', sep=';')

In [69]:
names.to_sql('people_names', engine, 'civictech', if_exists='replace', index=False)

47

### Profiles

In [70]:
people=pd.read_csv('../profiles/people_main_db.csv', sep=';')
print(list(people.columns))
print(len(list(people.columns)))

['ind_id', 'geoCountryName', 'geoLocationName', 'summary', 'industryName', 'headline', 'experience', 'education', 'languages', 'publications', 'certifications', 'volunteer', 'honors', 'projects', 'experience1', 'experience2', 'experience3', 'experience4', 'experience5', 'education1', 'education2', 'education3', 'languages_over2', 'honors_stated', 'publications_stated', 'volunteer_stated', 'projects_stated', 'certifications_stated', 'languages_stated', 'consulting_roles', 'direction_roles', 'founding_roles']
32


In [71]:
# We may want to drop the columns with too many missing values.
pd.DataFrame(people.isna().sum().sort_values(ascending=False)).head(15)

Unnamed: 0,0
summary,15
education3,12
education2,6
geoLocationName,4
experience5,3
experience4,2
geoCountryName,2
education1,1
honors_stated,0
languages_over2,0


In [72]:
# and/or drop text columns 
people.dtypes.sort_values()

ind_id                    int64
consulting_roles          int64
languages_stated          int64
certifications_stated     int64
projects_stated           int64
volunteer_stated          int64
publications_stated       int64
honors_stated             int64
languages_over2           int64
direction_roles           int64
founding_roles            int64
volunteer                object
geoCountryName           object
geoLocationName          object
summary                  object
industryName             object
headline                 object
experience               object
education                object
languages                object
certifications           object
education3               object
education1               object
experience5              object
experience4              object
experience3              object
publications             object
experience1              object
projects                 object
honors                   object
education2               object
experien

In [73]:
people.head(3).T

Unnamed: 0,0,1,2
ind_id,0,1,2
geoCountryName,France,France,France
geoLocationName,Paris Metropolitan Region,Paris Metropolitan Region,Paris Metropolitan Region
summary,Antoine croit à l’intelligence de tous et à la...,French Entrepreneur - Founder and Managing Par...,Inspired by the power we can build to change t...
industryName,IT Services and IT Consulting,Venture Capital and Private Equity Principals,Civic and Social Organizations
headline,"Directeur associé de bluenove, initiateur du m...",Founder & Managing Partner at ROCH Ventures,Co-director Multitudes Foundation - Activist a...
experience,"[{'locationName': 'Paris Area, France', 'entit...",[{'entityUrn': 'urn:li:fs_position:(ACoAAAfK9Y...,[{'entityUrn': 'urn:li:fs_position:(ACoAAAUn_5...
education,[{'entityUrn': 'urn:li:fs_education:(ACoAAAA61...,[{'entityUrn': 'urn:li:fs_education:(ACoAAAfK9...,[{'entityUrn': 'urn:li:fs_education:(ACoAAAUn_...
languages,"[{'name': 'English', 'proficiency': 'FULL_PROF...",[],"[{'name': 'Anglais', 'proficiency': 'NATIVE_OR..."
publications,"[{'date': {'month': 9, 'year': 2017, 'day': 1}...",[],"[{'date': {'month': 1, 'year': 2021, 'day': 21..."


In [74]:
people.to_csv("people.csv", sep=";", index=False)

In [75]:
# for now we are keeping it as is
people.to_sql('people', engine, 'civictech', if_exists='replace', index=False)

47

### Experience

In [76]:
experience=pd.read_csv('../profiles/people_experience_db.csv', sep=';')

In [77]:
print(list(experience.columns))
print(len(list(experience.columns)))
print(len(experience))

['index', 'ind_id', 'locationName', 'companyName', 'description', 'title', 'startDate_month', 'startDate_year', 'endDate_month', 'endDate_year', 'industry', 'company_empl_low', 'company_empl_high', 'title_direction', 'titleconsulting', 'titlefounder']
16
235


In [78]:
experience.isna().sum().sort_values(ascending=False)

locationName         62
description          57
industry             20
companyName           5
title                 5
index                 0
ind_id                0
startDate_month       0
startDate_year        0
endDate_month         0
endDate_year          0
company_empl_low      0
company_empl_high     0
title_direction       0
titleconsulting       0
titlefounder          0
dtype: int64

In [79]:
experience.isna().sum().sort_values(ascending=False)

locationName         62
description          57
industry             20
companyName           5
title                 5
index                 0
ind_id                0
startDate_month       0
startDate_year        0
endDate_month         0
endDate_year          0
company_empl_low      0
company_empl_high     0
title_direction       0
titleconsulting       0
titlefounder          0
dtype: int64

In [80]:
experience['startDate_year'] = experience['startDate_year'].fillna(0)
experience['startDate_month'] = experience['startDate_month'].fillna(0)
experience['endDate_year'] = experience['endDate_year'].fillna(0)
experience['endDate_month'] = experience['endDate_month'].fillna(0)
experience['company_empl_low'] = experience['company_empl_low'].fillna(999)
experience['company_empl_high'] = experience['company_empl_high'].fillna(999)

In [81]:
experience['startDate_year'] = experience['startDate_year'].astype(int)
experience['startDate_month'] = experience['startDate_month'].astype(int)
experience['endDate_year'] = experience['endDate_year'].astype(int)
experience['endDate_month'] = experience['endDate_month'].astype(int)
experience['company_empl_low'] = experience['company_empl_low'].astype(int)
experience['company_empl_high'] = experience['company_empl_high'].astype(int)

In [82]:
experience.industry[1]

"['Management Consulting']"

In [83]:
import re
def clean_industry(x):
    x=str(x)
    pattern = "[a-zA-Z]+ [a-zA-Z]+|[a-zA-Z]+"
    a= re.findall(pattern, x)
    a= ' '.join(a)
    #a= a[1:-1]
    return a

In [84]:
print(list(experience['industry'].apply(clean_industry)))

['Management Consulting', 'Management Consulting', 'Management Consulting', 'Management Consulting', 'Management Consulting', 'Venture Capital Private Equity', 'Venture Capital Private Equity', 'Venture Capital Private Equity', 'Venture Capital Private Equity', 'Venture Capital Private Equity', 'Nonprofit Organization Management', 'Nonprofit Organization Management', 'Nonprofit Organization Management', 'Nonprofit Organization Management', 'Nonprofit Organization Management', 'nan', 'nan', 'nan', 'nan', 'nan', 'Computer Software', 'Computer Software', 'Computer Software', 'Computer Software', 'Computer Software', 'Internet', 'Internet', 'Internet', 'Internet', 'Internet', 'Information Technology and Services', 'Information Technology and Services', 'Information Technology and Services', 'Information Technology and Services', 'Information Technology and Services', 'Civic Social Organization', 'Civic Social Organization', 'Civic Social Organization', 'Civic Social Organization', 'Civic S

In [85]:
experience['industry']= experience['industry'].apply(clean_industry)

In [86]:
experience['industry'].value_counts()

Computer Software                      55
Information Technology and Services    45
Civic Social Organization              35
Management Consulting                  20
nan                                    20
Higher Education                       20
Internet                               10
Venture Capital Private Equity          5
Nonprofit Organization Management       5
Graphic Design                          5
Performing Arts                         5
Marketing and Advertising               5
Research                                5
Name: industry, dtype: int64

In [87]:
experience.drop(columns='index', inplace=True)
experience.head()

Unnamed: 0,ind_id,locationName,companyName,description,title,startDate_month,startDate_year,endDate_month,endDate_year,industry,company_empl_low,company_empl_high,title_direction,titleconsulting,titlefounder
0,0,Paris Metropolitan Region,bluenove,Bluenove accompagne la transformation positive...,Directeur associé,11,2017,0,0,Management Consulting,11,50,1,0,0
1,0,Paris Metropolitan Region,démocratie ouverte,Démocratie Ouverte est un collectif citoyen in...,Co-Président,1,2018,5,2020,Management Consulting,11,50,1,0,0
2,0,Paris Metropolitan Region,apm - association progrès du management,,Expert,1,2017,0,0,Management Consulting,11,50,0,0,0
3,0,,démocratie ouverte,,Membre du Comité d'Orientation Stratégique,7,2021,0,0,Management Consulting,11,50,0,0,0
4,0,Paris Metropolitan Region,dassault systèmes,Netvibes provides Dashboard Intelligence ~ tra...,"Senior Director, Strategic Business Development",2,2013,10,2017,Management Consulting,11,50,1,0,0


In [88]:
experience.reset_index(inplace=True)

In [89]:
experience.rename(columns={"index":"exp_id"}, inplace=True)

In [90]:
experience.dtypes

exp_id                int64
ind_id                int64
locationName         object
companyName          object
description          object
title                object
startDate_month       int64
startDate_year        int64
endDate_month         int64
endDate_year          int64
industry             object
company_empl_low      int64
company_empl_high     int64
title_direction       int64
titleconsulting       int64
titlefounder          int64
dtype: object

In [91]:
experience.columns

Index(['exp_id', 'ind_id', 'locationName', 'companyName', 'description',
       'title', 'startDate_month', 'startDate_year', 'endDate_month',
       'endDate_year', 'industry', 'company_empl_low', 'company_empl_high',
       'title_direction', 'titleconsulting', 'titlefounder'],
      dtype='object')

In [92]:
experience.head().T

Unnamed: 0,0,1,2,3,4
exp_id,0,1,2,3,4
ind_id,0,0,0,0,0
locationName,Paris Metropolitan Region,Paris Metropolitan Region,Paris Metropolitan Region,,Paris Metropolitan Region
companyName,bluenove,démocratie ouverte,apm - association progrès du management,démocratie ouverte,dassault systèmes
description,Bluenove accompagne la transformation positive...,Démocratie Ouverte est un collectif citoyen in...,,,Netvibes provides Dashboard Intelligence ~ tra...
title,Directeur associé,Co-Président,Expert,Membre du Comité d'Orientation Stratégique,"Senior Director, Strategic Business Development"
startDate_month,11,1,1,7,2
startDate_year,2017,2018,2017,2021,2013
endDate_month,0,5,0,0,10
endDate_year,0,2020,0,0,2017


In [93]:
experience.to_csv("experience.csv", sep=";", index=False)

In [94]:
experience.to_sql('people_experience',engine, 'civictech', if_exists='replace', index=False)# dtype={'locationName': string, 'companyName':'VARCHAR(500)', 'description':'VARCHAR(600)', 'title': 'VARCHAR(700)', 'industry':'VARCHAR(500)'})


235

### Education

In [95]:
education=pd.read_csv('../profiles/people_education_db.csv', sep=';')

In [96]:
print(list(education.columns))
print(len(list(education.columns)))
print(len(education))

['ind_id', 'school', 'degreeName', 'schoolName', 'fieldOfStudy', 'startDate_year']
6
141


In [97]:
education.isna().sum().sort_values(ascending=False)

fieldOfStudy      51
school            46
degreeName        44
startDate_year    30
schoolName        19
ind_id             0
dtype: int64

In [98]:
education.dtypes.sort_values(ascending=False)

school             object
degreeName         object
schoolName         object
fieldOfStudy       object
startDate_year    float64
ind_id              int64
dtype: object

In [99]:
education.startDate_year.value_counts(dropna=False)
education['startDate_year'] = education['startDate_year'].fillna(0)

In [100]:
education.head()

Unnamed: 0,ind_id,school,degreeName,schoolName,fieldOfStudy,startDate_year
0,0,"{'objectUrn': 'urn:li:school:19908', 'entityUr...",Master,ESCP Europe,Business/Managerial Economics,1998.0
1,0,,,Prépa Saint Jean de Douai,,1996.0
2,0,,Baccalauréat,Lycée Kernanec,Economics,1993.0
3,1,"{'objectUrn': 'urn:li:school:12330', 'entityUr...",,Ecole des Hautes Etudes Politiques,Relations Internationales et Sciences Politiques,2006.0
4,1,"{'objectUrn': 'urn:li:school:13392', 'entityUr...",,The Hebrew University,Relations et affaires internationales,2005.0


In [101]:
education['startDate_year']= education['startDate_year'].astype(int)

In [102]:
education.reset_index(inplace=True)

In [103]:
education.head()

Unnamed: 0,index,ind_id,school,degreeName,schoolName,fieldOfStudy,startDate_year
0,0,0,"{'objectUrn': 'urn:li:school:19908', 'entityUr...",Master,ESCP Europe,Business/Managerial Economics,1998
1,1,0,,,Prépa Saint Jean de Douai,,1996
2,2,0,,Baccalauréat,Lycée Kernanec,Economics,1993
3,3,1,"{'objectUrn': 'urn:li:school:12330', 'entityUr...",,Ecole des Hautes Etudes Politiques,Relations Internationales et Sciences Politiques,2006
4,4,1,"{'objectUrn': 'urn:li:school:13392', 'entityUr...",,The Hebrew University,Relations et affaires internationales,2005


In [104]:
education.rename(columns= {'index': "ed_id"}, inplace=True)

In [105]:
education.head().T

Unnamed: 0,0,1,2,3,4
ed_id,0,1,2,3,4
ind_id,0,0,0,1,1
school,"{'objectUrn': 'urn:li:school:19908', 'entityUr...",,,"{'objectUrn': 'urn:li:school:12330', 'entityUr...","{'objectUrn': 'urn:li:school:13392', 'entityUr..."
degreeName,Master,,Baccalauréat,,
schoolName,ESCP Europe,Prépa Saint Jean de Douai,Lycée Kernanec,Ecole des Hautes Etudes Politiques,The Hebrew University
fieldOfStudy,Business/Managerial Economics,,Economics,Relations Internationales et Sciences Politiques,Relations et affaires internationales
startDate_year,1998,1996,1993,2006,2005


In [106]:
education.to_csv("education.csv", sep=";", index=False)

In [107]:
education.to_sql('people_education', engine, 'civictech', if_exists='replace', index=False)

141

# Organizations

## Companies finance data

In [108]:
companies_finance=pd.read_csv('../organizations/companies_finance_data.csv', sep=';') 

In [109]:
companies_finance.head(20)

Unnamed: 0,company_year_ide,company,annee,chiffre_daffaires_e,marge_brute_e,resultat_dexploitation_e,resultat_net_e,taux_croissance_ca_pc,taux_marge_brute_pc,taux_marge_operationnelle_pc,...,fonds_propres_e,marge_nette_pc,rentabilite_sur_fonds_propres_pc,rentabilite_economique_pc,valeur_ajoutee_e,valeur_ajoutee_sur_ca_pc,salaires_et_charges_sociales_e,salaires_sur_ca_pc,impots_et_taxes_e,chiffre_daffaires_a_lexport_e
0,0,citility,2017.0,0.0,0,0,-562000,0,0,0,...,1380000,0,0,0,0,0,0,0,0,0
1,1,citility,2016.0,30700.0,527000,-296000,-238000,4,1720,-965,...,382000,-775,-623,-173,351000,1140,636000,2070,9160,0
2,2,voxcracy,2019.0,46300.0,46300,-38500,-39000,177,100,-833,...,513000,-842,-76,-63,2170,47,33600,725,137,0
3,3,voxcracy,2018.0,16700.0,157000,-130000,-112000,861,940,-776,...,552000,-668,-202,-169,-21000,-126,101000,602,1120,5000
4,4,voxcracy,2017.0,1740.0,75900,-30300,-23400,0,4360,-1740,...,569000,-1350,-41,-39,-5040,-290,19100,1100,329,0
5,5,voxcracy,2016.0,0.0,0,-4910,-4910,0,0,0,...,111,0,-4420,-177,-942,0,0,0,140,0
6,6,LLL,2020.0,473000.0,473000,-42700,-44900,25,100,-9,...,103000,-95,-437,-37,136000,287,97100,205,14200,0
7,7,LLL,2019.0,462000.0,462000,95200,62400,539,100,206,...,148000,135,423,86,292000,632,126000,274,7350,0
8,8,LLL,2018.0,300000.0,300000,25900,19800,0,100,86,...,85300,66,232,38,175000,584,80200,267,5220,0
9,9,LLL,2017.0,0.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [110]:
companies_finance.dtypes

company_year_ide                         int64
company                                 object
annee                                  float64
chiffre_daffaires_e                    float64
marge_brute_e                            int64
resultat_dexploitation_e                 int64
resultat_net_e                           int64
taux_croissance_ca_pc                    int64
taux_marge_brute_pc                      int64
taux_marge_operationnelle_pc             int64
bfr_e                                    int64
bfr_exploitation_e                       int64
bfr_hors_exploitation_e                  int64
bfr_j_ca                                 int64
bfr_exploitation_j_ca                    int64
bfr_hors_exploitation_j_ca               int64
delai_paiement_clients_j                 int64
delai_paiement_fournisseurs_j            int64
ratio_s_stocks_sur_ca_j                  int64
capacite_dautofinancement_e              int64
capacite_dautofinancement_sur_ca_pc      int64
fonds_rouleme

In [111]:
companies_finance.to_csv("companies_finance.csv", sep=";", index=False)

In [112]:
companies_finance.to_sql('companies_finance', engine, 'civictech', if_exists='replace', index=False)

74

## Organizations

In [113]:
companies_info=pd.read_csv('../organizations/companies_info_data.csv', sep=';')
companies_info.head(2)

Unnamed: 0,company_id,companyname,adresse,activite,effectif,creation,dirigeants,forme_juridique,inscription_au_rcs,capital_social,activite_principale_declaree,code_naf_ou_ape,domaine_dactivite,dirigeant,inscription_au_rna,identifiant_association,objet_de_lassociation,statut_insee
0,0,citility,5 RUE DE LA CLAIRE 69009 LYON 9EME,Édition de logiciels applicatifs,0 salarié (donnée 2019),05/05/2014,"André MAY, ODICEO, Sabine SCHNECK","SAS, société par actions simplifiée","INSCRIT (au greffe de LYON, le 12/05/2014)","39 620,00 €",Edition de logiciels applicatifs.,58.29C (Édition de logiciels applicatifs),Édition,,,,,
1,1,poligma,RPT BENJAMIN FRANKLIN 34960 MONTPELLIER CEDEX 2,Conseil en systèmes et logiciels informatiques,Entre 3 et 5 salariés (donnée 2020),01/09/2015,,"SAS, société par actions simplifiée","INSCRIT (au greffe de MONTPELLIER, le 11/09/2015)","20 944,00 €",Développement de services informatiques et num...,62.02A (Conseil en systèmes et logiciels infor...,"Programmation, conseil et autres activités inf...",Philippe GERARD,,,,


In [114]:
companies_info.dtypes

company_id                       int64
companyname                     object
adresse                         object
activite                        object
effectif                        object
creation                        object
dirigeants                      object
forme_juridique                 object
inscription_au_rcs              object
capital_social                  object
activite_principale_declaree    object
code_naf_ou_ape                 object
domaine_dactivite               object
dirigeant                       object
inscription_au_rna              object
identifiant_association         object
objet_de_lassociation           object
statut_insee                    object
dtype: object

In [115]:
#companies_info['creation'] = pd.to_datetime(companies_info['creation'], format="%Y/%m/%d")

### creating associations table

In [116]:
# to split companies and NGOs, just 
associations= pd.DataFrame(companies_info.loc[companies_info['activite']=="Autres organisations fonctionnant par adhésion volontaire"])

In [117]:
a= pd.DataFrame(associations.isna().sum().sort_values(ascending=False))
a.columns=["column"]
coltodrop= list(a.loc[a['column']>=len(associations)-1].index)
associations.drop(columns=coltodrop, inplace=True)
associations.drop(columns=['activite','forme_juridique', 'inscription_au_rcs', 'code_naf_ou_ape', 'domaine_dactivite', 'identifiant_association'], inplace=True)

In [118]:
associations.rename(columns={"company_id":"asso_id", "companyname":"association_name"}, inplace=True)

In [119]:
def clean_inscriptions(x):
    x=str(x)
    if "INSCRIT" in x:
        return 1
    else:
        return 0

In [120]:
associations["inscription_rna"]=associations['inscription_au_rna'].apply(clean_inscriptions)

In [121]:
def get_date(x):
    x=str(x)
    pattern=r"\d{1,5}/\d{2,5}/\d{2,5}"
    a= re.findall(pattern, x)
    a= ''.join(a).strip()
    return a

In [122]:
associations["date_inscr"]=pd.to_datetime(associations['inscription_au_rna'].apply(get_date), dayfirst=True)

In [123]:
associations.drop(columns="inscription_au_rna", inplace=True)

In [124]:
def get_post_code(x):
    x=str(x)
    pattern=r"\d{5}"
    a= re.findall(pattern, x)
    a= ''.join(a).strip()
    return a

In [125]:
associations["post_code"]= associations["adresse"].apply(get_post_code)

In [126]:
associations["country"]= "France"

In [127]:
associations.head()

Unnamed: 0,asso_id,association_name,adresse,effectif,creation,objet_de_lassociation,inscription_rna,date_inscr,post_code,country
6,6,make4,14 RUE ST GUILLAUME 75007 PARIS 7,Entre 3 et 5 salariés (donnée 2020),11/01/2017,,0,NaT,75007,France
8,8,voteetvous,133 RUE ST DOMINIQUE 75007 PARIS 7,Au moins 1 salarié (donnée 2023),27/01/2014,"Renforcer l'exercice démocratique du vote, en ...",1,2014-01-27,75007,France
26,26,lesbricodeurs,8 PL LOUIS CHAZETTE 69001 LYON 1ER,Entre 1 et 2 salariés (donnée 2020),30/04/2016,Diffuser la culture numérique et accompagner d...,1,2015-11-05,69001,France
38,38,democracyos,17 RUE MYRHA 75018 PARIS 18,Au moins 1 salarié (donnée 2023),21/04/2015,Représenter la communauté qui développe et qui...,1,2015-04-21,75018,France
39,39,polipart,70 BD DE CLICHY 75018 PARIS 18,0 salarié (donnée 2023),09/03/2019,"Améliorer la relation entre institutions, élus...",1,2019-03-09,75018,France


### Companies table

In [128]:
companies= pd.DataFrame(companies_info.loc[companies_info['activite']!="Autres organisations fonctionnant par adhésion volontaire"])

In [129]:
companies.drop(columns=['inscription_au_rna', 'identifiant_association', 'objet_de_lassociation', 'statut_insee'], inplace=True)

In [130]:
#companies.inscription_au_rcs.value_counts()

In [131]:
companies["inscription_rcs"]=companies['inscription_au_rcs'].apply(clean_inscriptions)

In [132]:
import re

def greffe(x):
     x=str(x)
     pattern = " [A-Z]+"
     a= re.findall(pattern, x)
     a= ''.join(a).strip()
     return a

In [133]:
companies["city"]= companies['inscription_au_rcs'].apply(greffe)

In [134]:
companies["date_inscr_rad"]=pd.to_datetime(companies['inscription_au_rcs'].apply(get_date), dayfirst=True)

In [135]:
def select_appr_col(x, y): 
    x=str(x)
    y=str(y)
    if x=="NaN":
        return y
    elif y=="Nan":
        return x

In [136]:
companies["directors"]=companies["dirigeants"].astype(str).replace("nan", '')+companies["dirigeant"].astype(str).replace("nan", '')

In [137]:
companies.drop(columns=["dirigeants", "dirigeant", "inscription_au_rcs"], inplace=True)

In [138]:
import re
def fix_K_social(x):
    x=str(x)
    x.replace('€', '')
    x.replace(' ', '')
    if "," in x: 
        y= re.split(',', x)[0]
        y= re.split(' ', y)
        return "".join(y)
    else:
        return 0


In [139]:
#test line 
# companies.capital_social.apply(fix_K_social).astype(int)

In [140]:
companies["capital_social"]= companies.capital_social.apply(fix_K_social).astype(int)

In [141]:
print(list(companies.companyname))

['citility', 'poligma', 'voxcracy', 'LLL_2', 'bluenove', 'ecivis', 'civimetric', 'vooter', 'organigram_97', 'forcity_mere', 'mymairie', 'quorum', 'fullmobs', 'mairesetcitoyens', 'parlonspo', 'make3', 'consultvox', 'loomio', 'politiker', 'citizenlabbv', 'datagora', 'ouiville', 'forcity', 'accromedias', 'crowdpac', 'ledrenche', 'youvote', 'make2', 'ecreall_nova_ideo', 'neocity', 'citieszen', 'politicus', 'capcollectif', 'whip', 'voxestudio', 'LLL', 'citybay_ambitions', 'politizr', 'digitalebox', 'mesopinions', 'madeinvote', 'kawaa', 'ciwik', 'digiworks', 'equivote_myopencity', 'make', 'kohero', 'openagora', 'moneparti', 'nousrassemble', 'bittle', 'marston_massiet', 'koom', 'reperage_urbain', 'D21', 'IDcity', 'Stig', 'opensourcepolitics', 'GOV', 'fluicity', 'civocracy']


In [142]:
companies['companyname'] = companies['companyname'].replace(['LLL', 'make', 'citizenlabbv', 'voxestudio', 'marston_massiet'], ['liberté_living_lab', 'make.org', 'citizenlobbv', 'VOXE', 'marston'])

In [143]:
companies["post_code"]= companies["adresse"].apply(get_post_code)

In [151]:
companies["country"]="France"

### export to sql and csv

In [152]:
companies.to_csv("companies_info.csv", sep=";", index=False)

In [146]:
companies.to_sql('companies_info', engine, 'civictech', if_exists='replace', index=False)

61

In [147]:
associations.to_csv("associations_info.csv", sep=';', index=False)

In [148]:
associations.to_sql("associations_info", engine, 'civictech', if_exists='replace', index=False)

15

In [149]:
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_sql.html

# you can pass data types here 
# from sqlalchemy.types import Integer
# df.to_sql('integers', con=engine, index=False,
#           dtype={"A": Integer()})