# DC DATA - Collection / Cleaning / Preprocessing

## Imports

In [1]:
import pandas as pd
import numpy as np
import pandas as pd
import os
from sqlalchemy import create_engine
from DealMatch.database_connector import db_connector
import os
from dotenv import load_dotenv

%load_ext autoreload
%autoreload 2

In [2]:
# The joined frames loaded below are very wide (dozens of columns each), so
# raise the column display limit to keep pandas from eliding the middle ones.
pd.options.display.max_columns = 500

In [4]:
# Database handle passed as `con` to every pd.read_sql call in this notebook.
# NOTE(review): db_connector is imported from DealMatch.database_connector; what
# it returns (engine vs. live connection) and where it reads its credentials
# from is not visible here -- confirm, and confirm whether the handle needs to
# be closed/disposed explicitly once the raw frames are loaded.
dbConnection = db_connector()

## DC Database queries

In [30]:
# Companies with company_category_id = 1 (presumably "investor" -- confirm the
# category mapping), LEFT JOINed to their invest profiles, each profile's
# locations, the countries attached to each location, and the country lookup.
# LEFT JOINs keep companies that have no profile/location/country rows.
# NOTE(review): SELECT * across five tables returns several columns with the
# same name (id, created_at, is_deleted, ...); consider selecting explicit,
# aliased columns so downstream column access is unambiguous.
query_investor_locations = """
        SELECT * 
        FROM companies c 
        LEFT JOIN invest_profiles ip ON ip.company_id = c.id
        LEFT JOIN invest_profile_locations ipl ON ipl.invest_profile_id = ip.id 
        LEFT JOIN invest_profile_countries ipc ON ipc.invest_profile_location_id = ipl.id 
        LEFT JOIN countries c2 ON c2.id = ipc.country_id 
        WHERE c.company_category_id = 1; 
"""

In [31]:
# Companies with company_category_id = 1 (presumably "investor" -- confirm),
# joined to their deals plus each deal's stage, type, lost reason, sectors and
# subsectors.
# NOTE(review): the first four joins are LEFT JOINs, but deal_sectors, sectors,
# deal_subsectors and subsectors are plain (inner) JOINs. The inner joins on
# d.id / ds2.id discard every row where those are NULL, so companies without
# deals, deals without a sector, and deal sectors without a subsector are all
# dropped -- unlike the sibling queries, which use LEFT JOIN throughout.
# Confirm this filtering is intended.
# NOTE(review): SELECT * returns many same-named columns (id, name, crm_id,
# created_at, ...); consider explicit, aliased columns.
query_investor_deals = """
        SELECT * 
        FROM companies c 
        LEFT JOIN deals d ON d.company_id = c.id 
        LEFT JOIN deal_stages ds ON ds.id = d.deal_stage_id 
        LEFT JOIN deal_types dt ON dt.id = d.deal_type_id 
        LEFT JOIN deal_lost_reasons dlr ON dlr.id = d.lost_reason_id 
        JOIN deal_sectors ds2 ON ds2.deal_id = d.id 
        JOIN sectors s ON s.id = ds2.sector_id 
        JOIN deal_subsectors ds3 ON ds2.id = ds3.deal_sector_id 
        JOIN subsectors s2 ON s2.id = ds3.subsector_id 
        WHERE c.company_category_id = 1;
"""

In [33]:
# Companies with company_category_id = 1 (presumably "investor" -- confirm),
# LEFT JOINed to their invest profiles, the profile-to-keyword link table and
# the keyword lookup. LEFT JOINs keep companies/profiles without keywords.
# NOTE(review): SELECT * returns several same-named columns (id, created_at,
# is_deleted, ...); consider explicit, aliased columns.
query_invest_profile_keywords = """
        SELECT * 
        FROM companies c 
        LEFT JOIN invest_profiles ip ON ip.company_id = c.id
        LEFT JOIN invest_profile_keywords ipk ON ipk.invest_profile_id = ip.id 
        LEFT JOIN keywords k ON k.id = ipk.keyword_id 
        WHERE c.company_category_id = 1;
"""

In [34]:
# Companies with company_category_id = 1 (presumably "investor" -- confirm),
# LEFT JOINed to their invest profiles, each profile's sectors, and each
# profile sector's subsectors, with the sector/subsector lookup tables.
# LEFT JOINs keep profiles without sectors and sectors without subsectors.
# NOTE(review): SELECT * returns several same-named columns (id, name_en,
# name_de, sector_id, ...); consider explicit, aliased columns.
query_invest_profile_sectors_subsectors = """
        SELECT * 
        FROM companies c 
        LEFT JOIN invest_profiles ip ON ip.company_id = c.id
        LEFT JOIN invest_profile_sectors ips ON ips.invest_profile_id = ip.id 
        LEFT JOIN sectors s ON s.id = ips.sector_id 
        LEFT JOIN invest_profile_subsectors ips2 ON ips2.invest_profile_sector_id = ips.id 
        LEFT JOIN subsectors s2 ON s2.id = ips2.subsector_id 
        WHERE c.company_category_id = 1;
"""

In [35]:
# Companies with company_category_id = 1 (presumably "investor" -- confirm),
# LEFT JOINed to their invest profiles and to both the profile's transaction
# types and the profile's stake types (with their lookup tables).
# NOTE(review): tx types and stake types are each joined on ip.id
# independently, so a profile with m tx types and n stake types produces m x n
# rows (every tx type paired with every stake type). De-duplicate or query the
# two relations separately if per-relation counts matter.
# NOTE(review): SELECT * returns several same-named columns (id, name,
# invest_profile_id, ...); consider explicit, aliased columns.
query_invest_profile_transaction = """
        SELECT * 
        FROM companies c 
        LEFT JOIN invest_profiles ip ON ip.company_id = c.id
        LEFT JOIN invest_profile_tx_types iptt ON iptt.invest_profile_id = ip.id 
        LEFT JOIN tx_types tt ON tt.id = iptt.tx_type_id 
        LEFT JOIN invest_profile_stake_types ipst ON ipst.invest_profile_id = ip.id 
        LEFT JOIN stake_types st ON st.id = ipst.stake_type_id 
        WHERE c.company_category_id = 1;
"""

## Raw DataFrames

In [28]:
# Run the investor-location query and hold the raw result as a DataFrame.
# Needs the query-definition cell and the connection cell to have run first.
investor_locations = pd.read_sql(sql=query_investor_locations, con=dbConnection)
# Bare last expression: the notebook renders the first five rows.
investor_locations.head()

Unnamed: 0,id,name,description,short_description,products_description,company_category_id,number_of_employees,ebit,ebitda,revenue,bs_total,website,origin_country_id,origin_region_id,zipcode,database_import_id,company_source_id,company_state_id,fees_rate,fees_cap,fees_floor,fees_fa,fees_info,inroad,exit_oriented,management_takeover,seriousness,agnostic,margin,crm_id,crm_created_at,crm_updated_at,crm_synced_at,verified_at,verified_by,parent_company_id,ma_history,created_at,updated_at,deleted_at,is_deleted,company_inroad_id,id.1,company_id,name.1,max_revenue,min_revenue,max_ebitda,min_ebitda,max_equity,min_equity,searchmandate,created_at.1,updated_at.1,deleted_at.1,is_deleted.1,id.2,invest_profile_id,continent_id,is_excluded,created_at.2,updated_at.2,deleted_at.2,is_deleted.2,id.3,invest_profile_location_id,country_id,is_excluded.1,created_at.3,updated_at.3,deleted_at.3,is_deleted.3,id.4,iso_alpha2,iso_alpha3,name_en,name_de,name_fr,name_es,phone_country_code,continent_id.1
0,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1.0,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059,NaT,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,1.0,1.0,,50.0,10.0,,,100.0,25.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,1.0,1.0,6.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,5.0,1.0,40.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,40.0,AT,AUT,Austria,Österreich,Autriche,Austria,43,6.0
1,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1.0,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059,NaT,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,1.0,1.0,,50.0,10.0,,,100.0,25.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,1.0,1.0,6.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,8.0,1.0,208.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,208.0,DK,DNK,Denmark,Dänemark,Danemark,Dinamarca,45,6.0
2,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1.0,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059,NaT,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,1.0,1.0,,50.0,10.0,,,100.0,25.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,1.0,1.0,6.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,4.0,1.0,246.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,246.0,FI,FIN,Finland,Finnland,Finlande,Finlandia,358,6.0
3,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1.0,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059,NaT,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,1.0,1.0,,50.0,10.0,,,100.0,25.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,1.0,1.0,6.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,11.0,1.0,250.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,250.0,FR,FRA,France,Frankreich,France,Francia,33,6.0
4,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1.0,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059,NaT,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,1.0,1.0,,50.0,10.0,,,100.0,25.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,1.0,1.0,6.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,1.0,1.0,276.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,276.0,DE,DEU,Germany,Deutschland,Allemagne,Alemania,49,6.0


In [32]:
# Run the investor-deals query and hold the raw result as a DataFrame.
# Needs the query-definition cell and the connection cell to have run first.
investor_deals = pd.read_sql(sql=query_investor_deals, con=dbConnection)
# Bare last expression: the notebook renders the first five rows.
investor_deals.head()

Unnamed: 0,id,name,description,short_description,products_description,company_category_id,number_of_employees,ebit,ebitda,revenue,bs_total,website,origin_country_id,origin_region_id,zipcode,database_import_id,company_source_id,company_state_id,fees_rate,fees_cap,fees_floor,fees_fa,fees_info,inroad,exit_oriented,management_takeover,seriousness,agnostic,margin,crm_id,crm_created_at,crm_updated_at,crm_synced_at,verified_at,verified_by,parent_company_id,ma_history,created_at,updated_at,deleted_at,is_deleted,company_inroad_id,id.1,company_id,name.1,deal_type_id,deal_stage_id,is_lost,lost_reason_id,comment,target_company_id,target_name,target_description,target_revenue,target_ebitda,target_ebit,target_country_id,target_region_id,target_zipcode,crm_id.1,parent_crm_id,crm_created_at.1,crm_updated_at.1,crm_synced_at.1,created_at.1,updated_at.1,deleted_at.1,is_deleted.1,lead_source_id,lead_prio,id.2,name.2,id.3,name.3,id.4,name.4,id.5,deal_id,sector_id,created_at.2,updated_at.2,deleted_at.2,is_deleted.2,id.6,name_en,name_de,id.7,deal_sector_id,subsector_id,created_at.3,updated_at.3,deleted_at.3,is_deleted.3,id.8,sector_id.1,name_en.1,name_de.1
0,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059,,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,1,1,1301 Sun,4.0,2,1,0.0,,,Sun [Target],•\t350MWp of solar photovoltaic project assets...,,,,,,-,3611818364,3586205129,2020-12-14 08:35:39,2021-09-09 06:48:37,2021-10-14 13:47:07,2021-10-14 13:47:13,2021-10-21 07:59:09,,0,,,2,APPROACH_SENT,4.0,OTHER,0.0,NOT_SPECIFIED,1,1,1,2021-10-14 13:47:13,2021-10-14 13:47:13,,0,1,Energy,Energie,1,1,236,2021-10-14 13:47:13,2021-10-14 13:47:13,,0,236,1,Solar power,Solarenergie
1,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059,,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,2,1,1220 Supple,4.0,2,1,6.0,,,Supple [Target],Solaranlage in Kreta,57.7,,,,,,3328836718,3292812393,2020-11-06 19:24:26,2021-09-08 08:36:44,2021-10-14 13:47:07,2021-10-14 13:47:13,2021-10-21 08:34:03,,0,,,2,APPROACH_SENT,4.0,OTHER,6.0,NO_REGION_FIT,2,2,1,2021-10-14 13:47:13,2021-10-14 13:47:13,,0,1,Energy,Energie,3,2,236,2021-10-14 13:47:13,2021-10-14 13:47:13,,0,236,1,Solar power,Solarenergie
2,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059,,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,2,1,1220 Supple,4.0,2,1,6.0,,,Supple [Target],Solaranlage in Kreta,57.7,,,,,,3328836718,3292812393,2020-11-06 19:24:26,2021-09-08 08:36:44,2021-10-14 13:47:07,2021-10-14 13:47:13,2021-10-21 08:34:03,,0,,,2,APPROACH_SENT,4.0,OTHER,6.0,NO_REGION_FIT,2,2,1,2021-10-14 13:47:13,2021-10-14 13:47:13,,0,1,Energy,Energie,2,2,238,2021-10-14 13:47:13,2021-10-14 13:47:13,,0,238,1,Renewable energy - other,Erneuerbare Energie - sonstige
3,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059,,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,3,1,1677 Heat,0.0,2,0,,,,SPH Sustainable Process Heat GmbH,PROJECT HEAT hat eine neue Wärmepumpentechnolo...,0.43,-0.78,-0.78,,,-,5684406142,5631804235,2021-07-14 15:12:30,2021-09-21 16:52:34,2021-10-14 13:47:08,2021-10-14 13:47:14,2021-10-19 17:04:18,,0,,,2,APPROACH_SENT,0.0,MAJORITY,,,3,3,9,2021-10-14 13:47:14,2021-10-14 13:47:14,,0,9,Industrial products and services,Industrielle Produkte & Dienstleistungen,4,3,72,2021-10-14 13:47:14,2021-10-14 13:47:14,,0,72,9,Pumps and compressors,Pumpen und Armaturen
4,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059,,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,4,1,845 Apollo,4.0,2,0,,,,SUMMIQ AG,Fundraising für Renewable Holding,0.0,0.0,0.0,276.0,3.0,80335,1834762570,1712230934,2020-04-06 13:34:36,2021-10-09 05:16:20,2021-10-14 13:47:10,2021-10-14 13:47:14,2021-10-19 17:04:18,,0,,,2,APPROACH_SENT,4.0,OTHER,,,4,4,23,2021-10-14 13:47:14,2021-10-14 13:47:14,,0,23,Financial Services,Finanzdienstleistungen,5,4,182,2021-10-14 13:47:14,2021-10-14 13:47:14,,0,182,23,Other Diversified Financial Services,Finanzdienstleistungen - sonstige


In [36]:
# Run the invest-profile keyword query and hold the raw result as a DataFrame.
# Needs the query-definition cell and the connection cell to have run first.
invest_profile_keywords = pd.read_sql(sql=query_invest_profile_keywords, con=dbConnection)
# Bare last expression: the notebook renders the first five rows.
invest_profile_keywords.head()

Unnamed: 0,id,name,description,short_description,products_description,company_category_id,number_of_employees,ebit,ebitda,revenue,bs_total,website,origin_country_id,origin_region_id,zipcode,database_import_id,company_source_id,company_state_id,fees_rate,fees_cap,fees_floor,fees_fa,fees_info,inroad,exit_oriented,management_takeover,seriousness,agnostic,margin,crm_id,crm_created_at,crm_updated_at,crm_synced_at,verified_at,verified_by,parent_company_id,ma_history,created_at,updated_at,deleted_at,is_deleted,company_inroad_id,id.1,company_id,name.1,max_revenue,min_revenue,max_ebitda,min_ebitda,max_equity,min_equity,searchmandate,created_at.1,updated_at.1,deleted_at.1,is_deleted.1,id.2,invest_profile_id,keyword_id,created_at.2,updated_at.2,deleted_at.2,is_deleted.2,id.3,name_de,created_at.3,updated_at.3,deleted_at.3,is_deleted.3
0,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1.0,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059,NaT,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,1.0,1.0,,50.0,10.0,,,100.0,25.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,4487.0,1.0,3.0,2021-10-19 17:04:17,2021-10-19 17:04:17,,0.0,3.0,energie,2021-10-14 13:47:07,2021-10-14 13:47:07,,0.0
1,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1.0,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059,NaT,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,1.0,1.0,,50.0,10.0,,,100.0,25.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,4494.0,1.0,196.0,2021-10-19 17:04:17,2021-10-19 17:04:17,,0.0,196.0,heating,2021-10-14 13:48:18,2021-10-14 13:48:18,,0.0
2,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1.0,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059,NaT,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,1.0,1.0,,50.0,10.0,,,100.0,25.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,4491.0,1.0,470.0,2021-10-19 17:04:17,2021-10-19 17:04:17,,0.0,470.0,led,2021-10-14 13:51:02,2021-10-14 13:51:02,,0.0
3,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1.0,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059,NaT,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,1.0,1.0,,50.0,10.0,,,100.0,25.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,4489.0,1.0,1843.0,2021-10-19 17:04:17,2021-10-19 17:04:17,,0.0,1843.0,erneuerbare energien,2021-10-14 14:10:13,2021-10-14 14:10:13,,0.0
4,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1.0,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059,NaT,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,1.0,1.0,,50.0,10.0,,,100.0,25.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,4488.0,1.0,3994.0,2021-10-19 17:04:17,2021-10-19 17:04:17,,0.0,3994.0,wärmerückgewinnung,2021-10-14 15:15:28,2021-10-14 15:15:28,,0.0


In [37]:
# Run the invest-profile sector/subsector query and hold the raw result as a
# DataFrame. Needs the query-definition and connection cells to have run first.
invest_profile_sectors_subsectors = pd.read_sql(
    sql=query_invest_profile_sectors_subsectors,
    con=dbConnection,
)
# Bare last expression: the notebook renders the first five rows.
invest_profile_sectors_subsectors.head()

Unnamed: 0,id,name,description,short_description,products_description,company_category_id,number_of_employees,ebit,ebitda,revenue,bs_total,website,origin_country_id,origin_region_id,zipcode,database_import_id,company_source_id,company_state_id,fees_rate,fees_cap,fees_floor,fees_fa,fees_info,inroad,exit_oriented,management_takeover,seriousness,agnostic,margin,crm_id,crm_created_at,crm_updated_at,crm_synced_at,verified_at,verified_by,parent_company_id,ma_history,created_at,updated_at,deleted_at,is_deleted,company_inroad_id,id.1,company_id,name.1,max_revenue,min_revenue,max_ebitda,min_ebitda,max_equity,min_equity,searchmandate,created_at.1,updated_at.1,deleted_at.1,is_deleted.1,id.2,invest_profile_id,sector_id,is_excluded,created_at.2,updated_at.2,deleted_at.2,is_deleted.2,id.3,name_en,name_de,id.4,invest_profile_sector_id,subsector_id,is_excluded.1,created_at.3,updated_at.3,deleted_at.3,is_deleted.3,id.5,sector_id.1,name_en.1,name_de.1
0,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1.0,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059.0,NaT,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,1.0,1.0,,50.0,10.0,,,100.0,25.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,3.0,1.0,1.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,1.0,Energy,Energie,3.0,3.0,10.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,10.0,1.0,Energy storage,Energiespeicherung
1,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1.0,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059.0,NaT,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,1.0,1.0,,50.0,10.0,,,100.0,25.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,2.0,1.0,9.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,9.0,Industrial products and services,Industrielle Produkte & Dienstleistungen,2.0,2.0,77.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,77.0,9.0,Ventilation / Heating / Air conditioning,Heizung / Klima / Lüftung
2,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1.0,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059.0,NaT,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,1.0,1.0,,50.0,10.0,,,100.0,25.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,1.0,1.0,17.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,17.0,Retailing,Handel,1.0,1.0,134.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,134.0,17.0,Electrical appliances,Computer und Elektronik Einzelhandel
3,5,Daniel Vogel,,,,1,,,,,,,276.0,,,1.0,,0,0.02,,,,,4.0,1.0,0.0,2.0,0.0,0.0,,NaT,NaT,NaT,2021-10-14 13:47:14,,,,2020-01-10 23:00:00,2020-01-10 23:00:00,,0,0,5.0,5.0,,,,,,,1.0,0.0,2021-10-14 13:47:15,2021-10-14 13:47:15,,0.0,7.0,5.0,26.0,0.0,2021-10-14 13:47:15,2021-10-14 13:47:15,,0.0,26.0,Internet/ecommerce,Internet/ E-Commerce,,,,,NaT,NaT,,,,,,
4,5,Daniel Vogel,,,,1,,,,,,,276.0,,,1.0,,0,0.02,,,,,4.0,1.0,0.0,2.0,0.0,0.0,,NaT,NaT,NaT,2021-10-14 13:47:14,,,,2020-01-10 23:00:00,2020-01-10 23:00:00,,0,0,5.0,5.0,,,,,,,1.0,0.0,2021-10-14 13:47:15,2021-10-14 13:47:15,,0.0,8.0,5.0,27.0,0.0,2021-10-14 13:47:15,2021-10-14 13:47:15,,0.0,27.0,IT services,IT Services,8.0,8.0,199.0,0.0,2021-10-14 13:47:15,2021-10-14 13:47:15,,0.0,199.0,27.0,IT Consulting,IT-Beratung


In [38]:
# Run the invest-profile transaction/stake-type query and hold the raw result
# as a DataFrame. Needs the query-definition and connection cells to have run
# first.
invest_profile_transaction = pd.read_sql(
    sql=query_invest_profile_transaction,
    con=dbConnection,
)
# Bare last expression: the notebook renders the first five rows.
invest_profile_transaction.head()

Unnamed: 0,id,name,description,short_description,products_description,company_category_id,number_of_employees,ebit,ebitda,revenue,bs_total,website,origin_country_id,origin_region_id,zipcode,database_import_id,company_source_id,company_state_id,fees_rate,fees_cap,fees_floor,fees_fa,fees_info,inroad,exit_oriented,management_takeover,seriousness,agnostic,margin,crm_id,crm_created_at,crm_updated_at,crm_synced_at,verified_at,verified_by,parent_company_id,ma_history,created_at,updated_at,deleted_at,is_deleted,company_inroad_id,id.1,company_id,name.1,max_revenue,min_revenue,max_ebitda,min_ebitda,max_equity,min_equity,searchmandate,created_at.1,updated_at.1,deleted_at.1,is_deleted.1,id.2,invest_profile_id,tx_type_id,is_excluded,created_at.2,updated_at.2,deleted_at.2,is_deleted.2,id.3,name.2,id.4,invest_profile_id.1,stake_type_id,created_at.3,updated_at.3,deleted_at.3,is_deleted.3,id.5,name.3
0,1,Greencoat Capital,,,,1,,,,,,https://www.greencoat-capital.com/,826.0,,,1.0,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,1187474059.0,NaT,2021-08-01 17:17:44,2021-10-14 13:47:05,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:14,,0,0,1.0,1.0,,50.0,10.0,,,100.0,25.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,1.0,1.0,1.0,0.0,2021-10-14 13:47:03,2021-10-14 13:47:03,,0.0,1.0,GROWTH,,,,NaT,NaT,,,,
1,5,Daniel Vogel,,,,1,,,,,,,276.0,,,1.0,,0,0.02,,,,,4.0,1.0,0.0,2.0,0.0,0.0,,NaT,NaT,NaT,2021-10-14 13:47:14,,,,2020-01-10 23:00:00,2020-01-10 23:00:00,,0,0,5.0,5.0,,,,,,,1.0,0.0,2021-10-14 13:47:15,2021-10-14 13:47:15,,0.0,2.0,5.0,2.0,0.0,2021-10-14 13:47:15,2021-10-14 13:47:15,,0.0,2.0,SUCCESSION,1.0,5.0,1.0,2021-10-14 13:47:15,2021-10-14 13:47:15,,0.0,1.0,MAJORITY
2,6,Richmond View Ventures GmbH,,,,1,,,,,,https://rvv.tv/,276.0,,,1.0,,0,0.02,,,,,0.0,1.0,0.0,2.0,0.0,0.0,,NaT,NaT,NaT,NaT,,,,2020-01-10 23:00:00,2020-01-10 23:00:00,,0,0,6.0,6.0,,,,,,,,0.0,2021-10-14 13:47:16,2021-10-14 13:47:16,,0.0,3.0,6.0,0.0,0.0,2021-10-14 13:47:16,2021-10-14 13:47:16,,0.0,0.0,SEED,2.0,6.0,0.0,2021-10-14 13:47:15,2021-10-14 13:47:15,,0.0,0.0,MINORITY
3,7,AL Capital Holding GmbH & Co. KG,,,,1,,,,,,,276.0,,,1.0,,0,0.02,,,,,0.0,1.0,0.0,2.0,1.0,3.0,4976737652.0,NaT,2021-10-14 10:07:09,2021-10-14 13:47:18,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:58,,0,0,7.0,7.0,,,,,,15.0,2.0,0.0,2021-10-14 13:47:16,2021-10-14 13:47:16,,0.0,4.0,7.0,2.0,0.0,2021-10-14 13:47:16,2021-10-14 13:47:16,,0.0,2.0,SUCCESSION,3.0,7.0,1.0,2021-10-14 13:47:16,2021-10-14 13:47:16,,0.0,1.0,MAJORITY
4,7,AL Capital Holding GmbH & Co. KG,,,,1,,,,,,,276.0,,,1.0,,0,0.02,,,,,0.0,1.0,0.0,2.0,1.0,3.0,4976737652.0,NaT,2021-10-14 10:07:09,2021-10-14 13:47:18,NaT,,,,2020-01-10 23:00:00,2021-10-14 13:47:58,,0,0,7.0,7.0,,,,,,15.0,2.0,0.0,2021-10-14 13:47:16,2021-10-14 13:47:16,,0.0,5.0,7.0,3.0,0.0,2021-10-14 13:47:16,2021-10-14 13:47:16,,0.0,3.0,CARVE_OUT,3.0,7.0,1.0,2021-10-14 13:47:16,2021-10-14 13:47:16,,0.0,1.0,MAJORITY


### Dataframe Shapes

In [39]:
# Report (rows, columns) of the raw locations frame. Rows are join-expanded
# (a company repeats once per joined profile/location/country row), so this is
# not a count of distinct investors.
print(f"Invest-profile preferred locations df: {investor_locations.shape}")

Invest-profile preferred locations df: (94889, 81)


In [40]:
# Report (rows, columns) of the raw deals frame. Rows are join-expanded
# (a deal repeats once per joined sector/subsector row), so this is not a
# count of distinct deals or investors.
print(f"Investor Deals df: {investor_deals.shape}")

Investor Deals df: (28653, 97)


In [41]:
# Report (rows, columns) of the raw keywords frame. Rows are join-expanded
# (a company repeats once per joined profile keyword), so this is not a count
# of distinct investors.
print(f"Invest-profile preferred keywords df: {invest_profile_keywords.shape}")

Invest-profile preferred keywords df: (21631, 69)


In [42]:
# Report (rows, columns) of the raw sectors/subsectors frame. Rows are
# join-expanded (a company repeats once per joined sector/subsector row), so
# this is not a count of distinct investors.
print(f"Invest-profile preferred sectors / subsectors df: {invest_profile_sectors_subsectors.shape}")

Invest-profile preferred sectors / subsectors df: (15602, 79)


In [43]:
# Report (rows, columns) of the raw transaction/stake-type frame. Rows are
# join-expanded (a company repeats once per tx-type / stake-type pairing), so
# this is not a count of distinct investors.
print(f"Invest-profile preferred transaction & stake types df: {invest_profile_transaction.shape}")

Invest-profile preferred transaction & stake types df: (11207, 75)
