In [4]:
from sqlalchemy import create_engine
import pandas as pd
import requests

In [5]:
def acquire_db():

    # 1/3 Connection to the db

    sqlitedb_rel_path = '../data/processed/raw_data_project_m1.db'
    conn_str = f'sqlite:///{sqlitedb_rel_path}'
    engine = create_engine(conn_str)

    # Use dBeaver to cleanup a few fields i.e. gender, then make a DataFrame:

    sql_query = """ 
    SELECT country_info.uuid,
           country_info.country_code,
           career_info.dem_education_level,
           career_info.normalized_job_code, 
           personal_info.age, 
           personal_info.gender,
           poll_info.question_bbi_2016wave4_basicincome_vote,
           poll_info.question_bbi_2016wave4_basicincome_argumentsagainst,
       poll_info.question_bbi_2016wave4_basicincome_argumentsfor 
    FROM country_info
    JOIN career_info ON country_info.uuid = career_info.uuid
    JOIN personal_info ON personal_info.uuid = career_info.uuid
    JOIN poll_info ON poll_info.uuid = personal_info.uuid
    """

    poll_db = pd.read_sql_query(sql_query, engine)

    
    return poll_db

In [9]:
poll_db = acquire_db()
poll_db

Unnamed: 0,uuid,country_code,dem_education_level,normalized_job_code,age,gender,question_bbi_2016wave4_basicincome_vote,question_bbi_2016wave4_basicincome_argumentsagainst,question_bbi_2016wave4_basicincome_argumentsfor
0,f6e7ee00-deac-0133-4de8-0a81e8b09a82,AT,no,,61 years old,male,I would not vote,None of the above,None of the above
1,54f0f1c0-dda1-0133-a559-0a81e8b09a82,AT,high,861a9b9151e11362eb3c77ca914172d0,57 years old,male,I would probably vote for it,It might encourage people to stop working,It increases appreciation for household work a...
2,83127080-da3d-0133-c74f-0a81e8b09a82,AT,,,32 years old,male,I would not vote,Foreigners might come to my country and take a...,It creates more equality of opportunity
3,15626d40-db13-0133-ea5c-0a81e8b09a82,AT,high,049a3f3a2b5f85cb2971ba77ad66e10c,45 years old,male,I would probably vote for it,None of the above,It reduces anxiety about financing basic needs
4,24954a70-db98-0133-4a64-0a81e8b09a82,AT,high,f4b2fb1aa40f661488e2782b6d57ad2f,41 years old,female,I would probably vote for it,It is impossible to finance | It might encoura...,It reduces anxiety about financing basic needs
...,...,...,...,...,...,...,...,...,...
9644,7d1ac020-dcb4-0133-817a-0a81e8b09a82,SK,high,847165cfda6b1dc82ae22b967da8af2f,37 years old,female,I would probably vote for it,It is impossible to finance,It reduces bureaucracy and administrative expe...
9645,39f989f0-db52-0133-8482-0a81e8b09a82,SK,high,a4d5b8b38f9513825d0d94a981ebe962,53 years old,male,I would probably vote against it,It might encourage people to stop working | On...,It reduces bureaucracy and administrative expe...
9646,70ce4a90-d965-0133-f5e4-0a81e8b09a82,SK,low,,24 years old,male,I would not vote,None of the above,It reduces anxiety about financing basic needs
9647,2896e440-db3c-0133-5b67-0a81e8b09a82,SK,low,775190277a849cba701b306a7b374c0a,47 years old,male,I would vote for it,Foreigners might come to my country and take a...,It reduces bureaucracy and administrative expe...


In [15]:
def acquire_api():
    
    # 1/3 Connection to the db

    sqlitedb_rel_path = '../data/processed/raw_data_project_m1.db'
    conn_str = f'sqlite:///{sqlitedb_rel_path}'
    engine = create_engine(conn_str)

    # Use dBeaver to cleanup a few fields i.e. gender, then make a DataFrame:

    sql_query = """ 
    SELECT country_info.uuid,
           country_info.country_code,
           career_info.dem_education_level,
           career_info.normalized_job_code, 
           personal_info.age, 
           personal_info.gender,
           poll_info.question_bbi_2016wave4_basicincome_vote,
           poll_info.question_bbi_2016wave4_basicincome_argumentsagainst,
       poll_info.question_bbi_2016wave4_basicincome_argumentsfor 
    FROM country_info
    JOIN career_info ON country_info.uuid = career_info.uuid
    JOIN personal_info ON personal_info.uuid = career_info.uuid
    JOIN poll_info ON poll_info.uuid = personal_info.uuid
    """
    # 2/3 Extract job skills and description table using APIs
    # Make a list with job ids from the csv, and use it to get job names from website (open skills api)

    poll_db = pd.read_sql_query(sql_query, engine)

    poll_job_code = poll_db['normalized_job_code'].unique().tolist()

    url_list = []
    for i in poll_job_code:
        url_list.append(f'http://api.dataatwork.org/v1/jobs/{i}/related_skills')

    # Make a DataFrame with the collection of results.

    lst = []
    for url in url_list:
        response = requests.get(url)
        json_data = response.json()
        lst.append(json_data)

    api_skills = pd.DataFrame(lst)

    # Remove nulls

    api_skills.drop(index=api_skills[api_skills.skills.isnull()].index, inplace=True)

    #Formula to get the first skill, which is always the one with highest 'importance' value

    def get_max(x):
        top_skill = pd.json_normalize(x).iloc[0]['skill_name']
        return top_skill
    
    return api_skills

In [16]:
api_skills = acquire_api()
api_skills

Unnamed: 0,error,job_uuid,job_title,normalized_job_title,skills
1,,861a9b9151e11362eb3c77ca914172d0,Automatic Data Processing Planner,automatic data processing planner,[{'skill_uuid': 'b3cb1294905e001d3d611bff1de39...
2,,049a3f3a2b5f85cb2971ba77ad66e10c,Data Coordinator,data coordinator,[{'skill_uuid': 'e1a8c649e57ad7b1cfeef1aad5ba5...
3,,f4b2fb1aa40f661488e2782b6d57ad2f,Database Developer,database developer,[{'skill_uuid': 'b3cb1294905e001d3d611bff1de39...
4,,27af8700f5577cec835acee2cb90a2ff,Data Entry Specialist,data entry specialist,[{'skill_uuid': '162cb662c7ef4dc9e86a2b5e88cbc...
5,,c1b670eba9ccb65e7c99f7da116d5b9c,Database Architect,database architect,[{'skill_uuid': 'b3cb1294905e001d3d611bff1de39...
...,...,...,...,...,...
152,,b0fa6ede410f50b82ab74f5a705fe699,Analytical Data Miner,analytical data miner,[{'skill_uuid': '20784bf09c9fe614603ad635e6093...
153,,559a21f836c93876f31b60e6d10656a7,Data Analysis Assistant,data analysis assistant,[{'skill_uuid': 'e72f8046dc4d704b6d1ca41dada93...
154,,c1fb1a01b78373ac2153c66fa08d16dc,Data Examination Clerk,data examination clerk,[{'skill_uuid': '500018df958f2b9f8b387cf5637a0...
155,,05bb9a333a66d6eb151e253623efe1c0,Data Entry Clerk,data entry clerk,[{'skill_uuid': '162cb662c7ef4dc9e86a2b5e88cbc...


In [30]:
def web():

# 3/3 Get country names with web scraping

    url = 'https://ec.europa.eu/eurostat/statistics-explained/index.php/Glossary:Country_codes'
    html = requests.get(url).content

    # Make the soup

    from bs4 import BeautifulSoup
    soup = BeautifulSoup(html, 'html.parser')

    # Assign the correct [0] table data to variable tablle

    table = soup.find_all('table')[0]

    # Remove anything that isn't countries or acronyms

    items = [x.text for x in table.find_all('td')]

    # Create a dict of 2 lists (countries and their acronyms), and convert to DF

    countries = []
    acronyms = []
    for i in items:
        if i.startswith('('):
            acronyms.append(i[1:-2])
        else:
            countries.append(i[:-1])

    raw_dict = {'countries': countries, 'acronyms': acronyms}

    # This dict, however is not exactly the right shape for a latter pd.replace,

    raw_dict_df = pd.DataFrame(raw_dict)
    new_dict = raw_dict_df.set_index('acronyms').to_dict()
    countries_dict = new_dict['countries']

    # Adjust a couple keys missing in the countries table

    countries_dict['GB'] = 'Great Britain'
    countries_dict['GR'] = 'Greece'
    del countries_dict['EL']

    # From wrangling, include the lines to lead to a single table output
    # Merge poll file with api job list

    poll_api_merge = poll_db.merge(api_skills, left_on='normalized_job_code', right_on='job_uuid', how='outer')

    # Map countries from countries_dict

    poll_api_merge_countries = poll_api_merge.fillna('Unemployed').replace({"country_code": countries_dict})

    # Small correction to the education level column, where nulls were populated with Unemployed

    poll_api_merge_countries['dem_education_level'] = poll_api_merge_countries['dem_education_level'].str.replace(
        'Unemployed', 'no')

    return poll_api_merge_countries

In [33]:
aquired_result = web()
aquired_result

Unnamed: 0,uuid,country_code,dem_education_level,normalized_job_code,age,gender,question_bbi_2016wave4_basicincome_vote,question_bbi_2016wave4_basicincome_argumentsagainst,question_bbi_2016wave4_basicincome_argumentsfor,error,job_uuid,job_title,normalized_job_title,skills
0,f6e7ee00-deac-0133-4de8-0a81e8b09a82,Austria,no,Unemployed,61 years old,male,I would not vote,None of the above,None of the above,Unemployed,Unemployed,Unemployed,Unemployed,Unemployed
1,83127080-da3d-0133-c74f-0a81e8b09a82,Austria,no,Unemployed,32 years old,male,I would not vote,Foreigners might come to my country and take a...,It creates more equality of opportunity,Unemployed,Unemployed,Unemployed,Unemployed,Unemployed
2,b50dbb80-da53-0133-8956-0a81e8b09a82,Austria,medium,Unemployed,26 years old,female,I would probably vote for it,Foreigners might come to my country and take a...,It reduces anxiety about financing basic needs,Unemployed,Unemployed,Unemployed,Unemployed,Unemployed
3,9949c4c0-da5f-0133-c832-0a81e8b09a82,Austria,no,Unemployed,22 years old,female,I would vote for it,Foreigners might come to my country and take a...,"It increases solidarity, because it is funded ...",Unemployed,Unemployed,Unemployed,Unemployed,Unemployed
4,69f1f400-dc5f-0133-ad9b-0a81e8b09a82,Austria,medium,Unemployed,58 years old,male,I would probably vote for it,Foreigners might come to my country and take a...,It increases appreciation for household work a...,Unemployed,Unemployed,Unemployed,Unemployed,Unemployed
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9644,1cb1aac0-d94c-0133-8baa-0a81e8b09a82,Italy,high,4cee16550636e292b8d136486fce943b,58 years old,male,I would probably vote for it,Only the people who need it most should get so...,It reduces anxiety about financing basic needs...,Unemployed,4cee16550636e292b8d136486fce943b,Data Capture Clerk,data capture clerk,[{'skill_uuid': '162cb662c7ef4dc9e86a2b5e88cbc...
9645,c8c33390-da69-0133-063a-0a81e8b09a82,Poland,medium,4cee16550636e292b8d136486fce943b,44 years old,male,I would vote for it,It might encourage people to stop working | It...,It reduces anxiety about financing basic needs,Unemployed,4cee16550636e292b8d136486fce943b,Data Capture Clerk,data capture clerk,[{'skill_uuid': '162cb662c7ef4dc9e86a2b5e88cbc...
9646,d27d24d0-d9b1-0133-03d4-0a81e8b09a82,Poland,high,4cee16550636e292b8d136486fce943b,24 years old,male,I would vote for it,None of the above,It reduces anxiety about financing basic needs...,Unemployed,4cee16550636e292b8d136486fce943b,Data Capture Clerk,data capture clerk,[{'skill_uuid': '162cb662c7ef4dc9e86a2b5e88cbc...
9647,529f3080-d99a-0133-1b7b-0a81e8b09a82,Portugal,no,4cee16550636e292b8d136486fce943b,40 years old,male,I would probably vote for it,It is impossible to finance,It encourages financial independence and self-...,Unemployed,4cee16550636e292b8d136486fce943b,Data Capture Clerk,data capture clerk,[{'skill_uuid': '162cb662c7ef4dc9e86a2b5e88cbc...


In [40]:



    # Remove unwanted columns

columns = ['country_code', 'normalized_job_title', 'gender']
final_raw = aquired_result[columns]

    # Rename headers

col_dict = {'uuid_x': 'ID number',
                'country_code': 'Country',
                'gender': 'Gender',
                'normalized_job_title': 'Job Title'}
final = final_raw.rename(columns=col_dict, inplace=False)

    # Group by country, job and gender, and add a column with respective count

final_grouped = final.value_counts(['Country', 'Job Title', 'Gender']).reset_index(name='Quantity')



In [43]:
final_grouped

Unnamed: 0,Country,Job Title,Gender,Quantity
0,Germany,Unemployed,female,411
1,Great Britain,Unemployed,female,373
2,France,Unemployed,female,363
3,Germany,Unemployed,male,295
4,Spain,Unemployed,female,274
...,...,...,...,...
2352,Italy,data entry specialist,male,1
2353,Italy,data examination clerk,female,1
2354,Italy,data modeler,female,1
2355,Italy,data processing auxiliary equipment operator,female,1
