In [34]:
import os
import re

import numpy as np
import pandas as pd


In [46]:
def save_grouped_data_to_csv(df, group_column, columns_not_to_keep, position_level):
    """Split ``df`` by ``group_column`` and write one CSV file per group.

    Each group is written to
    ``../created_csv/result/{position_level}/{group}_{first_column}.csv`` where
    ``group`` is the group key with spaces and slashes turned into single
    hyphens and commas removed, and ``first_column`` is the lower-cased name of
    the first column left after dropping ``columns_not_to_keep``.

    Args:
        df: DataFrame to split.
        group_column: Column (or columns) passed to ``DataFrame.groupby``.
        columns_not_to_keep: Columns dropped from every group before saving.
        position_level: Sub-directory name under ``../created_csv/result``.

    Returns:
        None. The function prints each group key and writes files as a side
        effect.
    """
    output_dir = f'../created_csv/result/{position_level}'
    # DataFrame.to_csv does not create missing directories, so make sure the
    # target exists instead of failing on a fresh checkout.
    os.makedirs(output_dir, exist_ok=True)

    for name, group in df.groupby(group_column):
        print("name : ", name)
        filtered_df = group.drop(columns=columns_not_to_keep)
        first_column_name = str(filtered_df.columns[0]).lower()

        # str() keeps the sanitising step working for non-string group keys.
        sanitized_name = str(name).replace(" ", "-").replace("/", "-").replace(",", "")
        sanitized_name = re.sub(r'-+', '-', sanitized_name)  # Replace multiple hyphens with a single one
        sanitized_name = sanitized_name.strip('-')  # Remove leading and trailing hyphens

        file_path = f'{output_dir}/{sanitized_name}_{first_column_name}.csv'
        filtered_df.to_csv(file_path, index=False)

### 사용할 Job List

In [3]:
# DevType labels kept by the `.isin(filtered_job_list)` filters in later cells.
# Matching is exact, so spelling, casing and spacing must equal the values in
# the DevType column of the source CSVs.
# NOTE(review): the "name : ..." output of the save cells further down never
# lists 'BlockChain' or 'UX / UI Designer', so these two labels may not match
# any DevType value in the data - confirm their spelling/casing.
filtered_job_list = [
'BlockChain',
'Developer, full-stack',
'Developer, back-end',
'Developer, front-end',
'Developer, game or graphics',
'Developer, mobile',
'UX / UI Designer',
'Engineer, data',
'Database administrator',
'Data scientist or machine learning specialist',
'Data or business analyst',
'Product manager',
'Developer Advocate',
'Developer, QA or test',
'DevOps specialist'
]

### Language

In [4]:
# Load the per-level (junior / middle / senior) DevType x Language tables.
junior_devtype_language_df = pd.read_csv("../stak_overflow_processing/junior_devtype_language.csv")
middle_devtype_language_df = pd.read_csv("../stak_overflow_processing/middle_devtype_language.csv")
senior_devtype_language_df = pd.read_csv("../stak_overflow_processing/senior_devtype_language.csv")


In [5]:
# Preview the first five rows of the junior language table.
junior_devtype_language_df.head(5)

Unnamed: 0,YearsGroup,DevType,Language,Count,Rank
0,junior,Academic researcher,Python,54,1
1,junior,Academic researcher,JavaScript,44,2
2,junior,Academic researcher,SQL,40,3
3,junior,Academic researcher,HTML/CSS,40,4
4,junior,Academic researcher,Java,24,5


In [6]:
# Preview the first five rows of the middle language table.
middle_devtype_language_df.head(5)

Unnamed: 0,YearsGroup,DevType,Language,Count,Rank
0,middle,Academic researcher,Python,257,1
1,middle,Academic researcher,JavaScript,243,2
2,middle,Academic researcher,HTML/CSS,228,3
3,middle,Academic researcher,SQL,205,4
4,middle,Academic researcher,Java,149,5


In [7]:
# Preview the first five rows of the senior language table.
senior_devtype_language_df.head(5)

Unnamed: 0,YearsGroup,DevType,Language,Count,Rank
0,senior,Academic researcher,Python,1454,1
1,senior,Academic researcher,JavaScript,1337,2
2,senior,Academic researcher,HTML/CSS,1235,3
3,senior,Academic researcher,SQL,1184,4
4,senior,Academic researcher,Java,833,5


In [8]:
# Keep only the rows whose DevType is in filtered_job_list.
# .copy() makes each result an independent frame: update_counts later writes
# into these frames in place, which should not happen on a filtered selection
# of the raw tables.
junior_filtered_devtype_language_df = junior_devtype_language_df[junior_devtype_language_df['DevType'].isin(filtered_job_list)].copy()
middle_filtered_devtype_language_df = middle_devtype_language_df[middle_devtype_language_df['DevType'].isin(filtered_job_list)].copy()
senior_filtered_devtype_language_df = senior_devtype_language_df[senior_devtype_language_df['DevType'].isin(filtered_job_list)].copy()


In [9]:
# Distinct Language values present in each filtered table.
junior_unique_languages = junior_filtered_devtype_language_df['Language'].unique()
middle_unique_languages = middle_filtered_devtype_language_df['Language'].unique()
senior_unique_languages = senior_filtered_devtype_language_df['Language'].unique()


#### junior

In [10]:
# Load the junior primary_proglang percentage tables, one per job type.
# These variable names are reused by later sections for other survey fields,
# so each section's load cell must be run before its update/save cells.
junior_data_business_analyst_df=pd.read_csv('../created_csv/junior/junior_Data-or-business-analyst_primary_proglang_percentages_df.csv')
junior_database_administrator_df=pd.read_csv('../created_csv/junior/junior_Database-administrator_primary_proglang_percentages_df.csv')
junior_developer_advocate_df=pd.read_csv('../created_csv/junior/junior_Developer-Advocate_primary_proglang_percentages_df.csv')
junior_Developer_back_end_df=pd.read_csv('../created_csv/junior/junior_Developer-back-end_primary_proglang_percentages_df.csv')
junior_Developer_front_end_df=pd.read_csv('../created_csv/junior/junior_Developer-front-end_primary_proglang_percentages_df.csv')
junior_Developer_full_stack_df=pd.read_csv('../created_csv/junior/junior_Developer-full-stack_primary_proglang_percentages_df.csv')
junior_Developer_mobile_df=pd.read_csv('../created_csv/junior/junior_Developer-mobile_primary_proglang_percentages_df.csv')
junior_DevOps_specialist_df=pd.read_csv('../created_csv/junior/junior_DevOps-specialist_primary_proglang_percentages_df.csv')
junior_UX_UI_Designer_df=pd.read_csv('../created_csv/junior/junior_UX-UI-Designer_primary_proglang_percentages_df.csv')

In [11]:
def update_counts(filtered_df, merge_df, filter_col, merge_col, devtype_col, count_col='Count',
                  devtype_column='DevType'):
    """Add counts from ``merge_df`` onto the matching rows of ``filtered_df``.

    A row of ``filtered_df`` is updated when its ``devtype_column`` value
    equals ``devtype_col`` and its ``filter_col`` value occurs in
    ``merge_df[merge_col]``. The ``count_col`` value of the first such
    ``merge_df`` row is added to the row's ``count_col``.

    ``filtered_df`` is modified in place, so calling this twice with the same
    arguments adds the counts twice. No other column is recomputed.

    Args:
        filtered_df: DataFrame to update; must contain ``devtype_column``,
            ``filter_col`` and ``count_col``.
        merge_df: DataFrame supplying the extra counts; must contain
            ``merge_col`` and ``count_col``.
        filter_col: Column of ``filtered_df`` holding the item label.
        merge_col: Column of ``merge_df`` holding the item label.
        devtype_col: The DevType *value* whose rows are updated (despite the
            name, this is not a column name).
        count_col: Name of the count column in both frames.
        devtype_column: Name of the DevType column in ``filtered_df``.

    Returns:
        The same ``filtered_df`` object, after the update.
    """
    # Build the label -> count lookup once instead of rescanning merge_df for
    # every row. Missing labels are dropped (they can never match) and
    # drop_duplicates keeps the first occurrence of each label.
    first_rows = merge_df.dropna(subset=[merge_col]).drop_duplicates(subset=merge_col)
    extra_by_label = dict(zip(first_rows[merge_col], first_rows[count_col]))

    # Only rows of the requested DevType can be updated.
    candidate_labels = filtered_df.loc[filtered_df[devtype_column] == devtype_col, filter_col]
    for row_index, label in candidate_labels.items():
        if label in extra_by_label:
            filtered_df.at[row_index, count_col] += extra_by_label[label]
    return filtered_df


In [12]:
# Add the counts from each junior primary_proglang table to the Language rows
# of the matching DevType. update_counts modifies the frame in place, so
# re-running this cell adds the same counts again. The frame returned by the
# last call is what the cell displays.
update_counts(junior_filtered_devtype_language_df, junior_data_business_analyst_df, 'Language', 'primary_proglang', 'Data or business analyst', 'Count')
update_counts(junior_filtered_devtype_language_df, junior_database_administrator_df, 'Language', 'primary_proglang', 'Database administrator', 'Count')
update_counts(junior_filtered_devtype_language_df, junior_developer_advocate_df, 'Language', 'primary_proglang', 'Developer Advocate', 'Count')
update_counts(junior_filtered_devtype_language_df, junior_Developer_back_end_df, 'Language', 'primary_proglang', 'Developer, back-end', 'Count')
update_counts(junior_filtered_devtype_language_df, junior_Developer_front_end_df, 'Language', 'primary_proglang', 'Developer, front-end', 'Count')
update_counts(junior_filtered_devtype_language_df, junior_Developer_full_stack_df, 'Language', 'primary_proglang', 'Developer, full-stack', 'Count')
update_counts(junior_filtered_devtype_language_df, junior_Developer_mobile_df, 'Language', 'primary_proglang', 'Developer, mobile', 'Count')
update_counts(junior_filtered_devtype_language_df, junior_DevOps_specialist_df, 'Language', 'primary_proglang', 'DevOps specialist', 'Count')
update_counts(junior_filtered_devtype_language_df, junior_UX_UI_Designer_df, 'Language', 'primary_proglang', 'UX / UI Designer', 'Count')

Unnamed: 0,YearsGroup,DevType,Language,Count,Rank
85,junior,Data or business analyst,SQL,230,1
86,junior,Data or business analyst,Python,224,2
87,junior,Data or business analyst,HTML/CSS,128,3
88,junior,Data or business analyst,JavaScript,127,4
89,junior,Data or business analyst,R,74,5
...,...,...,...,...,...
876,junior,Product manager,Delphi,1,30
877,junior,Product manager,Scala,1,31
878,junior,Product manager,Perl,1,32
879,junior,Product manager,Matlab,1,33


#### middle

In [13]:
# NOTE(review): this cell sits under the "middle" heading but reads the same
# junior primary_proglang files into the same junior_* variables as the junior
# section, and no update/save step for middle follows it. It looks like an
# unfinished copy of the junior cell - confirm whether middle-level files
# should be loaded here instead.
junior_data_business_analyst_df=pd.read_csv('../created_csv/junior/junior_Data-or-business-analyst_primary_proglang_percentages_df.csv')
junior_database_administrator_df=pd.read_csv('../created_csv/junior/junior_Database-administrator_primary_proglang_percentages_df.csv')
junior_developer_advocate_df=pd.read_csv('../created_csv/junior/junior_Developer-Advocate_primary_proglang_percentages_df.csv')
junior_Developer_back_end_df=pd.read_csv('../created_csv/junior/junior_Developer-back-end_primary_proglang_percentages_df.csv')
junior_Developer_front_end_df=pd.read_csv('../created_csv/junior/junior_Developer-front-end_primary_proglang_percentages_df.csv')
junior_Developer_full_stack_df=pd.read_csv('../created_csv/junior/junior_Developer-full-stack_primary_proglang_percentages_df.csv')
junior_Developer_mobile_df=pd.read_csv('../created_csv/junior/junior_Developer-mobile_primary_proglang_percentages_df.csv')
junior_DevOps_specialist_df=pd.read_csv('../created_csv/junior/junior_DevOps-specialist_primary_proglang_percentages_df.csv')
junior_UX_UI_Designer_df=pd.read_csv('../created_csv/junior/junior_UX-UI-Designer_primary_proglang_percentages_df.csv')

#### senior

In [14]:
# to_csv

In [49]:
# Write one CSV per DevType for the junior language table, without the DevType and YearsGroup columns.
save_grouped_data_to_csv(junior_filtered_devtype_language_df, 'DevType', ['DevType', 'YearsGroup'], 'junior')

name :  Data or business analyst
name :  Data scientist or machine learning specialist
name :  Database administrator
name :  DevOps specialist
name :  Developer Advocate
name :  Developer, QA or test
name :  Developer, back-end
name :  Developer, front-end
name :  Developer, full-stack
name :  Developer, game or graphics
name :  Developer, mobile
name :  Engineer, data
name :  Product manager


### Database

In [15]:
# Load the per-level (junior / middle / senior) DevType x Database tables.
junior_devtype_database_df = pd.read_csv("../stak_overflow_processing/junior_devtype_database.csv")
middle_devtype_database_df = pd.read_csv("../stak_overflow_processing/middle_devtype_database.csv")
senior_devtype_database_df = pd.read_csv("../stak_overflow_processing/senior_devtype_database.csv")

# Keep only the rows whose DevType is in filtered_job_list.
# .copy() makes each result an independent frame: update_counts later writes
# into these frames in place, which should not happen on a filtered selection
# of the raw tables.
junior_filtered_devtype_database_df = junior_devtype_database_df[junior_devtype_database_df['DevType'].isin(filtered_job_list)].copy()
middle_filtered_devtype_database_df = middle_devtype_database_df[middle_devtype_database_df['DevType'].isin(filtered_job_list)].copy()
senior_filtered_devtype_database_df = senior_devtype_database_df[senior_devtype_database_df['DevType'].isin(filtered_job_list)].copy()

In [16]:
# Display the filtered junior database table before the counts are updated.
junior_filtered_devtype_database_df

Unnamed: 0,YearsGroup,DevType,Database,Count,Rank
41,junior,Data or business analyst,MySQL,146,1
42,junior,Data or business analyst,PostgreSQL,131,2
43,junior,Data or business analyst,Microsoft SQL Server,105,3
44,junior,Data or business analyst,SQLite,101,4
45,junior,Data or business analyst,MongoDB,72,5
...,...,...,...,...,...
474,junior,Product manager,Dynamodb,1,18
475,junior,Product manager,Cosmos DB,1,19
476,junior,Product manager,Couchbase,1,20
477,junior,Product manager,BigQuery,1,21


In [17]:
# Load the junior db percentage tables, one per job type.
# This rebinds the junior_* variables used by the previous section.
junior_data_business_analyst_df=pd.read_csv('../created_csv/junior/junior_Data-or-business-analyst_db_percentages_df.csv')
junior_database_administrator_df=pd.read_csv('../created_csv/junior/junior_Database-administrator_db_percentages_df.csv')
junior_developer_advocate_df=pd.read_csv('../created_csv/junior/junior_Developer-Advocate_db_percentages_df.csv')
junior_Developer_back_end_df=pd.read_csv('../created_csv/junior/junior_Developer-back-end_db_percentages_df.csv')
junior_Developer_front_end_df=pd.read_csv('../created_csv/junior/junior_Developer-front-end_db_percentages_df.csv')
junior_Developer_full_stack_df=pd.read_csv('../created_csv/junior/junior_Developer-full-stack_db_percentages_df.csv')
junior_Developer_mobile_df=pd.read_csv('../created_csv/junior/junior_Developer-mobile_db_percentages_df.csv')
junior_DevOps_specialist_df=pd.read_csv('../created_csv/junior/junior_DevOps-specialist_db_percentages_df.csv')
junior_UX_UI_Designer_df=pd.read_csv('../created_csv/junior/junior_UX-UI-Designer_db_percentages_df.csv')

In [18]:
# Add the counts from each junior db table to the Database rows of the
# matching DevType. update_counts modifies the frame in place, so re-running
# this cell adds the same counts again. The frame returned by the last call is
# what the cell displays.
update_counts(junior_filtered_devtype_database_df, junior_data_business_analyst_df, 'Database', 'db', 'Data or business analyst', 'Count')
update_counts(junior_filtered_devtype_database_df, junior_database_administrator_df, 'Database', 'db', 'Database administrator', 'Count')
update_counts(junior_filtered_devtype_database_df, junior_developer_advocate_df, 'Database', 'db', 'Developer Advocate', 'Count')
update_counts(junior_filtered_devtype_database_df, junior_Developer_back_end_df, 'Database', 'db', 'Developer, back-end', 'Count')
update_counts(junior_filtered_devtype_database_df, junior_Developer_front_end_df, 'Database', 'db', 'Developer, front-end', 'Count')
update_counts(junior_filtered_devtype_database_df, junior_Developer_full_stack_df, 'Database', 'db', 'Developer, full-stack', 'Count')
update_counts(junior_filtered_devtype_database_df, junior_Developer_mobile_df, 'Database', 'db', 'Developer, mobile', 'Count')
update_counts(junior_filtered_devtype_database_df, junior_DevOps_specialist_df, 'Database', 'db', 'DevOps specialist', 'Count')
update_counts(junior_filtered_devtype_database_df, junior_UX_UI_Designer_df, 'Database', 'db', 'UX / UI Designer', 'Count')

Unnamed: 0,YearsGroup,DevType,Database,Count,Rank
41,junior,Data or business analyst,MySQL,147,1
42,junior,Data or business analyst,PostgreSQL,134,2
43,junior,Data or business analyst,Microsoft SQL Server,105,3
44,junior,Data or business analyst,SQLite,102,4
45,junior,Data or business analyst,MongoDB,73,5
...,...,...,...,...,...
474,junior,Product manager,Dynamodb,1,18
475,junior,Product manager,Cosmos DB,1,19
476,junior,Product manager,Couchbase,1,20
477,junior,Product manager,BigQuery,1,21


In [48]:
# Write one CSV per DevType for the junior database table, without the DevType and YearsGroup columns.
save_grouped_data_to_csv(junior_filtered_devtype_database_df, 'DevType', ['DevType', 'YearsGroup'], 'junior')

name :  Data or business analyst
name :  Data scientist or machine learning specialist
name :  Database administrator
name :  DevOps specialist
name :  Developer Advocate
name :  Developer, QA or test
name :  Developer, back-end
name :  Developer, front-end
name :  Developer, full-stack
name :  Developer, game or graphics
name :  Developer, mobile
name :  Engineer, data
name :  Product manager


### Framework

In [19]:
# Load the per-level (junior / middle / senior) DevType x Webframe tables.
junior_devtype_framework_df = pd.read_csv("../stak_overflow_processing/junior_devtype_framework.csv")
middle_devtype_framework_df = pd.read_csv("../stak_overflow_processing/middle_devtype_framework.csv")
senior_devtype_framework_df = pd.read_csv("../stak_overflow_processing/senior_devtype_framework.csv")

# Keep only the rows whose DevType is in filtered_job_list.
# .copy() makes each result an independent frame: update_counts later writes
# into these frames in place, which should not happen on a filtered selection
# of the raw tables.
junior_filtered_devtype_framework_df = junior_devtype_framework_df[junior_devtype_framework_df['DevType'].isin(filtered_job_list)].copy()
middle_filtered_devtype_framework_df = middle_devtype_framework_df[middle_devtype_framework_df['DevType'].isin(filtered_job_list)].copy()
senior_filtered_devtype_framework_df = senior_devtype_framework_df[senior_devtype_framework_df['DevType'].isin(filtered_job_list)].copy()

junior_filtered_devtype_framework_df

Unnamed: 0,YearsGroup,DevType,Webframe,Count,Rank
54,junior,Data or business analyst,Django,62,1
55,junior,Data or business analyst,Flask,47,2
56,junior,Data or business analyst,React.js,43,3
57,junior,Data or business analyst,jQuery,42,4
58,junior,Data or business analyst,Express,27,5
...,...,...,...,...,...
607,junior,Product manager,Symfony,1,22
608,junior,Product manager,Svelte,1,23
609,junior,Product manager,Fastify,1,24
610,junior,Product manager,ASP.NET Core,1,25


In [20]:
# Load the junior ide_main percentage tables, one per job type.
# NOTE(review): this is the Framework section, yet the files loaded are the
# ide_main (IDE) tables, the same ones the next section loads. Confirm whether
# a framework-specific survey file was intended here.
junior_data_business_analyst_df=pd.read_csv('../created_csv/junior/junior_Data-or-business-analyst_ide_main_percentages_df.csv')
junior_database_administrator_df=pd.read_csv('../created_csv/junior/junior_Database-administrator_ide_main_percentages_df.csv')
junior_developer_advocate_df=pd.read_csv('../created_csv/junior/junior_Developer-Advocate_ide_main_percentages_df.csv')
junior_Developer_back_end_df=pd.read_csv('../created_csv/junior/junior_Developer-back-end_ide_main_percentages_df.csv')
junior_Developer_front_end_df=pd.read_csv('../created_csv/junior/junior_Developer-front-end_ide_main_percentages_df.csv')
junior_Developer_full_stack_df=pd.read_csv('../created_csv/junior/junior_Developer-full-stack_ide_main_percentages_df.csv')
junior_Developer_mobile_df=pd.read_csv('../created_csv/junior/junior_Developer-mobile_ide_main_percentages_df.csv')
junior_DevOps_specialist_df=pd.read_csv('../created_csv/junior/junior_DevOps-specialist_ide_main_percentages_df.csv')
junior_UX_UI_Designer_df=pd.read_csv('../created_csv/junior/junior_UX-UI-Designer_ide_main_percentages_df.csv')

In [21]:
# Preview the back-end ide_main table to see its columns and labels.
junior_Developer_back_end_df.head(5)

Unnamed: 0.1,Unnamed: 0,ide_main,Percentage,Count
0,0,IntelliJ IDEA,40.0,2
1,1,VS Code (Visual Studio Code),40.0,2
2,2,Emacs,20.0,1


In [22]:
# Try to add ide_main counts to the Webframe rows of the matching DevType.
# NOTE(review): Webframe values are web-framework names while ide_main values
# are IDE names, and the Count values displayed below are the same as before
# this cell ran, so these calls appear to match nothing. Confirm which survey
# field should be merged into the framework table.
update_counts(junior_filtered_devtype_framework_df, junior_data_business_analyst_df, 'Webframe', 'ide_main', 'Data or business analyst', 'Count')
update_counts(junior_filtered_devtype_framework_df, junior_database_administrator_df, 'Webframe', 'ide_main', 'Database administrator', 'Count')
update_counts(junior_filtered_devtype_framework_df, junior_developer_advocate_df, 'Webframe', 'ide_main', 'Developer Advocate', 'Count')
update_counts(junior_filtered_devtype_framework_df, junior_Developer_back_end_df, 'Webframe', 'ide_main', 'Developer, back-end', 'Count')
update_counts(junior_filtered_devtype_framework_df, junior_Developer_front_end_df, 'Webframe', 'ide_main', 'Developer, front-end', 'Count')
update_counts(junior_filtered_devtype_framework_df, junior_Developer_full_stack_df, 'Webframe', 'ide_main', 'Developer, full-stack', 'Count')
update_counts(junior_filtered_devtype_framework_df, junior_Developer_mobile_df, 'Webframe', 'ide_main', 'Developer, mobile', 'Count')
update_counts(junior_filtered_devtype_framework_df, junior_DevOps_specialist_df, 'Webframe', 'ide_main', 'DevOps specialist', 'Count')
update_counts(junior_filtered_devtype_framework_df, junior_UX_UI_Designer_df, 'Webframe', 'ide_main', 'UX / UI Designer', 'Count')

Unnamed: 0,YearsGroup,DevType,Webframe,Count,Rank
54,junior,Data or business analyst,Django,62,1
55,junior,Data or business analyst,Flask,47,2
56,junior,Data or business analyst,React.js,43,3
57,junior,Data or business analyst,jQuery,42,4
58,junior,Data or business analyst,Express,27,5
...,...,...,...,...,...
607,junior,Product manager,Symfony,1,22
608,junior,Product manager,Svelte,1,23
609,junior,Product manager,Fastify,1,24
610,junior,Product manager,ASP.NET Core,1,25


In [47]:
# Save the junior framework table (the frame the update_counts calls in this
# section operate on). The previous version passed the senior frame while
# still writing into the 'junior' result folder.
save_grouped_data_to_csv(junior_filtered_devtype_framework_df, 'DevType', ['DevType', 'YearsGroup'], 'junior')

name :  Data or business analyst
name :  Data scientist or machine learning specialist
name :  Database administrator
name :  DevOps specialist
name :  Developer Advocate
name :  Developer, QA or test
name :  Developer, back-end
name :  Developer, front-end
name :  Developer, full-stack
name :  Developer, game or graphics
name :  Developer, mobile
name :  Engineer, data
name :  Product manager


### IDE / Tools

In [23]:
# Load the per-level (junior / middle / senior) DevType x Tools tables.
junior_devtype_tool_df = pd.read_csv("../stak_overflow_processing/junior_devtype_tools.csv")
middle_devtype_tool_df = pd.read_csv("../stak_overflow_processing/middle_devtype_tools.csv")
senior_devtype_tool_df = pd.read_csv("../stak_overflow_processing/senior_devtype_tools.csv")

# Keep only the rows whose DevType is in filtered_job_list.
# .copy() makes each result an independent frame: update_counts later writes
# into these frames in place, which should not happen on a filtered selection
# of the raw tables.
junior_filtered_devtype_tool_df = junior_devtype_tool_df[junior_devtype_tool_df['DevType'].isin(filtered_job_list)].copy()
middle_filtered_devtype_tool_df = middle_devtype_tool_df[middle_devtype_tool_df['DevType'].isin(filtered_job_list)].copy()
senior_filtered_devtype_tool_df = senior_devtype_tool_df[senior_devtype_tool_df['DevType'].isin(filtered_job_list)].copy()

junior_filtered_devtype_tool_df

Unnamed: 0,YearsGroup,DevType,Tools,Count,Rank
68,junior,Data or business analyst,Visual Studio Code,82,1
69,junior,Data or business analyst,IPython/Jupyter,45,2
70,junior,Data or business analyst,Github,42,3
71,junior,Data or business analyst,Notepad++,32,4
72,junior,Data or business analyst,PyCharm,30,5
...,...,...,...,...,...
837,junior,Product manager,IPython,1,32
838,junior,Product manager,"RAD Studio (Delphi, C++ Builder)",1,33
839,junior,Product manager,NetBeans,1,34
840,junior,Product manager,PHPStorm,1,35


In [24]:
# Load the junior ide_main percentage tables, one per job type, for merging
# into the Tools table below.
junior_data_business_analyst_df=pd.read_csv('../created_csv/junior/junior_Data-or-business-analyst_ide_main_percentages_df.csv')
junior_database_administrator_df=pd.read_csv('../created_csv/junior/junior_Database-administrator_ide_main_percentages_df.csv')
junior_developer_advocate_df=pd.read_csv('../created_csv/junior/junior_Developer-Advocate_ide_main_percentages_df.csv')
junior_Developer_back_end_df=pd.read_csv('../created_csv/junior/junior_Developer-back-end_ide_main_percentages_df.csv')
junior_Developer_front_end_df=pd.read_csv('../created_csv/junior/junior_Developer-front-end_ide_main_percentages_df.csv')
junior_Developer_full_stack_df=pd.read_csv('../created_csv/junior/junior_Developer-full-stack_ide_main_percentages_df.csv')
junior_Developer_mobile_df=pd.read_csv('../created_csv/junior/junior_Developer-mobile_ide_main_percentages_df.csv')
junior_DevOps_specialist_df=pd.read_csv('../created_csv/junior/junior_DevOps-specialist_ide_main_percentages_df.csv')
junior_UX_UI_Designer_df=pd.read_csv('../created_csv/junior/junior_UX-UI-Designer_ide_main_percentages_df.csv')

In [25]:
# Add the counts from each junior ide_main table to the Tools rows of the
# matching DevType. update_counts modifies the frame in place, so re-running
# this cell adds the same counts again. The frame returned by the last call is
# what the cell displays.
# NOTE(review): matching is by exact label. The ide_main preview earlier shows
# "VS Code (Visual Studio Code)" while the Tools table uses
# "Visual Studio Code", so labels spelled differently are presumably not
# merged - confirm whether a label mapping is needed.
update_counts(junior_filtered_devtype_tool_df, junior_data_business_analyst_df, 'Tools', 'ide_main', 'Data or business analyst', 'Count')
update_counts(junior_filtered_devtype_tool_df, junior_database_administrator_df, 'Tools', 'ide_main', 'Database administrator', 'Count')
update_counts(junior_filtered_devtype_tool_df, junior_developer_advocate_df, 'Tools', 'ide_main', 'Developer Advocate', 'Count')
update_counts(junior_filtered_devtype_tool_df, junior_Developer_back_end_df, 'Tools', 'ide_main', 'Developer, back-end', 'Count')
update_counts(junior_filtered_devtype_tool_df, junior_Developer_front_end_df, 'Tools', 'ide_main', 'Developer, front-end', 'Count')
update_counts(junior_filtered_devtype_tool_df, junior_Developer_full_stack_df, 'Tools', 'ide_main', 'Developer, full-stack', 'Count')
update_counts(junior_filtered_devtype_tool_df, junior_Developer_mobile_df, 'Tools', 'ide_main', 'Developer, mobile', 'Count')
update_counts(junior_filtered_devtype_tool_df, junior_DevOps_specialist_df, 'Tools', 'ide_main', 'DevOps specialist', 'Count')
update_counts(junior_filtered_devtype_tool_df, junior_UX_UI_Designer_df, 'Tools', 'ide_main', 'UX / UI Designer', 'Count')

Unnamed: 0,YearsGroup,DevType,Tools,Count,Rank
68,junior,Data or business analyst,Visual Studio Code,82,1
69,junior,Data or business analyst,IPython/Jupyter,45,2
70,junior,Data or business analyst,Github,42,3
71,junior,Data or business analyst,Notepad++,32,4
72,junior,Data or business analyst,PyCharm,31,5
...,...,...,...,...,...
837,junior,Product manager,IPython,1,32
838,junior,Product manager,"RAD Studio (Delphi, C++ Builder)",1,33
839,junior,Product manager,NetBeans,1,34
840,junior,Product manager,PHPStorm,1,35


In [45]:
# Write one CSV per DevType for the junior tools table, without the DevType and YearsGroup columns.
save_grouped_data_to_csv(junior_filtered_devtype_tool_df, 'DevType', ['DevType', 'YearsGroup'], 'junior')

name :  Data or business analyst
name :  Data scientist or machine learning specialist
name :  Database administrator
name :  DevOps specialist
name :  Developer Advocate
name :  Developer, QA or test
name :  Developer, back-end
name :  Developer, front-end
name :  Developer, full-stack
name :  Developer, game or graphics
name :  Developer, mobile
name :  Engineer, data
name :  Product manager


In [81]:
def transform_and_save_data(df, position_level, file_name):
    """Rank a percentage table by ``Count`` and save it as a result CSV.

    Columns whose name starts with ``Unnamed`` and the ``Percentage`` column
    are dropped, and a ``Rank`` column is added (1 = highest ``Count``; tied
    rows all receive the largest rank of their tie group). The result is
    written to
    ``../created_csv/result/{position_level}/{dev_name}_{first_column}.csv``
    where ``dev_name`` is the text between the first two underscores of
    ``file_name`` (sanitised for use in a file name) and ``first_column`` is
    the lower-cased name of the first remaining column.

    Args:
        df: DataFrame containing at least ``Percentage`` and ``Count``.
        position_level: Sub-directory name under ``../created_csv/result``.
        file_name: Source file stem, e.g.
            ``'junior_Developer-back-end_ide_main_percentages_df'``.

    Returns:
        None. The CSV file is written as a side effect.

    Raises:
        ValueError: If ``file_name`` does not contain a ``_<name>_`` segment.
    """
    match = re.search(r'_(.*?)_', file_name)
    if match is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(f"cannot extract a job name from file_name {file_name!r}")
    dev_name = match.group(1)

    without_index_columns = df.loc[:, ~df.columns.str.contains('^Unnamed')]
    transformed_df = without_index_columns.drop(columns=['Percentage'])
    transformed_df['Rank'] = transformed_df['Count'].rank(method='max', ascending=False).astype(int)

    first_column_name = transformed_df.columns[0].lower()
    sanitized_name = dev_name.replace(" ", "-").replace("/", "-").replace(",", "")
    sanitized_name = re.sub(r'-+', '-', sanitized_name)  # Replace multiple hyphens with a single one
    sanitized_name = sanitized_name.strip('-')  # Remove leading and trailing hyphens

    output_dir = f'../created_csv/result/{position_level}'
    # DataFrame.to_csv does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    # Use the sanitised name; it was previously computed but never used.
    file_path = f'{output_dir}/{sanitized_name}_{first_column_name}.csv'
    transformed_df.to_csv(file_path, index=False)

## Job_code

In [82]:
# Load the junior job_code percentage tables, one per job type.
junior_data_business_analyst_df=pd.read_csv('../created_csv/junior/junior_Data-or-business-analyst_job_code_percentages_df.csv')
junior_database_administrator_df=pd.read_csv('../created_csv/junior/junior_Database-administrator_job_code_percentages_df.csv')
junior_developer_advocate_df=pd.read_csv('../created_csv/junior/junior_Developer-Advocate_job_code_percentages_df.csv')
junior_Developer_back_end_df=pd.read_csv('../created_csv/junior/junior_Developer-back-end_job_code_percentages_df.csv')
junior_Developer_front_end_df=pd.read_csv('../created_csv/junior/junior_Developer-front-end_job_code_percentages_df.csv')
junior_Developer_full_stack_df=pd.read_csv('../created_csv/junior/junior_Developer-full-stack_job_code_percentages_df.csv')
junior_Developer_mobile_df=pd.read_csv('../created_csv/junior/junior_Developer-mobile_job_code_percentages_df.csv')
junior_DevOps_specialist_df=pd.read_csv('../created_csv/junior/junior_DevOps-specialist_job_code_percentages_df.csv')
junior_UX_UI_Designer_df=pd.read_csv('../created_csv/junior/junior_UX-UI-Designer_job_code_percentages_df.csv')

In [83]:
# Rank and save every junior job_code table; each frame is paired with the
# file stem that transform_and_save_data derives the job name from.
for frame, source_name in [
    (junior_data_business_analyst_df, 'junior_Data-or-business-analyst_job_code_percentages_df'),
    (junior_database_administrator_df, 'junior_Database-administrator_job_code_percentages_df'),
    (junior_developer_advocate_df, 'junior_Developer-Advocate_job_code_percentages_df'),
    (junior_Developer_back_end_df, 'junior_Developer-back-end_job_code_percentages_df'),
    (junior_Developer_front_end_df, 'junior_Developer-front-end_job_code_percentages_df'),
    (junior_Developer_full_stack_df, 'junior_Developer-full-stack_job_code_percentages_df'),
    (junior_Developer_mobile_df, 'junior_Developer-mobile_job_code_percentages_df'),
    (junior_DevOps_specialist_df, 'junior_DevOps-specialist_job_code_percentages_df'),
    (junior_UX_UI_Designer_df, 'junior_UX-UI-Designer_job_code_percentages_df'),
]:
    transform_and_save_data(frame, 'junior', source_name)

## lifestyle_sleep

In [79]:
# Load the junior lifestyle_sleep percentage tables, one per job type.
junior_data_business_analyst_df=pd.read_csv('../created_csv/junior/junior_Data-or-business-analyst_lifestyle_sleep_percentages_df.csv')
junior_database_administrator_df=pd.read_csv('../created_csv/junior/junior_Database-administrator_lifestyle_sleep_percentages_df.csv')
junior_developer_advocate_df=pd.read_csv('../created_csv/junior/junior_Developer-Advocate_lifestyle_sleep_percentages_df.csv')
junior_Developer_back_end_df=pd.read_csv('../created_csv/junior/junior_Developer-back-end_lifestyle_sleep_percentages_df.csv')
junior_Developer_front_end_df=pd.read_csv('../created_csv/junior/junior_Developer-front-end_lifestyle_sleep_percentages_df.csv')
junior_Developer_full_stack_df=pd.read_csv('../created_csv/junior/junior_Developer-full-stack_lifestyle_sleep_percentages_df.csv')
junior_Developer_mobile_df=pd.read_csv('../created_csv/junior/junior_Developer-mobile_lifestyle_sleep_percentages_df.csv')
junior_DevOps_specialist_df=pd.read_csv('../created_csv/junior/junior_DevOps-specialist_lifestyle_sleep_percentages_df.csv')
junior_UX_UI_Designer_df=pd.read_csv('../created_csv/junior/junior_UX-UI-Designer_lifestyle_sleep_percentages_df.csv')

In [80]:
# Rank and save every junior lifestyle_sleep table; each frame is paired with
# the file stem that transform_and_save_data derives the job name from.
for frame, source_name in [
    (junior_data_business_analyst_df, 'junior_Data-or-business-analyst_lifestyle_sleep_percentages_df'),
    (junior_database_administrator_df, 'junior_Database-administrator_lifestyle_sleep_percentages_df'),
    (junior_developer_advocate_df, 'junior_Developer-Advocate_lifestyle_sleep_percentages_df'),
    (junior_Developer_back_end_df, 'junior_Developer-back-end_lifestyle_sleep_percentages_df'),
    (junior_Developer_front_end_df, 'junior_Developer-front-end_lifestyle_sleep_percentages_df'),
    (junior_Developer_full_stack_df, 'junior_Developer-full-stack_lifestyle_sleep_percentages_df'),
    (junior_Developer_mobile_df, 'junior_Developer-mobile_lifestyle_sleep_percentages_df'),
    (junior_DevOps_specialist_df, 'junior_DevOps-specialist_lifestyle_sleep_percentages_df'),
    (junior_UX_UI_Designer_df, 'junior_UX-UI-Designer_lifestyle_sleep_percentages_df'),
]:
    transform_and_save_data(frame, 'junior', source_name)

## productive_to_job

In [84]:
# Load the junior productive_to_job percentage tables, one per job type.
junior_data_business_analyst_df=pd.read_csv('../created_csv/junior/junior_Data-or-business-analyst_productive_to_job_percentages_df.csv')
junior_database_administrator_df=pd.read_csv('../created_csv/junior/junior_Database-administrator_productive_to_job_percentages_df.csv')
junior_developer_advocate_df=pd.read_csv('../created_csv/junior/junior_Developer-Advocate_productive_to_job_percentages_df.csv')
junior_Developer_back_end_df=pd.read_csv('../created_csv/junior/junior_Developer-back-end_productive_to_job_percentages_df.csv')
junior_Developer_front_end_df=pd.read_csv('../created_csv/junior/junior_Developer-front-end_productive_to_job_percentages_df.csv')
junior_Developer_full_stack_df=pd.read_csv('../created_csv/junior/junior_Developer-full-stack_productive_to_job_percentages_df.csv')
junior_Developer_mobile_df=pd.read_csv('../created_csv/junior/junior_Developer-mobile_productive_to_job_percentages_df.csv')
junior_DevOps_specialist_df=pd.read_csv('../created_csv/junior/junior_DevOps-specialist_productive_to_job_percentages_df.csv')
junior_UX_UI_Designer_df=pd.read_csv('../created_csv/junior/junior_UX-UI-Designer_productive_to_job_percentages_df.csv')

In [85]:
# Rank and save every junior productive_to_job table; each frame is paired
# with the file stem that transform_and_save_data derives the job name from.
for frame, source_name in [
    (junior_data_business_analyst_df, 'junior_Data-or-business-analyst_productive_to_job_percentages_df'),
    (junior_database_administrator_df, 'junior_Database-administrator_productive_to_job_percentages_df'),
    (junior_developer_advocate_df, 'junior_Developer-Advocate_productive_to_job_percentages_df'),
    (junior_Developer_back_end_df, 'junior_Developer-back-end_productive_to_job_percentages_df'),
    (junior_Developer_front_end_df, 'junior_Developer-front-end_productive_to_job_percentages_df'),
    (junior_Developer_full_stack_df, 'junior_Developer-full-stack_productive_to_job_percentages_df'),
    (junior_Developer_mobile_df, 'junior_Developer-mobile_productive_to_job_percentages_df'),
    (junior_DevOps_specialist_df, 'junior_DevOps-specialist_productive_to_job_percentages_df'),
    (junior_UX_UI_Designer_df, 'junior_UX-UI-Designer_productive_to_job_percentages_df'),
]:
    transform_and_save_data(frame, 'junior', source_name)

### learn_time

In [88]:
# Load the junior learn_time percentage tables, one per job type.
junior_data_business_analyst_df=pd.read_csv('../created_csv/junior/junior_Data-or-business-analyst_learn_time_percentages_df.csv')
junior_database_administrator_df=pd.read_csv('../created_csv/junior/junior_Database-administrator_learn_time_percentages_df.csv')
junior_developer_advocate_df=pd.read_csv('../created_csv/junior/junior_Developer-Advocate_learn_time_percentages_df.csv')
junior_Developer_back_end_df=pd.read_csv('../created_csv/junior/junior_Developer-back-end_learn_time_percentages_df.csv')
junior_Developer_front_end_df=pd.read_csv('../created_csv/junior/junior_Developer-front-end_learn_time_percentages_df.csv')
junior_Developer_full_stack_df=pd.read_csv('../created_csv/junior/junior_Developer-full-stack_learn_time_percentages_df.csv')
junior_Developer_mobile_df=pd.read_csv('../created_csv/junior/junior_Developer-mobile_learn_time_percentages_df.csv')
junior_DevOps_specialist_df=pd.read_csv('../created_csv/junior/junior_DevOps-specialist_learn_time_percentages_df.csv')
junior_UX_UI_Designer_df=pd.read_csv('../created_csv/junior/junior_UX-UI-Designer_learn_time_percentages_df.csv')

In [89]:
# Rank and save every junior learn_time table. The file_name argument supplies
# the job name used in the output file name, so it must follow the
# 'junior_<job>_learn_time_percentages_df' pattern. Two entries were mistyped
# before: the database-administrator one named productive_to_job, and the
# mobile one had lost its underscore, which produced a wrong output file name.
transform_and_save_data(junior_data_business_analyst_df, 'junior', 'junior_Data-or-business-analyst_learn_time_percentages_df')
transform_and_save_data(junior_database_administrator_df, 'junior', 'junior_Database-administrator_learn_time_percentages_df')
transform_and_save_data(junior_developer_advocate_df, 'junior', 'junior_Developer-Advocate_learn_time_percentages_df')
transform_and_save_data(junior_Developer_back_end_df, 'junior', 'junior_Developer-back-end_learn_time_percentages_df')
transform_and_save_data(junior_Developer_front_end_df, 'junior', 'junior_Developer-front-end_learn_time_percentages_df')
transform_and_save_data(junior_Developer_full_stack_df, 'junior', 'junior_Developer-full-stack_learn_time_percentages_df')
transform_and_save_data(junior_Developer_mobile_df, 'junior', 'junior_Developer-mobile_learn_time_percentages_df')
transform_and_save_data(junior_DevOps_specialist_df, 'junior', 'junior_DevOps-specialist_learn_time_percentages_df')
transform_and_save_data(junior_UX_UI_Designer_df, 'junior', 'junior_UX-UI-Designer_learn_time_percentages_df')