In [1]:
%store -r df2023

In [2]:
import plotly.express as px
import pandas as pd

## Popular platforms among industries and professions in technology

In [3]:
def analysis(df, column):
    """Show a stacked bar chart of the most frequent platforms per category.

    Rows of ``df`` are counted for every (``column``, 'PlatformHaveWorkedWith')
    pair. For each distinct value of ``column`` only the ten pairs with the
    highest counts are kept, and each value is drawn as one stacked bar.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``column`` and 'PlatformHaveWorkedWith'.
    column : str
        Name of the categorical column placed on the x axis,
        e.g. 'Industry' or 'ProfessionalTech'.

    Returns
    -------
    None
        The figure is rendered with ``show()``; nothing is returned.
    """
    import re

    platform_counts = (
        df.groupby([column, 'PlatformHaveWorkedWith'])
        .size()
        .reset_index(name='Count')
    )
    # Sort by category, then by descending count, so that head(10) on each
    # group keeps that category's ten most frequent platform values.
    top10_per_category = (
        platform_counts
        .sort_values([column, 'Count'], ascending=[True, False])
        .groupby(column)
        .head(10)
    )

    fig_platform_stacked = px.bar(
        top10_per_category,
        x=column,
        y='Count',
        color='PlatformHaveWorkedWith',
        title=f'Platforms Used in {column} (Stacked Bar)',
        labels={'PlatformHaveWorkedWith': 'Platform', 'Count': 'Count'},
        template='plotly_dark',
    )

    # The axis label follows the column being plotted instead of a fixed
    # 'Professional Tech'. CamelCase names are split into words, so
    # 'ProfessionalTech' still reads 'Professional Tech' while 'Industry'
    # is labelled 'Industry'.
    axis_label = re.sub(r'(?<=[a-z])(?=[A-Z])', ' ', str(column))
    fig_platform_stacked.update_layout(
        barmode='stack',
        xaxis_title=axis_label,
        yaxis_title='Count',
        title_x=0.5,
    )

    fig_platform_stacked.show()


In [4]:
# Stacked bar chart of the ten most frequent platform values within each industry.
analysis(df2023, 'Industry')

In [5]:
# Same chart, grouped by the 'ProfessionalTech' column instead of industry.
analysis(df2023, 'ProfessionalTech')



### Analysis 
##### AWS is the most popular platform in every industry, followed by Microsoft Azure.

## Years Of Coding


In [6]:
# Find the ten most frequent values of the language column, then keep only
# the rows whose value is one of them.
top_languages = (
    df2023['LanguageHaveWorkedWith']
    .value_counts()
    .nlargest(10)
    .index
)
df_filtered_top10 = df2023.loc[df2023['LanguageHaveWorkedWith'].isin(top_languages)]

# histfunc='avg' makes each bar the mean of YearsCode for that language value
# rather than a row count. update_layout returns the same figure, so the
# layout tweaks are chained onto the constructor call.
fig_histogram = px.histogram(
    data_frame=df_filtered_top10,
    x='LanguageHaveWorkedWith',
    y='YearsCode',
    histfunc='avg',
    template='plotly_dark',
    title='Histogram of Years of Coding Experience for Top 10 Programming Languages',
    labels={
        'LanguageHaveWorkedWith': 'Programming Language',
        'YearsCode': 'Years of Coding Experience',
    },
).update_layout(
    title_x=0.5,
    xaxis_title='Programming Language',
    yaxis_title='Average Years of Coding Experience',
)

fig_histogram.show()



##### Among the ten most common language answers, the combination of C#, HTML/CSS and SQL appears to have the highest average years of coding experience, i.e. it is used by the most experienced developers.

In [10]:
def _top_counts(dataframe, source_col, column, category, top_n, count_col='Count'):
    """Count rows per value of ``source_col`` and keep the ``top_n`` largest.

    Returns a new frame with columns ``[column, count_col, 'Category']`` where
    ``column`` holds the values of ``source_col`` and 'Category' is the
    constant ``category`` label.
    """
    return (
        dataframe.groupby(source_col)
        .size()
        .reset_index(name=count_col)
        .sort_values(by=count_col, ascending=False)
        .head(top_n)
        # assign/rename return new frames, so nothing is mutated in place.
        .assign(Category=category)
        .rename(columns={source_col: column})
    )


def compare_languages(dataframe, worked_with_col, want_to_work_with_col, column, top_n=20):
    """Build a grouped bar chart comparing "have worked with" and "want to work with".

    The ``top_n`` most frequent values of each input column are counted
    separately, stacked into one frame tagged by 'Category', and then limited
    to the ``top_n`` values with the highest combined count.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain ``worked_with_col`` and ``want_to_work_with_col``.
    worked_with_col : str
        Column counted for the 'Have Worked With' category.
    want_to_work_with_col : str
        Column counted for the 'Want to Work With' category.
    column : str
        Display name for the compared item (e.g. 'Language'); used as the
        x-axis column, the axis title and in the chart title.
    top_n : int, default 20
        Number of values kept per category and in the final chart.

    Returns
    -------
    plotly figure or None
        The figure returned by ``px.bar``. On any error a message is printed
        and ``None`` is returned, so a notebook run is not interrupted.
    """
    count_col = 'Count'
    # The original formatted ``column`` through an f-string everywhere;
    # normalising once keeps that behaviour for non-string inputs.
    column = f'{column}'

    try:
        missing = [
            name for name in (worked_with_col, want_to_work_with_col)
            if name not in dataframe.columns
        ]
        if missing:
            raise KeyError(f"column(s) not found in dataframe: {missing}")

        combined_data = pd.concat(
            [
                _top_counts(dataframe, worked_with_col, column,
                            'Have Worked With', top_n, count_col),
                _top_counts(dataframe, want_to_work_with_col, column,
                            'Want to Work With', top_n, count_col),
            ],
            ignore_index=True,
        )
        # One row per (value, category), ordered by value then category.
        combined_data = (
            combined_data.groupby([column, 'Category'])
            .agg({count_col: 'sum'})
            .reset_index()
        )

        # The two per-category top lists can together hold up to 2 * top_n
        # distinct values; trim back to top_n by combined count.
        combined_totals = combined_data.groupby(column).agg({count_col: 'sum'}).reset_index()
        top_values = (
            combined_totals.sort_values(by=count_col, ascending=False)
            .head(top_n)[column]
        )
        filtered_combined_data = combined_data[combined_data[column].isin(top_values)]

        fig = px.bar(
            filtered_combined_data,
            x=column,
            y=count_col,
            color='Category',
            barmode='group',
            title=f'Comparison of {column} Professionals Have Worked With vs Want to Work With (Top {top_n})',
            labels={column: column, count_col: 'Count'},
            template='plotly_dark',
        )
        fig.update_layout(
            xaxis_title=column,
            yaxis_title='Count',
            title_x=0.5,
        )
        return fig

    except Exception as e:
        # Best-effort by design: report the problem and let the notebook continue.
        print(f"An error occurred: {e}")
        return None



In [11]:
# Languages: the 20 most frequent "have worked with" answers against the
# 20 most frequent "want to work with" answers.
compare_languages(
    dataframe=df2023,
    worked_with_col='LanguageHaveWorkedWith',
    want_to_work_with_col='LanguageWantToWorkWith',
    column= 'Language',
    top_n=20
)

In [12]:
# Databases: same have-worked-with vs want-to-work-with comparison (top 20).
compare_languages(
    dataframe=df2023,
    worked_with_col='DatabaseHaveWorkedWith',
    want_to_work_with_col='DatabaseWantToWorkWith',
    column='Database',
    top_n=20
)

In [14]:
# Web frameworks: same have-worked-with vs want-to-work-with comparison (top 20).
compare_languages(
    dataframe=df2023,
    worked_with_col='WebframeHaveWorkedWith',
    want_to_work_with_col='WebframeWantToWorkWith',
    column='Webframe',
    top_n=20
)

In [16]:
# Platforms: same have-worked-with vs want-to-work-with comparison (top 20).
compare_languages(
    dataframe=df2023,
    worked_with_col='PlatformHaveWorkedWith',
    want_to_work_with_col='PlatformWantToWorkWith',
    column='Platform',
    top_n=20)