In [None]:
#Import
import sys
from pathlib import Path

In [None]:
# Make the parent directory importable (presumably so the `src.*` imports in
# the following cell resolve -- verify against the repository layout).
# Guarded so that re-running this cell does not append duplicate entries.
project_root = str(Path('..').resolve())
if project_root not in sys.path:
    sys.path.append(project_root)

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
from src.load import load_data_from_zip
from src.preprocess import preprocess_data, LANGUAGES, TOP_QUANTILE
from src.analysis import (
    workexp_stats,
    python_vs_non_python_salary,
    remote_work_salary,
    high_paid_remote_by_industry,
    respondents_with_all_answers,
    salary_by_experience_level,
    salary_by_language
)
from src.visualization import (
    age_distribution_bar,
    barplot_salary_by_experience,
    industry_salary_heatmap,
    respondents_world_map,
    barh_plot,
    )

In [None]:
#Load data
# load_data_from_zip returns two objects, unpacked here as the survey responses
# and the schema.
# NOTE(review): the archive path is relative, while `src` is made importable
# from the parent directory ('..') -- confirm where load_data_from_zip resolves
# this path from.
survey_df, schema_df = load_data_from_zip('data/stack-overflow-developer-survey-2025.zip')

In [None]:
# Report the number of distinct ResponseId values in the raw survey data,
# formatted with thousands separators.
# Double quotes delimit the f-string so the single-quoted column key inside the
# replacement field also parses on Python < 3.12 (quote reuse requires PEP 701).
print(f"Total respondents: {survey_df['ResponseId'].nunique():,}")

In [None]:
#Preprocess data
# Run the project's preprocessing step on the raw survey frame; every later
# analysis and plotting cell works on the resulting `df`.
df = preprocess_data(survey_df)
# Last expression of the cell, so the head() preview is shown as the cell output.
df.head()

In [None]:
#Count respondents who answered all survey questions
# respondents_with_all_answers receives the preprocessed frame and the schema;
# its return value is printed below.
# NOTE(review): the count is taken on the preprocessed `df`, not the raw
# `survey_df` -- confirm that preprocessing keeps every schema column.
complete_count = respondents_with_all_answers(df, schema_df)
print(f'Number of respondents who answered all survey questions: {complete_count}')

In [None]:
"""📌In this analysis, we checked how many respondents answered every question in the survey.
The result was 0 respondents, meaning that no participant answered all the columns specified in the schema.
This indicates that the dataset contains a large number of missing data points."""

In [None]:
#Work Experience Overview
# The call is the cell's last expression, so whatever workexp_stats returns is
# shown as the cell output.
workexp_stats(df)

In [None]:
"""📌This shows that most respondents have around 10 years of work experience,
although the average is slightly higher due to the presence of respondents with very extensive experience."""

In [None]:
#Calculating average and median salaries by experience levels
# Build the per-experience-level salary table, then pass it to the plotting helper.
salary_exp_df = salary_by_experience_level(df)
barplot_salary_by_experience(salary_exp_df)
# The string below is the cell's last expression and is therefore shown as the
# cell output; its leading pushpin emoji was mis-decoded (UTF-8 read as cp1252)
# and has been restored.
"""📌Analysis of the ConvertedCompYearly column showed that the average and median salaries increase
with increasing experience.

This confirms the expected trend: the higher the level of experience, the higher the level of compensation,
with average values growing faster than median values, indicating high salaries among top specialists."""

In [None]:
#Python vs Non-Python Salary
python_salary = python_vs_non_python_salary(df)
# Replace the boolean index (True/False) with readable group labels.
python_salary.index = python_salary.index.map({
    True: 'Python Developers',
    False: 'Non-Python Developers'
})
# Display the relabelled result: without a trailing expression the cell shows
# nothing, although the next cell interprets these numbers.
python_salary

In [None]:
"""📌The analysis showed that developers who know Python have a higher median salary compared to those who do not.
The difference is not very large, but knowledge of Python gives a slight advantage in compensation."""

In [None]:
#Remote Work Impact
remote_salary = remote_work_salary(df)
# Display the result: the value was assigned but never shown or used, while the
# next cell draws conclusions from it.
remote_salary

In [None]:
"""📌Remote work is associated with higher salaries: flexibility and the ability to choose
where to work positively affect compensation, while working entirely in an office is the least favorable
option in terms of median salary."""

In [None]:
#Median salary by language
salary_lang = salary_by_language(df)
# Only the "MedianSalary" column of the result is plotted.
# NOTE(review): `file_name` suggests barh_plot also saves the figure to disk --
# confirm the output directory.
barh_plot(
    series=salary_lang["MedianSalary"],
    title="Median Salary by Programming Language",
    xlabel="Median Salary (USD)",
    file_name="salary_by_language.png"
)

In [None]:
"""📌Developers who know R have the highest median salary, while Python and SQL offer average salaries,
and JavaScript is slightly lower.
This may reflect differing demand for particular technologies depending on specialization and industry."""

In [None]:
#High-paid Remote Workers by Industry
# Industry counts come from high_paid_remote_by_industry, called with the
# TOP_QUANTILE setting imported from src.preprocess, and go straight to barh_plot.
industry_counts = high_paid_remote_by_industry(df, TOP_QUANTILE)
barh_plot(
    industry_counts,
    title="Industries among High-Paid Remote Workers",
    xlabel="Number of Respondents",
    file_name="Industries_among_High-paid_Remote_Workers.png",
)

In [None]:
"""📌The vast majority of high-paying remote developers work in Software Development, indicating high
demand for skilled programmers in this field. Other industries are significantly less represented,
although FinTech and Healthcare also offer high salaries for remote work."""

In [None]:
#Industry × Python Heatmap
# Drawn by the project's visualization helper directly from the preprocessed frame.
industry_salary_heatmap(df)

In [None]:
"""📌FinTech and Healthcare – those who know Python benefit significantly, highlighting the value of
programming languages in these industries.
Higher Education – those who do not know Python earn slightly more than those who do.
Perhaps Python is not as critical here.
Software Development – Python knowledge leads to a slight increase in salary.
Overall, Python knowledge increases salaries in most industries, especially in FinTech, Healthcare,
and Internet/Telecom."""

In [None]:
#Age Distribution
# Bar chart drawn by the project's visualization helper from the preprocessed frame.
age_distribution_bar(df)
# The string below is the cell's last expression and is therefore shown as the
# cell output; its leading pushpin emoji was mis-decoded (UTF-8 read as cp1252)
# and has been restored.
"""📌The graph confirms that the IT community that participated in the Annual Developer Survey
consists mainly of young and middle-aged developers."""

In [None]:
#Global Respondents Map
# Map drawn by the project's visualization helper from the preprocessed frame.
respondents_world_map(df)
# The string below is the cell's last expression, so it is shown as the cell output.
"""Respondents are concentrated in North America, Europe, and India.
The number of respondents correlates with the development of the IT market and the number
of professional developers in the country."""

In [None]:
#Final Insights
"""**Key insights**
1. **Respondent Distribution:** The highest number of respondents are concentrated in North America and Europe,
while Africa, South America, and some parts of Asia show lower participation, indicating uneven global representation.

2. **Remote Work & Salary:** Remote work is common among high-paid respondents, with median salaries slightly higher
for fully remote positions compared to hybrid roles.

3. **Technology Preferences:** R, C#, Python, SQL, and JavaScript are among the most used languages.
R and C# show higher median salaries, while Python is widely adopted but slightly lower in compensation.

4. **Industry Representation:** Software development dominates high-paid remote roles, followed by fintech, healthcare,
and internet/telecom sectors.

5. **Demographics & Experience:** Respondents span a wide range of experience and age groups,
with clear trends showing higher compensation correlating with more years of experience.

6. **Overall Trends:** Data reflects that remote work, specialized tech skills, and software-focused industries
are key drivers of higher compensation, with regional representation affecting overall survey insights.
"""