@@ -21899,25 +21899,61 @@
2189921899 ]
2190021900 },
2190121901 {
21902- "cell_type": "code",
21903- "execution_count": null,
21904- "metadata": {},
21905- "outputs": [],
21906- "source": []
21907- },
21908- {
21909- "cell_type": "code",
21910- "execution_count": null,
21902+ "cell_type": "markdown",
2191121903 "metadata": {},
21912- "outputs": [],
21913- "source": []
21904+ "source": [
21905+ "# Analyze the increase in popularity of a language in the current year due to developers’ interest in it the previous year (issue #301)"
21906+ ]
2191421907 },
2191521908 {
2191621909 "cell_type": "code",
2191721910 "execution_count": null,
2191821911 "metadata": {},
2191921912 "outputs": [],
21920- "source": []
21913+ "source": [
21914+ "import pandas as pd\n",
21915+ "\n",
21916+ "# Read each year's public survey export into its own DataFrame\n",
21917+ "file_2022 = pd.read_csv(filepath_or_buffer=r\"Data/survey_results_public_2022.csv\")\n",
21918+ "file_2023 = pd.read_csv(filepath_or_buffer=r\"Data/survey_results_public_2023.csv\")\n",
21919+ "\n",
21920+ "def preprocess_data(df):\n",
21921+ "    \"\"\"Return a copy of df whose two language columns exist and contain no NaN.\n",
21922+ "\n",
21923+ "    Frames that carry the columns as LanguageHaveWorkedWith / LanguageWantToWorkWith\n",
21924+ "    get them aliased to LanguageWorkedWith / LanguageDesireNextYear, the names this\n",
21925+ "    function fills, so either header spelling is accepted.\n",
21926+ "    NOTE(review): the 2022/2023 exports are presumed to use the newer spelling;\n",
21927+ "    confirm against the CSV headers.\n",
21928+ "\n",
21929+ "    The caller's frame is not modified. Raises KeyError if neither spelling of a\n",
21930+ "    column is present.\n",
21931+ "    \"\"\"\n",
21932+ "    aliases = {\n",
21933+ "        'LanguageHaveWorkedWith': 'LanguageWorkedWith',\n",
21934+ "        'LanguageWantToWorkWith': 'LanguageDesireNextYear',\n",
21935+ "    }\n",
21936+ "    # Alias only when the expected name is absent, so a frame that already has it is left as-is\n",
21937+ "    renames = {old: new for old, new in aliases.items() if old in df.columns and new not in df.columns}\n",
21938+ "    cleaned = df.rename(columns=renames)  # rename returns a new frame\n",
21939+ "    for column in aliases.values():\n",
21940+ "        cleaned[column] = cleaned[column].fillna('')\n",
21941+ "    return cleaned\n",
21925+ "\n",
21926+ "# Normalise missing language answers in each year's responses\n",
21927+ "data_2022 = preprocess_data(df=file_2022)\n",
21928+ "data_2023 = preprocess_data(df=file_2023)\n",
21929+ "\n",
21930+ "# Function to count the occurrences of each language\n",
21931+ "def count_languages(df, column):\n",
21932+ "    \"\"\"Count how many responses mention each language in a ';'-separated column.\n",
21933+ "\n",
21934+ "    Args:\n",
21935+ "        df: DataFrame of survey responses.\n",
21936+ "        column: Name of a string column whose cells look like 'Python;Rust;Go'.\n",
21937+ "\n",
21938+ "    Returns:\n",
21939+ "        Series mapping language name to number of mentions, most frequent first.\n",
21940+ "        Blank entries are not counted.\n",
21941+ "\n",
21942+ "    Raises:\n",
21943+ "        KeyError: if column is not in df.\n",
21944+ "    \"\"\"\n",
21945+ "    languages = df[column].str.split(';').explode().str.strip()\n",
21946+ "    # Cells that were blank (e.g. NaN filled with '') or had a stray ';' explode\n",
21947+ "    # into '' entries; drop them so '' is not tallied as a language.\n",
21948+ "    languages = languages[languages != '']\n",
21949+ "    return languages.value_counts()\n",
21934+ "\n",
21935+ "# Tally language mentions: current use in both years, plus 2022 interest\n",
21936+ "lang_count_2022 = count_languages(data_2022, column='LanguageWorkedWith')\n",
21937+ "lang_desire_2022 = count_languages(data_2022, column='LanguageDesireNextYear')\n",
21938+ "lang_count_2023 = count_languages(data_2023, column='LanguageWorkedWith')\n",
21939+ "\n",
21940+ "# Turn each tally into a two-column frame: 'Language' plus a named count column.\n",
21941+ "# rename_axis/reset_index(name=...) sets both labels explicitly rather than relying on\n",
21942+ "# the labels value_counts() emits, which differ between pandas 1.x and 2.x; a rename\n",
21943+ "# keyed on 'index' would miss on 2.x and leave no 'Language' column to merge on.\n",
21944+ "lang_count_2022_df = lang_count_2022.rename_axis('Language').reset_index(name='Count_2022')\n",
21945+ "lang_desire_2022_df = lang_desire_2022.rename_axis('Language').reset_index(name='Desire_2022')\n",
21946+ "lang_count_2023_df = lang_count_2023.rename_axis('Language').reset_index(name='Count_2023')\n",
21944+ "\n",
21945+ "# Outer-join the three tallies on language; a language absent from a tally gets 0\n",
21946+ "merged_df = (\n",
21947+ "    lang_count_2022_df\n",
21948+ "    .merge(lang_desire_2022_df, how='outer', on='Language')\n",
21949+ "    .merge(lang_count_2023_df, how='outer', on='Language')\n",
21950+ "    .fillna(0)\n",
21951+ ")\n",
21947+ "\n",
21948+ "# Year-over-year change in the number of respondents using each language\n",
21949+ "merged_df['Increase'] = merged_df['Count_2023'] - merged_df['Count_2022']\n",
21950+ "# Change per unit of prior-year interest. Where Desire_2022 is 0 the ratio is undefined,\n",
21951+ "# so the denominator is masked to NaN there instead of letting the division yield +/-inf.\n",
21952+ "merged_df['Interest_to_Popularity'] = merged_df['Increase'] / merged_df['Desire_2022'].where(merged_df['Desire_2022'] != 0)\n",
21951+ "\n",
21952+ "# Order rows from the largest gain to the largest loss\n",
21953+ "merged_df = merged_df.sort_values('Increase', ascending=False)\n",
21954+ "\n",
21955+ "print(merged_df.loc[:, ['Language', 'Count_2022', 'Desire_2022', 'Count_2023', 'Increase', 'Interest_to_Popularity']])\n"
21956+ ]
2192121957 },
2192221958 {
2192321959 "cell_type": "markdown",
0 commit comments