Analyze the increase in popularity of a language in the current year due to developers’ interest in the previous year. #301

anushkasaxena07 · anushkasaxena07 · commit b2dc5453db3f · 2024-07-02T23:11:44.000+05:30
diff --git a/Stackoverflow_Survey_Analysis.ipynb b/Stackoverflow_Survey_Analysis.ipynb
@@ -21899,25 +21899,61 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "source": [
+    "# Analyze the increase in popularity of a language in the current year due to developers’ interest in the previous year (#301)"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Read the raw survey exports for the two years being compared\n",
+    "file_2022, file_2023 = (\n",
+    "    pd.read_csv(csv_path)\n",
+    "    for csv_path in (\n",
+    "        r\"Data/survey_results_public_2022.csv\",\n",
+    "        r\"Data/survey_results_public_2023.csv\",\n",
+    "    )\n",
+    ")\n",
+    "\n",
+    "def preprocess_data(df):\n",
+    "    \"\"\"Return a copy of ``df`` whose language columns are ready for counting.\n",
+    "\n",
+    "    The rest of this cell reads ``LanguageWorkedWith`` and\n",
+    "    ``LanguageDesireNextYear``. Frames that instead carry\n",
+    "    ``LanguageHaveWorkedWith`` / ``LanguageWantToWorkWith`` get those columns\n",
+    "    aliased to the names used here. Missing answers become empty strings.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    df : pandas.DataFrame\n",
+    "        Raw survey responses. The input frame is not modified.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    pandas.DataFrame\n",
+    "        Copy of ``df`` in which both language columns exist and contain no NaN.\n",
+    "\n",
+    "    Raises\n",
+    "    ------\n",
+    "    KeyError\n",
+    "        If neither spelling of a language column is present.\n",
+    "    \"\"\"\n",
+    "    # Column name used in this notebook -> alternative spelling.\n",
+    "    # NOTE(review): the alternatives are believed to be the 2021+ survey schema names;\n",
+    "    # confirm against the headers of the CSV files actually loaded.\n",
+    "    aliases = {\n",
+    "        'LanguageWorkedWith': 'LanguageHaveWorkedWith',\n",
+    "        'LanguageDesireNextYear': 'LanguageWantToWorkWith',\n",
+    "    }\n",
+    "    # Work on a copy so the raw frame stays intact and re-running the cell is idempotent\n",
+    "    df = df.copy()\n",
+    "    for target, alternative in aliases.items():\n",
+    "        if target not in df.columns:\n",
+    "            if alternative not in df.columns:\n",
+    "                raise KeyError(f\"Neither '{target}' nor '{alternative}' found in survey data\")\n",
+    "            df[target] = df[alternative]\n",
+    "        # Fill missing values so the .str accessor downstream sees only strings\n",
+    "        df[target] = df[target].fillna('')\n",
+    "    return df\n",
+    "\n",
+    "# Run the same cleaning routine over both yearly frames\n",
+    "data_2022, data_2023 = map(preprocess_data, (file_2022, file_2023))\n",
+    "\n",
+    "# Function to count the occurrences of each language\n",
+    "def count_languages(df, column):\n",
+    "    \"\"\"Count how many times each language appears in ``df[column]``.\n",
+    "\n",
+    "    Each cell is split on ';' and every token is stripped of surrounding\n",
+    "    whitespace. Blank and missing tokens are discarded, so unanswered rows\n",
+    "    (stored as '') are not tallied as a language named ''.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    df : pandas.DataFrame\n",
+    "        Frame holding the responses.\n",
+    "    column : str\n",
+    "        Name of the column with ';'-separated language lists.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    pandas.Series\n",
+    "        Occurrence count per language, as produced by ``value_counts()``.\n",
+    "    \"\"\"\n",
+    "    languages = df[column].str.split(';').explode().str.strip()\n",
+    "    # ''.split(';') yields [''], so empty answers would otherwise show up as a language\n",
+    "    languages = languages[languages.notna() & (languages != '')]\n",
+    "    return languages.value_counts()\n",
+    "\n",
+    "# Count languages for both years\n",
+    "lang_count_2022 = count_languages(data_2022, 'LanguageWorkedWith')\n",
+    "lang_desire_2022 = count_languages(data_2022, 'LanguageDesireNextYear')\n",
+    "lang_count_2023 = count_languages(data_2023, 'LanguageWorkedWith')\n",
+    "\n",
+    "# Convert to DataFrames for easier comparison.\n",
+    "# The columns are labelled explicitly (rename_axis + reset_index(name=...)) instead of renaming\n",
+    "# the default 'index'/<column> labels, because those defaults changed in pandas 2.0.\n",
+    "lang_count_2022_df = lang_count_2022.rename_axis('Language').reset_index(name='Count_2022')\n",
+    "lang_desire_2022_df = lang_desire_2022.rename_axis('Language').reset_index(name='Desire_2022')\n",
+    "lang_count_2023_df = lang_count_2023.rename_axis('Language').reset_index(name='Count_2023')\n",
+    "\n",
+    "# Merge the dataframes; a language missing from one of the three tallies gets a count of 0\n",
+    "merged_df = (\n",
+    "    lang_count_2022_df\n",
+    "    .merge(lang_desire_2022_df, on='Language', how='outer')\n",
+    "    .merge(lang_count_2023_df, on='Language', how='outer')\n",
+    "    .fillna(0)\n",
+    ")\n",
+    "\n",
+    "# Calculate the increase in popularity (difference of raw mention counts between the two years)\n",
+    "merged_df['Increase'] = merged_df['Count_2023'] - merged_df['Count_2022']\n",
+    "# Increase per 2022 'desire' mention. A zero denominator is masked to NaN so the ratio is\n",
+    "# reported as undefined rather than +/-inf.\n",
+    "desire_2022_nonzero = merged_df['Desire_2022'].where(merged_df['Desire_2022'] != 0)\n",
+    "merged_df['Interest_to_Popularity'] = merged_df['Increase'] / desire_2022_nonzero\n",
+    "\n",
+    "# Sort by the increase in popularity\n",
+    "merged_df = merged_df.sort_values(by='Increase', ascending=False)\n",
+    "\n",
+    "print(merged_df[['Language', 'Count_2022', 'Desire_2022', 'Count_2023', 'Increase', 'Interest_to_Popularity']])\n"
+   ]
   },
   {
    "cell_type": "markdown",