## Setup: Downloading and Loading the Data
**Install the libraries**


In [94]:
!pip install pandas



In [95]:
!pip install matplotlib




### Step 2: Import necessary libraries and load the dataset


In [135]:
import pandas as pd
import matplotlib.pyplot as plt

### Load the data


In [138]:
df = pd.read_csv("survey_data_RAW_for google_looker_lab26.csv")

### Display the first few rows of the data to understand its structure


In [140]:
df.head(1)

Unnamed: 0,ResponseId,MainBranch,Age,Employment,RemoteWork,Check,CodingActivities,EdLevel,LearnCode,LearnCodeOnline,...,JobSatPoints_6,JobSatPoints_7,JobSatPoints_8,JobSatPoints_9,JobSatPoints_10,JobSatPoints_11,SurveyLength,SurveyEase,ConvertedCompYearly,JobSat
0,2,I am a developer by profession,35-44 years old,"Employed, full-time",Remote,Apples,Hobby;Contribute to open-source projects;Other...,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Books / Physical media;Colleague;On the job tr...,Technical documentation;Blogs;Books;Written Tu...,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,


In [143]:
df.columns.tolist()

['ResponseId',
 'MainBranch',
 'Age',
 'Employment',
 'RemoteWork',
 'Check',
 'CodingActivities',
 'EdLevel',
 'LearnCode',
 'LearnCodeOnline',
 'TechDoc',
 'YearsCode',
 'YearsCodePro',
 'DevType',
 'OrgSize',
 'PurchaseInfluence',
 'BuyNewTool',
 'BuildvsBuy',
 'TechEndorse',
 'Country',
 'Currency',
 'CompTotal',
 'LanguageHaveWorkedWith',
 'LanguageWantToWorkWith',
 'LanguageAdmired',
 'DatabaseHaveWorkedWith',
 'DatabaseWantToWorkWith',
 'DatabaseAdmired',
 'PlatformHaveWorkedWith',
 'PlatformWantToWorkWith',
 'PlatformAdmired',
 'WebframeHaveWorkedWith',
 'WebframeWantToWorkWith',
 'WebframeAdmired',
 'EmbeddedHaveWorkedWith',
 'EmbeddedWantToWorkWith',
 'EmbeddedAdmired',
 'MiscTechHaveWorkedWith',
 'MiscTechWantToWorkWith',
 'MiscTechAdmired',
 'ToolsTechHaveWorkedWith',
 'ToolsTechWantToWorkWith',
 'ToolsTechAdmired',
 'NEWCollabToolsHaveWorkedWith',
 'NEWCollabToolsWantToWorkWith',
 'NEWCollabToolsAdmired',
 'OpSysPersonal use',
 'OpSysProfessional use',
 'OfficeStackAsyncHa

In [153]:
# 🧹 Drop rows with any missing values in key fields
df_clean = df.dropna(subset=[
    'LanguageHaveWorkedWith', 'DatabaseHaveWorkedWith', 'PlatformHaveWorkedWith', 'WebframeHaveWorkedWith',
    'LanguageWantToWorkWith', 'DatabaseWantToWorkWith', 'PlatformWantToWorkWith', 'WebframeWantToWorkWith',
    'Age', 'Country', 'EdLevel'
]).copy()

# 🗂️ Columns to clean
cols_to_clean = [
    'LanguageHaveWorkedWith', 'DatabaseHaveWorkedWith', 'PlatformHaveWorkedWith', 'WebframeHaveWorkedWith',
    'LanguageWantToWorkWith', 'DatabaseWantToWorkWith', 'PlatformWantToWorkWith', 'WebframeWantToWorkWith'
]

# 🧼 Clean semicolon-separated columns — split + strip only (no exploding yet)
for col in cols_to_clean:
    df_clean[col + 'Clean'] = df_clean[col].str.split(';').apply(lambda x: [i.strip() for i in x])

# 💥 Explode all cleaned columns *together* (safely)
# Only explode rows where all clean columns have lists of same length (or fill missing with NaN)
from pandas.core.reshape.concat import concat

# Step 1: create a new column with the max length of the lists per row
max_len = df_clean[[col + 'Clean' for col in cols_to_clean]].applymap(len).max(axis=1)

# Step 2: repeat rows based on max list length
df_clean = df_clean.loc[df_clean.index.repeat(max_len)].reset_index(drop=True)

# Step 3: flatten the lists one by one
for col in cols_to_clean:
    df_clean[col + 'Clean'] = df_clean[col + 'Clean'].explode(ignore_index=True)

# 🗑️ Keep only the necessary columns to reduce file size
columns_to_keep = [
    'LanguageHaveWorkedWithClean', 'DatabaseHaveWorkedWithClean', 'PlatformHaveWorkedWithClean', 'WebframeHaveWorkedWithClean',
    'LanguageWantToWorkWithClean', 'DatabaseWantToWorkWithClean', 'PlatformWantToWorkWithClean', 'WebframeWantToWorkWithClean',
    'Age', 'Country', 'EdLevel'
]

df_clean = df_clean[columns_to_keep]

# 💾 Save the cleaned DataFrame to CSV
df_clean.to_csv('df_cleanLAB26.csv', index=False)

  max_len = df_clean[[col + 'Clean' for col in cols_to_clean]].applymap(len).max(axis=1)
