In [1]:
import pandas as pd

In [2]:
people = {
    "first": ["Al", "Bhosdu", "Babblu"], 
    "last": ["Singh", "Lullu", "Capone"],
    "email": ["as@gmal.com", "bl@email.com", "bs@email.com"]
}

In [3]:
df = pd.DataFrame(people)

In [4]:
df

Unnamed: 0,first,last,email
0,Al,Singh,as@gmal.com
1,Bhosdu,Lullu,bl@email.com
2,Babblu,Capone,bs@email.com


In [5]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [6]:
df.columns = ["first_name", "last_name", "email_id"]

In [7]:
df

Unnamed: 0,first_name,last_name,email_id
0,Al,Singh,as@gmal.com
1,Bhosdu,Lullu,bl@email.com
2,Babblu,Capone,bs@email.com


In [8]:
df.columns = [x.upper() for x in df.columns]
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL_ID
0,Al,Singh,as@gmal.com
1,Bhosdu,Lullu,bl@email.com
2,Babblu,Capone,bs@email.com


In [9]:
df.columns = df.columns.str.replace("_","-")
df

Unnamed: 0,FIRST-NAME,LAST-NAME,EMAIL-ID
0,Al,Singh,as@gmal.com
1,Bhosdu,Lullu,bl@email.com
2,Babblu,Capone,bs@email.com


In [10]:
df.columns = [x.lower() for x in df.columns]
df.columns = df.columns.str.replace("-", "_")
df

Unnamed: 0,first_name,last_name,email_id
0,Al,Singh,as@gmal.com
1,Bhosdu,Lullu,bl@email.com
2,Babblu,Capone,bs@email.com


ONLY CHANGE SOME COLUMNS?

In [11]:
df.rename(columns={"first_name": "first", "last_name": "last"}, inplace=True)

In [12]:
df

Unnamed: 0,first,last,email_id
0,Al,Singh,as@gmal.com
1,Bhosdu,Lullu,bl@email.com
2,Babblu,Capone,bs@email.com


In [15]:
df.loc[2, ["last", "email_id"]] = ["Bhosda", "hello123@org.edu"]

In [16]:
df

Unnamed: 0,first,last,email_id
0,Al,Singh,as@gmal.com
1,Bhosdu,Lullu,bl@email.com
2,Babblu,Bhosda,hello123@org.edu


In [18]:
df.rename(columns={"email_id": "email"}, inplace=True)
df

Unnamed: 0,first,last,email
0,Al,Singh,as@gmal.com
1,Bhosdu,Lullu,bl@email.com
2,Babblu,Bhosda,hello123@org.edu


In [19]:
df["email"] = df["email"].str.upper()

In [20]:
df

Unnamed: 0,first,last,email
0,Al,Singh,AS@GMAL.COM
1,Bhosdu,Lullu,BL@EMAIL.COM
2,Babblu,Bhosda,HELLO123@ORG.EDU


In [21]:
df["email"] = df["email"].str.lower()

In [22]:
df

Unnamed: 0,first,last,email
0,Al,Singh,as@gmal.com
1,Bhosdu,Lullu,bl@email.com
2,Babblu,Bhosda,hello123@org.edu


CHANGING MULTIPLE ROWS - MORE ADVANCED CHANGES

In [25]:
# apply - used for calling a function on our values
# map
# applymap
# replace

In [26]:
df["email"].apply(len)

0    11
1    12
2    16
Name: email, dtype: int64

In [27]:
def update_email(email):
    return email.upper()

In [28]:
df["email"].apply(update_email)

0         AS@GMAL.COM
1        BL@EMAIL.COM
2    HELLO123@ORG.EDU
Name: email, dtype: object

In [29]:
df

Unnamed: 0,first,last,email
0,Al,Singh,as@gmal.com
1,Bhosdu,Lullu,bl@email.com
2,Babblu,Bhosda,hello123@org.edu


In [31]:
df["email"] = df["email"].apply(update_email)

In [32]:
df

Unnamed: 0,first,last,email
0,Al,Singh,AS@GMAL.COM
1,Bhosdu,Lullu,BL@EMAIL.COM
2,Babblu,Bhosda,HELLO123@ORG.EDU


In [33]:
df["email"] = df["email"].apply(lambda x: x.lower())

In [34]:
df

Unnamed: 0,first,last,email
0,Al,Singh,as@gmal.com
1,Bhosdu,Lullu,bl@email.com
2,Babblu,Bhosda,hello123@org.edu


In [36]:
df["email"].apply(len)

0    11
1    12
2    16
Name: email, dtype: int64

In [39]:
df.apply(len)
# THIS APPLIES LEN FUNCTION TO EACH SERIES I.E COLUMNS IN THE DATAFRAME,
# NUMBER OF ROWS IN EACH COLUMN
# NOT TO EVERY VALUE IN THE DATAFRAME

first    3
last     3
email    3
dtype: int64

In [40]:
len(df["email"])

3

In [42]:
df.apply(pd.Series.min)
# gives minimum value in every series object/column

first             Al
last          Bhosda
email    as@gmal.com
dtype: object

In [47]:
# APPLYMAP - applies a function to every element in dataframe
df.applymap(len)

  df.applymap(len)


Unnamed: 0,first,last,email
0,2,5,11
1,6,5,12
2,6,6,16


In [51]:
# applies a function to every element in dataframe
# DATFRAME.map()
df.map(str.lower)

Unnamed: 0,first,last,email
0,al,singh,as@gmal.com
1,bhosdu,lullu,bl@email.com
2,babblu,bhosda,hello123@org.edu


In [52]:
df.map(len)

Unnamed: 0,first,last,email
0,2,5,11
1,6,5,12
2,6,6,16


In [55]:
# Series.map
df["first"].map({"Al": "Hal"})

0    Hal
1    NaN
2    NaN
Name: first, dtype: object

In [56]:
df

Unnamed: 0,first,last,email
0,Al,Singh,as@gmal.com
1,Bhosdu,Lullu,bl@email.com
2,Babblu,Bhosda,hello123@org.edu


In [57]:
df2 = pd.read_csv("data/survey_results_public.csv", index_col="ResponseId")
schema_df = pd.read_csv("data/survey_results_schema.csv", index_col="qname")

In [58]:
pd.set_option("display.max_columns", 85)
pd.set_option("display.max_rows", 85)

In [59]:
df2.head()

Unnamed: 0_level_0,Q120,MainBranch,Age,Employment,RemoteWork,CodingActivities,EdLevel,LearnCode,LearnCodeOnline,LearnCodeCoursesCert,YearsCode,YearsCodePro,DevType,OrgSize,PurchaseInfluence,TechList,BuyNewTool,Country,Currency,CompTotal,LanguageHaveWorkedWith,LanguageWantToWorkWith,DatabaseHaveWorkedWith,DatabaseWantToWorkWith,PlatformHaveWorkedWith,PlatformWantToWorkWith,WebframeHaveWorkedWith,WebframeWantToWorkWith,MiscTechHaveWorkedWith,MiscTechWantToWorkWith,ToolsTechHaveWorkedWith,ToolsTechWantToWorkWith,NEWCollabToolsHaveWorkedWith,NEWCollabToolsWantToWorkWith,OpSysPersonal use,OpSysProfessional use,OfficeStackAsyncHaveWorkedWith,OfficeStackAsyncWantToWorkWith,OfficeStackSyncHaveWorkedWith,OfficeStackSyncWantToWorkWith,AISearchHaveWorkedWith,AISearchWantToWorkWith,AIDevHaveWorkedWith,AIDevWantToWorkWith,NEWSOSites,SOVisitFreq,SOAccount,SOPartFreq,SOComm,SOAI,AISelect,AISent,AIAcc,AIBen,AIToolInterested in Using,AIToolCurrently Using,AIToolNot interested in Using,AINextVery different,AINextNeither different nor similar,AINextSomewhat similar,AINextVery similar,AINextSomewhat different,TBranch,ICorPM,WorkExp,Knowledge_1,Knowledge_2,Knowledge_3,Knowledge_4,Knowledge_5,Knowledge_6,Knowledge_7,Knowledge_8,Frequency_1,Frequency_2,Frequency_3,TimeSearching,TimeAnswering,ProfessionalTech,Industry,SurveyLength,SurveyEase,ConvertedCompYearly
ResponseId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1
1,I agree,None of these,18-24 years old,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,I agree,I am a developer by profession,25-34 years old,"Employed, full-time",Remote,Hobby;Contribute to open-source projects;Boots...,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Books / Physical media;Colleague;Friend or fam...,Formal documentation provided by the owner of ...,Other,18.0,9.0,"Senior Executive (C-Suite, VP, etc.)",2 to 9 employees,I have a great deal of influence,Investigate,Start a free trial;Ask developers I know/work ...,United States of America,USD\tUnited States dollar,285000.0,HTML/CSS;JavaScript;Python,Bash/Shell (all shells);C#;Dart;Elixir;GDScrip...,Supabase,Firebase Realtime Database;Supabase,Amazon Web Services (AWS);Netlify;Vercel,Fly.io;Netlify;Render,Next.js;React;Remix;Vue.js,Deno;Elm;Nuxt.js;React;Svelte;Vue.js,Electron;React Native;Tauri,Capacitor;Electron;Tauri;Uno Platform;Xamarin,Docker;Kubernetes;npm;Pip;Vite;Webpack;Yarn,Godot;npm;pnpm;Unity 3D;Unreal Engine;Vite;Web...,Vim;Visual Studio Code,Vim;Visual Studio Code,iOS;iPadOS;MacOS;Windows;Windows Subsystem for...,MacOS;Windows;Windows Subsystem for Linux (WSL),Asana;Basecamp;GitHub Discussions;Jira;Linear;...,GitHub Discussions;Linear;Notion;Trello,Cisco Webex Teams;Discord;Google Chat;Google M...,Discord;Signal;Slack;Zoom,ChatGPT,ChatGPT;Neeva AI,GitHub Copilot,GitHub Copilot,Stack Overflow;Stack Exchange,Daily or almost daily,Yes,A few times per month or weekly,"Yes, definitely","I don't think it's super necessary, but I thin...",Yes,Indifferent,Other (please explain),Somewhat distrust,Learning about a codebase;Writing code;Debuggi...,Writing code;Committing and reviewing code,,,,,,,Yes,People manager,10.0,Strongly agree,Agree,Strongly agree,Agree,Agree,Agree,Agree,Strongly agree,1-2 times a week,10+ times a week,Never,15-30 minutes a day,15-30 minutes a day,DevOps function;Microservices;Automated testin...,"Information Services, IT, Software Development...",Appropriate in length,Easy,285000.0
3,I agree,I am a developer by profession,45-54 years old,"Employed, full-time","Hybrid (some remote, some in-person)",Hobby;Professional development or self-paced l...,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Books / Physical media;Colleague;On the job tr...,Formal documentation provided by the owner of ...,,27.0,23.0,"Developer, back-end","5,000 to 9,999 employees",I have some influence,Given a list,Start a free trial;Ask developers I know/work ...,United States of America,USD\tUnited States dollar,250000.0,Bash/Shell (all shells);Go,Haskell;OCaml;Rust,,,Amazon Web Services (AWS);Google Cloud;OpenSta...,,,,,,Cargo;Docker;Kubernetes;Make;Nix,Cargo;Kubernetes;Nix,Emacs;Helix,Emacs;Helix,MacOS;Other Linux-based,MacOS;Other Linux-based,Markdown File;Stack Overflow for Teams,Markdown File,Microsoft Teams;Slack;Zoom,Slack;Zoom,,,,,Stack Overflow;Stack Exchange;Stack Overflow f...,A few times per month or weekly,Yes,Less than once per month or monthly,Neutral,,"No, and I don't plan to",,,,,,,,,,,,Yes,Individual contributor,23.0,Strongly agree,Neither agree nor disagree,Agree,Agree,Agree,Agree,Agree,Agree,6-10 times a week,6-10 times a week,3-5 times a week,30-60 minutes a day,30-60 minutes a day,DevOps function;Microservices;Automated testin...,"Information Services, IT, Software Development...",Appropriate in length,Easy,250000.0
4,I agree,I am a developer by profession,25-34 years old,"Employed, full-time","Hybrid (some remote, some in-person)",Hobby,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Colleague;Friend or family member;Other online...,Formal documentation provided by the owner of ...,,12.0,7.0,"Developer, front-end",100 to 499 employees,I have some influence,Investigate,Start a free trial;Ask developers I know/work ...,United States of America,USD\tUnited States dollar,156000.0,Bash/Shell (all shells);HTML/CSS;JavaScript;PH...,Bash/Shell (all shells);HTML/CSS;JavaScript;Ru...,PostgreSQL;Redis,PostgreSQL;Redis,Cloudflare;Heroku,Cloudflare;Heroku,Node.js;React;Ruby on Rails;Vue.js;WordPress,Node.js;Ruby on Rails;Vue.js,,,Homebrew;npm;Vite;Webpack;Yarn,Homebrew;npm;Vite,IntelliJ IDEA;Vim;Visual Studio Code;WebStorm,IntelliJ IDEA;Vim;WebStorm,iOS;iPadOS;MacOS,iOS;iPadOS;MacOS,Jira,Jira,Discord;Google Meet;Microsoft Teams;Slack;Zoom,Discord;Google Meet;Slack;Zoom,,,,,Stack Overflow;Stack Exchange,A few times per week,Yes,Less than once per month or monthly,"No, not really",I'm wearing of Stack Overflow using AI.,"No, and I don't plan to",,,,,,,,,,,,Yes,Individual contributor,7.0,Strongly agree,Strongly disagree,Strongly agree,Strongly agree,Agree,Neither agree nor disagree,Agree,Agree,1-2 times a week,10+ times a week,1-2 times a week,15-30 minutes a day,30-60 minutes a day,Automated testing;Continuous integration (CI) ...,,Appropriate in length,Easy,156000.0
5,I agree,I am a developer by profession,25-34 years old,"Employed, full-time;Independent contractor, fr...",Remote,Hobby;Contribute to open-source projects;Profe...,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Books / Physical media;Online Courses or Certi...,Formal documentation provided by the owner of ...,Other;Codecademy;edX,6.0,4.0,"Developer, full-stack",20 to 99 employees,I have some influence,Investigate,Start a free trial;Ask developers I know/work ...,Philippines,PHP\tPhilippine peso,1320000.0,HTML/CSS;JavaScript;TypeScript,HTML/CSS;JavaScript;Python;Rust;TypeScript,BigQuery;Elasticsearch;MongoDB;PostgreSQL,Elasticsearch;MongoDB;PostgreSQL;Redis;Supabase,Amazon Web Services (AWS);Firebase;Heroku;Netl...,Amazon Web Services (AWS);Cloudflare;Digital O...,Express;Gatsby;NestJS;Next.js;Node.js;React,Express;NestJS;Next.js;Node.js;React;Remix;Vue.js,,,Docker;npm;Webpack;Yarn,Docker;npm;Yarn,Vim;Visual Studio Code,Vim;Visual Studio Code,Other (Please Specify):,Other (Please Specify):,Confluence;Jira;Notion,Confluence;Jira;Notion,Discord;Google Meet;Slack;Zoom,Discord;Google Meet;Slack;Zoom,ChatGPT,ChatGPT,,,Stack Overflow;Stack Exchange,A few times per week,No,,Neutral,Using AI to suggest better answer to my questi...,Yes,Very favorable,Increase productivity;Greater efficiency;Speed...,Somewhat trust,Project planning;Testing code;Committing and r...,Learning about a codebase;Writing code;Documen...,,,,,,,Yes,Individual contributor,6.0,Agree,Strongly agree,Agree,Agree,Neither agree nor disagree,Agree,Strongly agree,Agree,1-2 times a week,1-2 times a week,3-5 times a week,60-120 minutes a day,30-60 minutes a day,Microservices;Automated testing;Observability ...,Other,Appropriate in length,Neither easy nor difficult,23456.0


In [60]:
schema_df.head()

Unnamed: 0_level_0,qid,question,force_resp,type,selector
qname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
S0,QID16,"<div><span style=""font-size:19px;""><strong>Hel...",False,DB,TB
MetaInfo,QID12,Browser Meta Info,False,Meta,Browser
Q310,QID310,"<div><span style=""font-size:19px;""><strong>You...",False,DB,TB
Q120,QID312,,True,MC,SAVR
S1,QID1,"<span style=""font-size:22px; font-family: aria...",False,DB,TB


In [63]:
schema_df.rename(columns={"qid": "quesid"}, inplace=True)

In [64]:
schema_df

Unnamed: 0_level_0,quesid,question,force_resp,type,selector
qname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
S0,QID16,"<div><span style=""font-size:19px;""><strong>Hel...",False,DB,TB
MetaInfo,QID12,Browser Meta Info,False,Meta,Browser
Q310,QID310,"<div><span style=""font-size:19px;""><strong>You...",False,DB,TB
Q120,QID312,,True,MC,SAVR
S1,QID1,"<span style=""font-size:22px; font-family: aria...",False,DB,TB
MainBranch,QID2,Which of the following options best describes ...,True,MC,SAVR
Age,QID127,What is your age? *,True,MC,MAVR
Employment,QID296,Which of the following best describes your cur...,False,MC,MAVR
RemoteWork,QID308,Which best describes your current work situation?,False,MC,SAVR
CodingActivities,QID297,Which of the following best describes the code...,False,MC,MAVR
