# Data Manipulation: Survey Monkey Dataset

### Step 1: Loading necessary libraries and reading the Survey Monkey dataset

In [1]:
import pandas as pd
import os

# The workbook is expected to sit in the directory the notebook is run from
pwd = os.getcwd()
workbook_path = pwd + "/Data - Survey Monkey Output Edited.xlsx"

# Load the 'Edited_Data' sheet of the workbook
df = pd.read_excel(workbook_path, sheet_name="Edited_Data")

# Work on a separate copy so the loaded frame `df` stays untouched
df_modified = df.copy()

### Step 2: Dropping unnecessary columns from the modified DataFrame

In [2]:
# Columns at positions 1 through 6 are dropped; the column at position 0 is kept.
# NOTE(review): the selection is purely positional - confirm it still matches the
# sheet layout if the export format changes.
drop_columns = list(df.columns)[1:7]

# Build the trimmed frame from the untouched `df` instead of dropping in place on
# `df_modified`: an in-place positional drop removes six *more* columns every time
# this cell is re-run, whereas this form gives the same result on every run.
df_modified = df.drop(columns=drop_columns)

### Step 3: Reshaping the DataFrame using pd.melt

In [3]:
# The first 8 columns identify a row; every column after them is unpivoted
n_id_columns = 8

# Identifier columns that are repeated on every melted row
id_vars = df_modified.columns[:n_id_columns]

# Columns whose headers become 'Question + Subquestion' and whose cells become 'Answer'
value_vars = df_modified.columns[n_id_columns:]

# Reshape from wide (one column per question/subquestion) to long (one row per answer)
df_melted = df_modified.melt(
    id_vars=id_vars,
    value_vars=value_vars,
    var_name='Question + Subquestion',
    value_name='Answer',
)


### Step 4: Preparing the 'question' DataFrame for merging

In [4]:
# Reading the 'Question' sheet from the Excel file into a DataFrame
question_imported = pd.read_excel(pwd + "/Data - Survey Monkey Output Edited.xlsx", sheet_name="Question")

# Build the lookup table used for merging by dropping the columns that are not needed.
# `drop` without `inplace=True` returns a new DataFrame, so `question_imported` keeps
# the sheet exactly as it was read. A plain `question = question_imported` would only
# create a second name for the same object, and an in-place drop would alter both.
question = question_imported.drop(columns=["Raw Question", "Raw Subquestion", "Subquestion"])


### Step 5: Merging the melted DataFrame with the 'question' DataFrame

In [5]:
# Attach the question lookup to every melted row; the left join keeps all melted rows
df_merged = df_melted.merge(question, how='left', on='Question + Subquestion')

# Per question, count the distinct respondents who gave a non-null answer
respondents = (
    df_merged
    .dropna(subset=["Answer"])
    .groupby(["Question"])["Respondent ID"]
    .nunique()
    .reset_index()
    .rename(columns={"Respondent ID": "Respondents"})
)

### Step 6: Further merging with additional respondent information

In [6]:
# Bring the per-question respondent count onto every row of the merged data
df_merged_two = df_merged.merge(respondents, how='left', on='Question')

# Only rows that carry an answer take part in the count below
answered_rows = df_merged_two.loc[df_merged_two["Answer"].notna()]

# For each (question + subquestion, answer) pair, count the distinct respondents
# who gave that answer
same_answer = (
    answered_rows
    .groupby(["Question + Subquestion", "Answer"])["Respondent ID"]
    .nunique()
    .reset_index()
    .rename(columns={"Respondent ID": "Same Answer"})
)


### Step 7: Merging the second merged DataFrame with 'same_answer' information

In [7]:
# Attach the 'Same Answer' count to every row that shares the same
# question/subquestion and answer
merge_keys = ["Question + Subquestion", "Answer"]
df_merged_three = df_merged_two.merge(same_answer, how='left', on=merge_keys)

# Rows without a match in `same_answer` get a count of 0 instead of NaN
df_merged_three = df_merged_three.assign(
    **{"Same Answer": df_merged_three["Same Answer"].fillna(0)}
)


### Step 8: Renaming columns for better readability

In [8]:
# Columns covered by the rename mapping.
# NOTE(review): every column currently maps to its own name, so the rename below
# leaves the column labels unchanged - replace the values with the intended
# display names if different labels are wanted.
columns_to_rename = [
    "Division Primary",
    "Division Secondary",
    "Position",
    "Generation",
    "Gender",
    "Tenure",
    "Employment Type",
]
rename_dict = {column: column for column in columns_to_rename}

# `rename` returns a new DataFrame, so `df_merged_three` itself is left as it is
output = df_merged_three.rename(columns=rename_dict)


### Step 9: Saving the final output to an Excel file

In [9]:
import os

# The result is written next to the notebook, in the current working directory
pwd = os.getcwd()
output_path = pwd + "/Final_Output.xlsx"

# Write the final table without the DataFrame index column
output.to_excel(output_path, index=False)


## Summary: 
The code performs data manipulation and reshaping on a Survey Monkey dataset. It drops unnecessary columns, reshapes the data
from wide to long format using pd.melt, merges in the question information, and adds two counts: the number of unique respondents
per question and the number of respondents who gave the same answer to each question/subquestion. The resulting DataFrame is saved
to an Excel file named 'Final_Output.xlsx'.