# Convert

This notebook converts the survey results exported from SurveyMonkey to the format required by the OppScore notebook.

In [2]:
import pandas as pd

# Load the raw SurveyMonkey export; the relative path is resolved against
# the current working directory.
survey_export_path = "SurveyMonkey.csv"
df = pd.read_csv(survey_export_path)

In [3]:
# Keep only the importance / satisfaction question columns; every other
# column of the export is removed from df in place.
def keepcol(col):
    """Return True when a column header is an importance or satisfaction question."""
    return col.startswith(("How important is it", "How satisfied are you"))

unwanted = [name for name in df.columns if not keepcol(name)]
df.drop(columns=unwanted, inplace=True)

In [4]:
def get_outcome(question: str) -> str:
    """Map a survey question to its short outcome label.

    The question is searched (case-sensitively) for a set of known phrases;
    the label of the first phrase found, in the order listed below, is
    returned.

    Args:
        question: Full text of a survey question (a column header).

    Returns:
        The outcome label for the first matching phrase, or "Unknown" when
        none of the phrases occurs in the question.
    """
    # Ordered (phrase, label) pairs: earlier entries take precedence.
    phrase_to_outcome = (
        ("API-first development process",
         "API-first development process"),
        ("minimize the amount of API design metadata",
         "Minimize the amount of API design metadata"),
        ("validate query/header/route parameters",
         "Validate route handler inputs"),
        ("include the security scheme and security requirements",
         "Include the security scheme and requirements"),
        ("use features / libraries that are native AOT friendly",
         "Use features that are native AOT friendly"),
    )
    return next(
        (label for phrase, label in phrase_to_outcome if phrase in question),
        "Unknown",
    )



In [5]:
# Reshape the wide survey frame into a long frame with three columns:
#   outcome      - short label returned by get_outcome() for the question pair
#   importance   - raw answer text from the first column of the pair
#   satisfaction - raw answer text from the second column of the pair
#
# Columns are consumed in (importance, satisfaction) pairs, and the first
# data row is skipped via iloc[1:] -- presumably the SurveyMonkey sub-header
# row; confirm against the export.
#
# df itself is left untouched, so this cell can be re-run safely.
output_columns = ["outcome", "importance", "satisfaction"]

if len(df.columns) % 2 != 0:
    raise ValueError(
        "Expected an even number of question columns (importance/satisfaction "
        f"pairs), found {len(df.columns)}"
    )

pair_frames = []
for first in range(0, len(df.columns), 2):
    outcome = get_outcome(df.columns[first])
    answers = df.iloc[1:, first:first + 2]

    # to_numpy() discards the source index (1..n). Assigning the Series
    # directly would align them against a fresh 0..n-1 index, turning the
    # first row into NaN and silently dropping the last respondent.
    pair_frames.append(
        pd.DataFrame(
            {
                "outcome": [outcome] * len(answers),
                "importance": answers.iloc[:, 0].to_numpy(),
                "satisfaction": answers.iloc[:, 1].to_numpy(),
            },
            columns=output_columns,
        )
    )

# Concatenate once instead of growing the frame inside the loop.
if pair_frames:
    df2 = pd.concat(pair_frames, ignore_index=True)
else:
    df2 = pd.DataFrame(columns=output_columns)


In [6]:
# Translate the answer labels into numeric scores on a 1-5 scale.
# Labels that are absent from a scale come out of Series.map as NaN.
# Note that "Neutral" and "Somewhat important" both score 3.
importanceToNumber = {
    "Not at all important": 1,
    "Not so important": 2,
    "Neutral": 3,
    "Somewhat important": 3,
    "Very important": 4,
    "Extremely important": 5,
}
satisfactionToNumber = {
    "Very dissatisfied": 1,
    "Dissatisfied": 2,
    "Neither satisfied nor dissatisfied": 3,
    "Satisfied": 4,
    "Very satisfied": 5,
}

for column, scale in (
    ("importance", importanceToNumber),
    ("satisfaction", satisfactionToNumber),
):
    df2[column] = df2[column].map(scale)

In [7]:
# Remove, in place, every row that contains at least one missing value.
df2.dropna(axis=0, how="any", inplace=True)

# Save the result as CSV, omitting the index column.
df2.to_csv("OppScore.csv", index=False)