In [1]:
import pandas as pd
import numpy as np

In [14]:
#Read file 
def importData(csv):
    """Load a CSV source into a pandas DataFrame.

    Parameters:
        csv: whatever ``pd.read_csv`` accepts as its first argument
            (e.g. a file path or an open file-like object).

    Returns:
        The DataFrame produced by ``pd.read_csv`` with its default options.
    """
    return pd.read_csv(csv)

#Break down goal setting and team process into: goals, support, communication, work allocation, team role, and enjoyment
#Then, find the magnitude of such difficulties for each team
def getCategoryDifferences(df, teamNumberCol):
    """Compute each team's deviation from the class average per teaming category.

    Parameters:
        df: DataFrame holding ``teamNumberCol`` plus, for every survey question
            prefix listed below, a ``<prefix>TeamAvg`` and ``<prefix>ClassAvg``
            column. A missing column raises ``KeyError`` (from pandas).
        teamNumberCol: name of the column identifying the team.

    Returns:
        A new DataFrame (``df`` itself is not modified) sorted ascending by
        ``teamNumberCol`` with a fresh 0..n-1 index and the columns
        ``teamNumberCol, overallDiff, goalsDiff, supportDiff, communicateDiff,
        workAllocationDiff, teamRoleDiff, enjoyDiff``.
        Each ``*Diff`` is (sum of team averages) - (sum of class averages) for
        the questions in that category; ``overallDiff`` is the sum of the six
        category diffs rounded to 2 decimals. NaN inputs propagate to the diffs.
    """
    #Category -> prefixes of the survey questions that feed it
    #(goals combines the shared-goal and individual-goal questions)
    categoryQuestions = {
        'goalsDiff': ["SharedGoal", "IndvGoal"],
        'supportDiff': ["Support"],
        'communicateDiff': ["Communicate"],
        'workAllocationDiff': ["WorkAlloc"],
        'teamRoleDiff': ["Role"],
        'enjoyDiff': ["Enjoy"],
    }

    #Subset df to just goal setting and team process quantitative questions
    columnsOfInterest = [teamNumberCol]
    for questions in categoryQuestions.values():
        for question in questions:
            columnsOfInterest += [question + "TeamAvg", question + "ClassAvg"]

    #Collapse rows so that 1 row = 1 team. The explicit copy makes the frame
    #independent of df, so the column assignments below are not chained
    #assignments on a slice (no SettingWithCopyWarning, no write-through risk).
    #Rows only collapse when all selected values are identical for a team.
    teams = df[columnsOfInterest].drop_duplicates().copy()

    #Team's deviation from the class average, per category
    for category, questions in categoryQuestions.items():
        teamTotal = sum(teams[question + "TeamAvg"] for question in questions)
        classTotal = sum(teams[question + "ClassAvg"] for question in questions)
        teams[category] = teamTotal - classTotal

    #Cumulative deviation. Plain left-to-right addition (not DataFrame.sum) so
    #that a NaN in any category keeps overallDiff NaN instead of being skipped.
    teams['overallDiff'] = sum(teams[category] for category in categoryQuestions).round(2)

    ranked = teams.sort_values(teamNumberCol, ascending=True).reset_index(drop=True)
    return ranked[[teamNumberCol, 'overallDiff'] + list(categoryQuestions)]

#Add pctImpact placeholder columns based on the list of column names
def addPctColumns(df, pctColumnNamesList):
    """Attach empty-string placeholder columns to ``df``.

    Parameters:
        df: DataFrame to extend; it is modified in place.
        pctColumnNamesList: names of the columns to create (an existing column
            with the same name is overwritten).

    Returns:
        The same ``df`` object, with every listed column filled with ''.
    """
    rowCount = len(df)
    for columnName in pctColumnNamesList:
        df[columnName] = ['' for _ in range(rowCount)]
    return df

#For each team, identify % breakdown of total struggles relative to 6 categories of teaming:
#goal setting, support, communication, work allocation, team roles, and enjoyment
def getTotalBreakdown(df, teamNumberCol, categoryDiffsList, pctColsList):
    """Fill the pct columns with each category's share of a team's total shortfall.

    For every row, only categories with a negative diff (team below the class
    average) count as struggles. A category's share is
    ``abs(diff) / sum(abs(negative diffs in that row))``; categories with a
    diff >= 0 get 0. A row with no negative diff gets 0 everywhere.

    Parameters:
        df: DataFrame with ``teamNumberCol`` and every column named in
            ``categoryDiffsList``. It is modified in place.
        teamNumberCol: name of the team identifier column. The computation is
            row-wise, so the column is only checked for presence; rows with a
            repeated team number are each handled independently.
        categoryDiffsList: names of the diff columns. The share for a diff
            column is stored under the name obtained by removing 'Diff' from it
            and appending 'Pct' (e.g. 'goalsDiff' -> 'goalsPct').
        pctColsList: names of the pct columns to write into ``df``.

    Returns:
        The same ``df`` object with the ``pctColsList`` columns set (float).
        A NaN diff yields a NaN share for that category and is ignored when
        totalling the row's shortfall.

    Raises:
        KeyError: if ``teamNumberCol`` or a diff column is missing, or if a
            name in ``pctColsList`` does not correspond to any diff column.
    """
    if teamNumberCol not in df.columns:
        raise KeyError(teamNumberCol)

    diffs = df[categoryDiffsList].astype(float)

    #Magnitude of each below-average diff; diffs >= 0 contribute nothing.
    #clip(upper=0) keeps only the negative part, abs() turns it into a size
    #(and avoids a "-0.0" in the output).
    shortfalls = diffs.clip(upper=0).abs()
    totalShortfall = shortfalls.sum(axis=1)

    #A zero total means the row has no negative diff; divide by 1 there so the
    #shares come out as 0 rather than 0/0.
    shares = shortfalls.div(totalShortfall.where(totalShortfall > 0, 1.0), axis=0)
    shares.columns = [category.replace('Diff', '') + 'Pct' for category in categoryDiffsList]

    unknownPctCols = [pctCol for pctCol in pctColsList if pctCol not in shares.columns]
    if unknownPctCols:
        raise KeyError("no diff column in categoryDiffsList maps to: " + ", ".join(map(str, unknownPctCols)))

    for pctCol in pctColsList:
        #Positional assignment: shares was built row-for-row from df
        df[pctCol] = shares[pctCol].to_numpy()

    return df

#Assign each team a percentile (score) based on their overall difference from the class average. A difference of 0 designates a 50 (median)
def getTeamPercentiles(df, teamNumberCol):
    """Add a 'percentile' score column derived from 'overallDiff'.

    The score is a linear mapping of the overall difference, clamped at both
    ends: <= -5 gives 0, >= 5 gives 100, exactly 0 gives 50, and anything else
    gives ``50 + 10 * overallDiff``.

    Parameters:
        df: DataFrame with an 'overallDiff' column; modified in place.
        teamNumberCol: accepted for signature consistency; not used here.

    Returns:
        The same ``df`` object with the 'percentile' column set.
    """
    def scoreFor(diff):
        #The three special cases are mutually exclusive, so order is irrelevant
        if diff >= 5:
            return 100
        if diff <= -5:
            return 0
        if diff == 0:
            return 50
        return 50 + 10*(diff)

    df['percentile'] = [scoreFor(diff) for diff in df['overallDiff']]
    return df

#Using impact pct columns, add a column to the dataset that has top category difficulties. Each highlighted category must have an impact of at least the threshold.
def getTopDifficulties(df, teamNumberCol, pctColsList, percentileCol, threshold=.25, includeAboveMedianTeams=True,
                       lowPercentileCutoff=20, lowPercentileThresholdScale=.25):
    """Add a 'topDifficulties' column listing each team's dominant struggle categories.

    A category is listed when its pct value is strictly greater than the team's
    threshold. Teams whose percentile (truncated to an int) is above
    ``lowPercentileCutoff`` use ``threshold``; the remaining, lowest-scoring
    teams use ``threshold * lowPercentileThresholdScale`` so that more of
    their categories are surfaced.

    Parameters:
        df: DataFrame with ``teamNumberCol``, ``percentileCol`` and every column
            in ``pctColsList``; modified in place.
        teamNumberCol: name of the team identifier column. Rows are processed
            one by one, so it is only checked for presence.
        pctColsList: names of the numeric pct columns to inspect, in output order.
        percentileCol: name of the column holding each team's score.
        threshold: minimum (exclusive) pct for a category to be listed.
        includeAboveMedianTeams: currently unused; kept so existing calls keep
            working.
        lowPercentileCutoff: teams at or below this (truncated) percentile get
            the reduced threshold. Default 20.
        lowPercentileThresholdScale: factor applied to ``threshold`` for those
            teams. Default .25, i.e. the threshold is decreased by 75%.

    Returns:
        The same ``df`` object; 'topDifficulties' holds, per row, a list of
        category names (pct column name with 'Pct' removed).

    Raises:
        KeyError: if ``teamNumberCol``, ``percentileCol`` or a pct column is missing.
        ValueError: if a percentile is NaN (it cannot be truncated to an int).
    """
    if teamNumberCol not in df.columns:
        raise KeyError(teamNumberCol)

    #TODO: thresholds reportedly "not working". One thing to check: the
    #comparison is strict (>), so a pct that has been rounded to exactly the
    #threshold value before this call (e.g. 0.254 -> 0.25) is not listed.
    categoryNames = [pctCol.replace('Pct', '') for pctCol in pctColsList]
    percentiles = df[percentileCol].tolist()
    pctValues = df[pctColsList].to_numpy(dtype=float)

    classDifficulties = []
    for percentile, teamPcts in zip(percentiles, pctValues):
        #int() truncation is kept so e.g. 20.5 still counts as the bottom group
        if int(percentile) > lowPercentileCutoff:
            teamThreshold = threshold
        else:
            teamThreshold = threshold * lowPercentileThresholdScale
        classDifficulties.append(
            [name for name, pct in zip(categoryNames, teamPcts) if pct > teamThreshold]
        )

    #Explicit object Series so the per-team lists are stored as-is, one per row
    df["topDifficulties"] = pd.Series(classDifficulties, index=df.index, dtype=object)

    return df

# Add teammates to each team in a new column
def addRoster(df, teamNumberCol, rosterCSV, rosterTeamNumberCol, rosterFullNameCol):
    """Add a 'Teammates' column listing the roster names of each team.

    Parameters:
        df: DataFrame with one row per team; modified in place.
        teamNumberCol: name of the team identifier column in ``df``.
        rosterCSV: roster CSV source, loaded through ``importData``.
        rosterTeamNumberCol: name of the team identifier column in the roster.
        rosterFullNameCol: name of the roster column holding member names.

    Returns:
        The same ``df`` object; 'Teammates' holds, per row, the list of names
        whose roster team number equals that row's team number (an empty list
        when nothing matches), in roster order.
    """
    rosterFrame = importData(rosterCSV)
    rosterTeams = rosterFrame[rosterTeamNumberCol]

    df["Teammates"] = [
        list(rosterFrame[rosterTeams == teamNumber][rosterFullNameCol])
        for teamNumber in df[teamNumberCol]
    ]

    return df

In [15]:
# A function that takes in cleaned macroanalysis data and returns a .csv file of teams, their overall scores, 
# and a breakdown of their teaming difficulties
def getFacultyInsights(macroanalysis_data, teamNumberCol,
                       rosterCSV='E29_REVISED_LASTSORTED_ROSTER.csv',
                       rosterTeamNumberCol='TeamNumber',
                       rosterFullNameCol='FullName'):
    """Build the per-team faculty insights table from cleaned macroanalysis data.

    Pipeline: load the CSV, compute each team's category differences from the
    class average, break the negative differences down into per-category
    shares, round numeric columns to 2 decimals, score each team from its
    overall difference, flag the top difficulties, and attach teammate names.

    Parameters:
        macroanalysis_data: CSV source of the macroanalysis data (passed to
            ``importData``).
        teamNumberCol: name of the team identifier column in that data.
        rosterCSV: CSV source of the class roster. Defaults to the roster file
            this notebook was written for.
        rosterTeamNumberCol: team identifier column in the roster.
        rosterFullNameCol: member name column in the roster.

    Returns:
        DataFrame with one row per team: the team number, overall and
        per-category diffs, per-category pct shares, 'percentile',
        'topDifficulties' and 'Teammates'.
    """
    #Variables needed later
    pctCols = ['goalsPct', 'supportPct', 'communicatePct', 'workAllocationPct', 'teamRolePct', 'enjoyPct']
    diffCols = ['goalsDiff', 'supportDiff', 'communicateDiff', 'workAllocationDiff', 'teamRoleDiff', 'enjoyDiff']

    #Import data (csv)
    raw = importData(macroanalysis_data)

    #Calculate team average difference from class average relative to 6 key categories:
    #goal setting, support, communication, work allocation, team roles, and enjoyment
    teamDifferences = getCategoryDifferences(raw, teamNumberCol)

    #For each team, identify % of difficulties attributed to each teaming category
    teamsRankedWithBreakdown = addPctColumns(teamDifferences, pctCols)
    teamsRankedWithBreakdown = getTotalBreakdown(teamsRankedWithBreakdown, teamNumberCol, diffCols, pctCols)
    #Note: the pct shares are rounded here, i.e. before the threshold comparison
    #done in getTopDifficulties
    teamsRankedWithBreakdown = teamsRankedWithBreakdown.round(2)

    #Add overall score based on 'overallDiff'
    teamsRanked = getTeamPercentiles(teamsRankedWithBreakdown, teamNumberCol)

    #Add Top Difficulties for each team
    teamsRanked = getTopDifficulties(teamsRanked, teamNumberCol, pctCols, 'percentile', .25, True)

    #Add teammate names from roster
    final = addRoster(teamsRanked, teamNumberCol, rosterCSV, rosterTeamNumberCol, rosterFullNameCol)

    return final
    

In [17]:
# Run the full pipeline on the check-in 1 macroanalysis data and save the
# resulting per-team table as a CSV (to_csv is called with its defaults).
facultyInsights = getFacultyInsights('E29_CHECKIN1_MACROANALYSIS_V1.csv', 'TeamNumber')
facultyInsights.to_csv("Faculty_Insights_v1.csv")