In [1]:
import pandas as pd
import numpy as np
from scipy import stats

import warnings
warnings.filterwarnings("ignore")

In [2]:
#raw = importData('E29_CHECKIN1_MACROANALYSIS_V1.csv').replace("No Response", np.nan)

In [3]:
def importData(csv):
    data = pd.read_csv(csv)
    return data

#Break down goal setting and team process into: goals, support, communication, work allocation, team role, and enjoyment
#Then, find the magnitude of such difficulties for each team
def getCategoryDifferences(df, teamNumberCol):
    #Subset df to just goal setting and team process quantitative questions
    columnsOfInterest = [teamNumberCol, "SharedGoalTeamAvg", "SharedGoalClassAvg", 
                         "IndvGoalTeamAvg", "IndvGoalClassAvg", "SupportTeamAvg", 
                         "SupportClassAvg", "CommunicateTeamAvg", "CommunicateClassAvg", 
                         "WorkAllocTeamAvg", "WorkAllocClassAvg", "RoleTeamAvg", 
                         "RoleClassAvg", "EnjoyTeamAvg", "EnjoyClassAvg", 
                        'ConflictResTeamAvg', 'ConflictResClassAvg', 'ConflictResClassStDev',
                        'EfficiencyTeamAvg', 'EfficiencyClassAvg', 'EfficiencyClassStDev',
                        'ExpressionTeamAvg', 'ExpressionClassAvg', 'ExpressionClassStDev']
    temp = df[columnsOfInterest]
    
    #Collapse rows so that 1 row = 1 team
    temp = temp.drop_duplicates()
    
    #Calculate all averages (= team's cumulative deviation from class average)
    temp['goalsDiff'] = (temp["SharedGoalTeamAvg"] + temp["IndvGoalTeamAvg"]) - (temp["SharedGoalClassAvg"] + temp["IndvGoalClassAvg"])
    temp['supportDiff'] = temp["SupportTeamAvg"] - temp["SupportClassAvg"]
    temp['communicateDiff'] = temp["CommunicateTeamAvg"] - temp["CommunicateClassAvg"]
    temp['workAllocationDiff'] = temp["WorkAllocTeamAvg"] - temp["WorkAllocClassAvg"]
    temp['teamRoleDiff'] = temp["RoleTeamAvg"] - temp["RoleClassAvg"]
    temp['enjoyDiff'] = temp["EnjoyTeamAvg"] - temp["EnjoyClassAvg"]
    temp['conflictResDiff'] = temp["ConflictResTeamAvg"] - temp["ConflictResClassAvg"]
    temp['efficiencyDiff'] = temp['EfficiencyTeamAvg'] - temp['EfficiencyClassAvg']
    temp['expressionDiff'] = temp['ExpressionTeamAvg'] - temp['ExpressionClassAvg']
    temp['overallDiff'] = round(temp['goalsDiff'] + temp['supportDiff'] + temp['communicateDiff'] + temp['workAllocationDiff'] + temp['teamRoleDiff'] + temp['enjoyDiff'] + temp['conflictResDiff'] + temp['expressionDiff'] + temp['efficiencyDiff'], 2)
    
    tempRanked = temp.sort_values(teamNumberCol, ascending=True).reset_index(drop=True)
    tempRanked = tempRanked[[teamNumberCol, 'overallDiff', 'goalsDiff', 'supportDiff', 
                             'communicateDiff', 'workAllocationDiff', 'teamRoleDiff', 'enjoyDiff',
                            'conflictResDiff', 'efficiencyDiff', 'expressionDiff']]
    
    return tempRanked

#Assign each team a percentile (score) based on their overall difference from the class average. A difference of 0 designates a 50 (median)
def getTeamPercentiles(df, teamNumberCol):

    arr = np.array(df['overallDiff'])
    df["percentile"] = [round(stats.percentileofscore(arr, x), 2) for x in arr]

    return df
#Assign a category according to each team's percentile
def categorizePercentiles(df, teamNumberCol):
    arr = np.array(df['percentile'])
    cats = []
    #print(arr)
    for score in arr:
        if score >= 75:
            cats.append("Exceptional")
        elif score >= 50 and score < 75:
            cats.append("Strong")
        elif score >= 20 and score < 50:
            cats.append("Standard")
        elif score >= 10 and score < 20:
            cats.append("Weak")
        elif score >= 0 and score < 10:
            cats.append("Needs Attention")
        else:
            cats.append("Team did not respond")
    #print(cats)
    df["percentileCategory"] = cats
    return df

In [4]:
CHECKIN = pd.read_csv("OCONNELL_CHECKIN01_MACROANALYSIS_V1.csv")

In [5]:
diffs = getCategoryDifferences(CHECKIN, 'TeamNumber')

In [6]:
percentiles = getTeamPercentiles(diffs, 'TeamNumber')

In [7]:
scored = categorizePercentiles(percentiles, 'TeamNumber')

In [8]:
scored.head()

Unnamed: 0,TeamNumber,overallDiff,goalsDiff,supportDiff,communicateDiff,workAllocationDiff,teamRoleDiff,enjoyDiff,conflictResDiff,efficiencyDiff,expressionDiff,percentile,percentileCategory
0,1,2.9,0.6,0.3,0.2,0.5,0.3,0.3,0.3,0.1,0.3,94.12,Exceptional
1,2,-0.5,0.2,-0.1,-0.2,-0.1,-0.3,0.3,-0.1,0.1,-0.3,47.06,Standard
2,3,1.0,0.1,-0.1,0.1,-0.3,0.2,0.3,0.2,0.2,0.3,64.71,Strong
3,4,0.1,0.4,0.1,0.2,0.1,0.1,-0.1,-0.5,-0.3,0.1,52.94,Strong
4,5,-0.9,-0.2,-0.1,0.0,-0.1,-0.1,-0.3,-0.3,0.1,0.1,38.24,Standard


In [10]:
roster = pd.read_csv("OCONNELL_CHECKIN01_ROSTER.csv")
#roster = roster[["TeamName", "TeamNumber"]].drop_duplicates().reset_index()
macro = pd.read_csv("OCONNELL_CHECKIN01_MACROANALYSIS_V1.csv")
scored = scored

In [11]:
scored.to_csv("OCONNELL_CHECKIN01_TEAM_PERCENTILES.csv")

In [14]:
final = macro.merge(scored, on='TeamNumber').merge(roster, on='TeamNumber')

In [15]:
final

Unnamed: 0.1,Unnamed: 0,FullName_x,Email_x,TeamNumber,TeammateNumber_x,Our team is clear about the shared goals for our work together,SharedGoalTeamAvg,SharedGoalClassAvg,SharedGoalClassStDev,We each know about one another's individual goals for our work together,...,FullName_y,Email_y,Grad/UG,TeammateNumber_y,Teammate1,Teammate2,Teammate3,Teammate4,Teammate5,Teammate6
0,0,Kai Sims,ksims@berkeley.edu,1,1,7.0,6.6,6.4,0.8,7.0,...,Kai Sims,ksims@berkeley.edu,Grad,1,Kai Sims,Declan Fitzgerald,Shelby Desroches,Sophie Polidoro,Kamron Soltani,
1,0,Kai Sims,ksims@berkeley.edu,1,1,7.0,6.6,6.4,0.8,7.0,...,Declan Fitzgerald,declan.fitzgerald@berkeley.edu,Grad,2,Kai Sims,Declan Fitzgerald,Shelby Desroches,Sophie Polidoro,Kamron Soltani,
2,0,Kai Sims,ksims@berkeley.edu,1,1,7.0,6.6,6.4,0.8,7.0,...,Shelby Desroches,shelby_desroches@berkeley.edu,Grad,3,Kai Sims,Declan Fitzgerald,Shelby Desroches,Sophie Polidoro,Kamron Soltani,
3,0,Kai Sims,ksims@berkeley.edu,1,1,7.0,6.6,6.4,0.8,7.0,...,Sophie Polidoro,sophie_polidoro@berkeley.edu,Grad,4,Kai Sims,Declan Fitzgerald,Shelby Desroches,Sophie Polidoro,Kamron Soltani,
4,0,Kai Sims,ksims@berkeley.edu,1,1,7.0,6.6,6.4,0.8,7.0,...,Kamron Soltani,kamronsoltani@berkeley.edu,Grad,5,Kai Sims,Declan Fitzgerald,Shelby Desroches,Sophie Polidoro,Kamron Soltani,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
404,82,Brian Hsu,brian_hsu@berkeley.edu,17,5,5.0,6.0,6.4,0.8,6.0,...,Alex Thu,alexthu@berkeley.edu,UG,1,Alex Thu,Arunima Basu,Taylor Jazan,Alan Yang,Brian Hsu,
405,82,Brian Hsu,brian_hsu@berkeley.edu,17,5,5.0,6.0,6.4,0.8,6.0,...,Arunima Basu,arunimabasu@berkeley.edu,UG,2,Alex Thu,Arunima Basu,Taylor Jazan,Alan Yang,Brian Hsu,
406,82,Brian Hsu,brian_hsu@berkeley.edu,17,5,5.0,6.0,6.4,0.8,6.0,...,Taylor Jazan,tjazan@berkeley.edu,UG,3,Alex Thu,Arunima Basu,Taylor Jazan,Alan Yang,Brian Hsu,
407,82,Brian Hsu,brian_hsu@berkeley.edu,17,5,5.0,6.0,6.4,0.8,6.0,...,Alan Yang,yaj760@berkeley.edu,UG,4,Alex Thu,Arunima Basu,Taylor Jazan,Alan Yang,Brian Hsu,


In [16]:
final.to_csv("OCONNELL_CHECKIN01_FULL_ANALYSIS_V1.csv")