In [1]:
import pandas as pd
import numpy as np
from scipy import stats

import warnings
warnings.filterwarnings("ignore")

In [15]:
!ls

E29_CHECKIN03_FULL_ANALYSIS_V1.csv    E29_CLEANING.ipynb
E29_CHECKIN03_MACROANALYSIS_V1.csv    E29_FACULTY.ipynb
E29_CHECKIN03_QUESTION_DICTIONARY.csv E29_TeamPercentiles.ipynb
E29_CHECKIN03_RAW_V1.csv              full_cleaned.csv
E29_CHECKIN03_ROSTER.csv


In [12]:
#raw = importData('E29_CHECKIN1_MACROANALYSIS_V1.csv').replace("No Response", np.nan)

In [13]:
def importData(csv):
    data = pd.read_csv(csv)
    return data

#Break down goal setting and team process into: goals, support, communication, work allocation, team role, and enjoyment
#Then, find the magnitude of such difficulties for each team
def getCategoryDifferences(df, teamNumberCol):
    #Subset df to just goal setting and team process quantitative questions
    columnsOfInterest = [teamNumberCol, "SharedGoalTeamAvg", "SharedGoalClassAvg", 
                         "IndvGoalTeamAvg", "IndvGoalClassAvg", "SupportTeamAvg", 
                         "SupportClassAvg", "CommunicateTeamAvg", "CommunicateClassAvg", 
                         "WorkAllocTeamAvg", "WorkAllocClassAvg", "RoleTeamAvg", 
                         "RoleClassAvg", "EnjoyTeamAvg", "EnjoyClassAvg", 
                        'ConflictResTeamAvg', 'ConflictResClassAvg', 'ConflictResClassStDev',
                        'EfficiencyTeamAvg', 'EfficiencyClassAvg', 'EfficiencyClassStDev',
                        'ExpressionTeamAvg', 'ExpressionClassAvg', 'ExpressionClassStDev']
    temp = df[columnsOfInterest]
    
    #Collapse rows so that 1 row = 1 team
    temp = temp.drop_duplicates()
    
    #Calculate all averages (= team's cumulative deviation from class average)
    temp['goalsDiff'] = (temp["SharedGoalTeamAvg"] + temp["IndvGoalTeamAvg"]) - (temp["SharedGoalClassAvg"] + temp["IndvGoalClassAvg"])
    temp['supportDiff'] = temp["SupportTeamAvg"] - temp["SupportClassAvg"]
    temp['communicateDiff'] = temp["CommunicateTeamAvg"] - temp["CommunicateClassAvg"]
    temp['workAllocationDiff'] = temp["WorkAllocTeamAvg"] - temp["WorkAllocClassAvg"]
    temp['teamRoleDiff'] = temp["RoleTeamAvg"] - temp["RoleClassAvg"]
    temp['enjoyDiff'] = temp["EnjoyTeamAvg"] - temp["EnjoyClassAvg"]
    temp['conflictResDiff'] = temp["ConflictResTeamAvg"] - temp["ConflictResClassAvg"]
    temp['efficiencyDiff'] = temp['EfficiencyTeamAvg'] - temp['EfficiencyClassAvg']
    temp['expressionDiff'] = temp['ExpressionTeamAvg'] - temp['ExpressionClassAvg']
    temp['overallDiff'] = round(temp['goalsDiff'] + temp['supportDiff'] + temp['communicateDiff'] + temp['workAllocationDiff'] + temp['teamRoleDiff'] + temp['enjoyDiff'] + temp['conflictResDiff'] + temp['expressionDiff'] + temp['efficiencyDiff'], 2)
    
    tempRanked = temp.sort_values(teamNumberCol, ascending=True).reset_index(drop=True)
    tempRanked = tempRanked[[teamNumberCol, 'overallDiff', 'goalsDiff', 'supportDiff', 
                             'communicateDiff', 'workAllocationDiff', 'teamRoleDiff', 'enjoyDiff',
                            'conflictResDiff', 'efficiencyDiff', 'expressionDiff']]
    
    return tempRanked

#Assign each team a percentile (score) based on their overall difference from the class average. A difference of 0 designates a 50 (median)
def getTeamPercentiles(df, teamNumberCol):

    arr = np.array(df['overallDiff'])
    df["percentile"] = [round(stats.percentileofscore(arr, x), 2) for x in arr]

    return df
#Assign a category according to each team's percentile
def categorizePercentiles(df, teamNumberCol):
    arr = np.array(df['percentile'])
    cats = []
    #print(arr)
    for score in arr:
        if score >= 75:
            cats.append("Exceptional")
        elif score >= 50 and score < 75:
            cats.append("Strong")
        elif score >= 20 and score < 50:
            cats.append("Standard")
        elif score >= 10 and score < 20:
            cats.append("Weak")
        elif score >= 0 and score < 10:
            cats.append("Needs Attention")
        else:
            cats.append("Team did not respond")
    #print(cats)
    df["percentileCategory"] = cats
    return df

In [14]:
E29_CHECKIN02 = pd.read_csv("E29_CHECKIN04_MACROANALYSIS_V1.csv")

FileNotFoundError: [Errno 2] No such file or directory: 'E29_CHECKIN04_MACROANALYSIS_V1.csv'

In [9]:
diffs = getCategoryDifferences(E29_CHECKIN02, 'TeamNumber')

In [10]:
percentiles = getTeamPercentiles(diffs, 'TeamNumber')

In [11]:
scored = categorizePercentiles(percentiles, 'TeamNumber')

In [12]:
scored.head()

Unnamed: 0,TeamNumber,overallDiff,goalsDiff,supportDiff,communicateDiff,workAllocationDiff,teamRoleDiff,enjoyDiff,conflictResDiff,efficiencyDiff,expressionDiff,percentile,percentileCategory
0,1,4.4,0.9,0.7,0.7,0.3,0.3,0.3,0.5,0.2,0.5,73.33,Strong
1,2,2.3,0.5,0.2,0.2,0.6,0.1,0.1,0.3,0.3,0.0,56.67,Strong
2,3,-0.5,-0.1,0.1,0.2,-0.2,-0.4,0.0,-0.5,0.2,0.2,40.0,Standard
3,4,1.1,0.9,0.4,0.2,0.3,-0.2,-0.2,0.0,0.0,-0.3,50.0,Strong
4,5,-1.4,-0.1,-0.1,-0.3,-1.2,0.3,0.3,0.0,0.0,-0.3,33.33,Standard


In [13]:
roster = pd.read_csv("E29_CHECKIN03_ROSTER.csv")
roster = roster[["Group", "TeamNumber"]].drop_duplicates().reset_index()
macro = pd.read_csv("E29_CHECKIN03_MACROANALYSIS_V1.csv")
scored = scored

In [14]:
scored

Unnamed: 0,TeamNumber,overallDiff,goalsDiff,supportDiff,communicateDiff,workAllocationDiff,teamRoleDiff,enjoyDiff,conflictResDiff,efficiencyDiff,expressionDiff,percentile,percentileCategory
0,1,4.4,0.9,0.7,0.7,0.3,0.3,0.3,0.5,0.2,0.5,73.33,Strong
1,2,2.3,0.5,0.2,0.2,0.6,0.1,0.1,0.3,0.3,0.0,56.67,Strong
2,3,-0.5,-0.1,0.1,0.2,-0.2,-0.4,0.0,-0.5,0.2,0.2,40.0,Standard
3,4,1.1,0.9,0.4,0.2,0.3,-0.2,-0.2,0.0,0.0,-0.3,50.0,Strong
4,5,-1.4,-0.1,-0.1,-0.3,-1.2,0.3,0.3,0.0,0.0,-0.3,33.33,Standard
5,6,5.6,0.9,0.4,0.7,0.8,0.3,0.8,0.5,0.5,0.7,93.33,Exceptional
6,7,-15.0,-3.1,-1.4,-1.8,-2.0,-0.9,-2.2,-1.3,-1.7,-0.6,6.67,Needs Attention
7,8,5.4,1.1,0.5,0.6,0.7,0.4,0.6,0.6,0.4,0.5,90.0,Exceptional
8,9,2.4,0.3,0.4,0.5,0.5,-0.2,0.1,0.5,0.3,0.0,61.67,Strong
9,10,7.4,1.1,0.5,1.0,0.9,0.6,0.8,0.8,1.0,0.7,96.67,Exceptional


In [15]:
final = macro.merge(scored, on='TeamNumber').merge(roster, on='TeamNumber')

In [16]:
final

Unnamed: 0.1,Unnamed: 0,FullName,Email,TeamNumber,TeammateNumber,Our team is clear about the shared goals for our work together,SharedGoalTeamAvg,SharedGoalClassAvg,SharedGoalClassStDev,We each know about one another's individual goals for our work together,...,workAllocationDiff,teamRoleDiff,enjoyDiff,conflictResDiff,efficiencyDiff,expressionDiff,percentile,percentileCategory,index,Group
0,0,Arush Godha,arush.godha@berkeley.edu,1,1,7.0,6.8,6.1,1.1,7.0,...,0.3,0.3,0.3,0.5,0.2,0.5,73.33,Strong,0,101-1
1,1,Aaron Lee,aaronlee5702@berkeley.edu,1,2,,6.8,6.1,1.1,,...,0.3,0.3,0.3,0.5,0.2,0.5,73.33,Strong,0,101-1
2,2,Naomi Nogueira,naomi_nogueira@berkeley.edu,1,3,6.0,6.8,6.1,1.1,5.0,...,0.3,0.3,0.3,0.5,0.2,0.5,73.33,Strong,0,101-1
3,3,Khant Nyi,knyi@berkeley.edu,1,4,7.0,6.8,6.1,1.1,6.0,...,0.3,0.3,0.3,0.5,0.2,0.5,73.33,Strong,0,101-1
4,4,Brandon Ramos-Bailon,bramos6837@berkeley.edu,1,5,,6.8,6.1,1.1,,...,0.3,0.3,0.3,0.5,0.2,0.5,73.33,Strong,0,101-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,167,Pablo Brunet,pbrunet@berkeley.edu,30,2,7.0,6.6,6.1,1.1,7.0,...,0.7,0.2,0.4,0.4,0.6,0.3,86.67,Exceptional,166,105-6
168,168,Eilyn Garcia,garciaeilyn@berkeley.edu,30,3,7.0,6.6,6.1,1.1,7.0,...,0.7,0.2,0.4,0.4,0.6,0.3,86.67,Exceptional,166,105-6
169,169,Tiffelyn Kurniawan,tifkurnia2004@berkeley.edu,30,4,7.0,6.6,6.1,1.1,7.0,...,0.7,0.2,0.4,0.4,0.6,0.3,86.67,Exceptional,166,105-6
170,170,Eric Lind,elind1@berkeley.edu,30,5,,6.6,6.1,1.1,,...,0.7,0.2,0.4,0.4,0.6,0.3,86.67,Exceptional,166,105-6


In [17]:
final.to_csv("E29_CHECKIN03_FULL_ANALYSIS_V1.csv")